Grab products from Newegg
Grab products from Newegg
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: scrapes one hard-coded Newegg sub-category and appends
 * every product found to the caller-supplied list.
 *
 * @param env  Sandbox helper object. This script calls its newArrayList,
 *             newHashMap, newURL, newJsoup, newString and error methods.
 * @param args Argument map; args.get('links') must return a list whose add()
 *             receives one map per scraped product.
 */
function main(env, args) {
  var catUrl = 'http://www.newegg.com/Store/SubCategory.aspx?SubCategory=11&name=Controller-Panels';
  var maxpage = 1;
  var links = args.get('links');
  var prods = grabProduct(catUrl, maxpage, env);
  for (var i = 0; i < prods.size(); i++) {
    links.add(prods.get(i));
  }
}

/**
 * Walks category pages 1..maxpage and collects one map per product cell.
 *
 * A failure while loading or walking a page is reported through env.error and
 * the loop moves on to the next page; products gathered so far are kept.
 *
 * @param catUrl  Category URL; '&Page=<n>' is appended for each page.
 * @param maxpage Number of the last page to fetch (pages start at 1).
 * @param env     Sandbox helper object (see main).
 * @returns The list created by env.newArrayList(), holding maps with the keys
 *          'url', 'title', 'price', 'small-image', 'description',
 *          'large-images', 'small-images' and 'large-image'. A key is absent
 *          when the corresponding element was not found.
 */
function grabProduct(catUrl, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var link = env.newURL(catUrl + '&Page=' + no);
      // Second argument is presumably a timeout in milliseconds, as in
      // jsoup's Jsoup.parse(URL, int) - TODO confirm against the sandbox.
      var doc = env.newJsoup().parse(link, 60000);
      var elements = doc.select('.itemCell');
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        if (element.hasClass('featuredProduct')) continue;
        var item = readListingCell(env, link, element);
        addProductDetails(env, item);
        tag.add(item);
      }
    } catch (e) {
      env.error(e);
    }
  }
  return tag;
}

/** Builds a product map from the fields visible in one listing cell. */
function readListingCell(env, pageUrl, cell) {
  var item = env.newHashMap();

  var child = cell.select('.itemText .wrapper a').first();
  if (child != null) {
    item.put('url', env.newURL(pageUrl, child.attr('href')) + '');
  }

  child = cell.select('.itemText .wrapper a span').first();
  if (child != null) {
    item.put('title', child.text());
  }

  child = cell.select('.itemAction .itemPricing .priceFinal').first();
  if (child != null) {
    var price = env.newString(child.text());
    if (price.startsWith('Now: ')) {
      price = price.substring(5); // drop the 5-character 'Now: ' prefix
    }
    item.put('price', price + '');
  }

  child = cell.select('.itemGraphics .itemImage img').first();
  if (child != null) {
    item.put('small-image', env.newURL(pageUrl, child.attr('src')) + '');
  }

  return item;
}

/** Loads the product page named by item's 'url' and adds description and image fields. */
function addProductDetails(env, item) {
  var productUrl = item.get('url');
  // A cell without a product link has no detail page; skip the fetch instead
  // of calling env.newURL with null and logging a misleading error.
  if (productUrl == null) return;

  try {
    var detailUrl = env.newURL(productUrl);
    var cdoc = env.newJsoup().parse(detailUrl, 60000);

    var child = cdoc.select('#Specs').first();
    if (child != null) {
      item.put('description', child.html());
    }

    var thumbs = cdoc.select('.navThumbs a.noLine img');
    var largeImages = [];
    var smallImages = [];
    for (var j = 0; j < thumbs.size(); j++) {
      // Resolve against the product page: these src values come from that
      // page, so a relative path must not be resolved against the category URL.
      var img = env.newURL(detailUrl, thumbs.get(j).attr('src')) + '';
      // split/join swaps the literal '$S35$' size token without any regex or
      // '$'-replacement-pattern interpretation.
      largeImages.push(img.split('$S35$').join('$S300W$'));
      smallImages.push(img.split('$S35$').join('$S125W$'));
    }
    item.put('large-images', largeImages.join('\n'));
    item.put('small-images', smallImages.join('\n'));

    child = cdoc.select('.mainSlide img').first();
    if (child != null) {
      item.put('large-image', env.newURL(detailUrl, child.attr('src')) + '');
    }
  } catch (e) {
    // Detail data is best-effort: report the failure, keep the listing fields.
    env.error(e);
  }
}
/**
 * Sandbox entry point: scrapes one hard-coded Newegg sub-category and appends
 * every product found to the caller-supplied list.
 *
 * @param env  Sandbox helper object. This script calls its newArrayList,
 *             newHashMap, newURL, newJsoup, newString and error methods.
 * @param args Argument map; args.get('links') must return a list whose add()
 *             receives one map per scraped product.
 */
function main(env, args) {
  var catUrl = 'http://www.newegg.com/Store/SubCategory.aspx?SubCategory=11&name=Controller-Panels';
  var maxpage = 1;
  var links = args.get('links');
  var prods = grabProduct(catUrl, maxpage, env);
  for (var i = 0; i < prods.size(); i++) {
    links.add(prods.get(i));
  }
}

/**
 * Walks category pages 1..maxpage and collects one map per product cell.
 *
 * A failure while loading or walking a page is reported through env.error and
 * the loop moves on to the next page; products gathered so far are kept.
 *
 * @param catUrl  Category URL; '&Page=<n>' is appended for each page.
 * @param maxpage Number of the last page to fetch (pages start at 1).
 * @param env     Sandbox helper object (see main).
 * @returns The list created by env.newArrayList(), holding maps with the keys
 *          'url', 'title', 'price', 'small-image', 'description',
 *          'large-images', 'small-images' and 'large-image'. A key is absent
 *          when the corresponding element was not found.
 */
function grabProduct(catUrl, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var link = env.newURL(catUrl + '&Page=' + no);
      // Second argument is presumably a timeout in milliseconds, as in
      // jsoup's Jsoup.parse(URL, int) - TODO confirm against the sandbox.
      var doc = env.newJsoup().parse(link, 60000);
      var elements = doc.select('.itemCell');
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        if (element.hasClass('featuredProduct')) continue;
        var item = readListingCell(env, link, element);
        addProductDetails(env, item);
        tag.add(item);
      }
    } catch (e) {
      env.error(e);
    }
  }
  return tag;
}

/** Builds a product map from the fields visible in one listing cell. */
function readListingCell(env, pageUrl, cell) {
  var item = env.newHashMap();

  var child = cell.select('.itemText .wrapper a').first();
  if (child != null) {
    item.put('url', env.newURL(pageUrl, child.attr('href')) + '');
  }

  child = cell.select('.itemText .wrapper a span').first();
  if (child != null) {
    item.put('title', child.text());
  }

  child = cell.select('.itemAction .itemPricing .priceFinal').first();
  if (child != null) {
    var price = env.newString(child.text());
    if (price.startsWith('Now: ')) {
      price = price.substring(5); // drop the 5-character 'Now: ' prefix
    }
    item.put('price', price + '');
  }

  child = cell.select('.itemGraphics .itemImage img').first();
  if (child != null) {
    item.put('small-image', env.newURL(pageUrl, child.attr('src')) + '');
  }

  return item;
}

/** Loads the product page named by item's 'url' and adds description and image fields. */
function addProductDetails(env, item) {
  var productUrl = item.get('url');
  // A cell without a product link has no detail page; skip the fetch instead
  // of calling env.newURL with null and logging a misleading error.
  if (productUrl == null) return;

  try {
    var detailUrl = env.newURL(productUrl);
    var cdoc = env.newJsoup().parse(detailUrl, 60000);

    var child = cdoc.select('#Specs').first();
    if (child != null) {
      item.put('description', child.html());
    }

    var thumbs = cdoc.select('.navThumbs a.noLine img');
    var largeImages = [];
    var smallImages = [];
    for (var j = 0; j < thumbs.size(); j++) {
      // Resolve against the product page: these src values come from that
      // page, so a relative path must not be resolved against the category URL.
      var img = env.newURL(detailUrl, thumbs.get(j).attr('src')) + '';
      // split/join swaps the literal '$S35$' size token without any regex or
      // '$'-replacement-pattern interpretation.
      largeImages.push(img.split('$S35$').join('$S300W$'));
      smallImages.push(img.split('$S35$').join('$S125W$'));
    }
    item.put('large-images', largeImages.join('\n'));
    item.put('small-images', smallImages.join('\n'));

    child = cdoc.select('.mainSlide img').first();
    if (child != null) {
      item.put('large-image', env.newURL(detailUrl, child.attr('src')) + '');
    }
  } catch (e) {
    // Detail data is best-effort: report the failure, keep the listing fields.
    env.error(e);
  }
}
No comments:
Post a Comment