Grab products from BestBuy
Grab products from BestBuy
- Create a JavaScript sandbox with jsoup support.
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point.
 *
 * Scrapes one page of a fixed BestBuy category listing through
 * grabProduct() and appends every product found to the list stored
 * under the 'links' key of `args`.
 *
 * @param env  sandbox helper object, handed straight to grabProduct()
 * @param args argument container; args.get('links') must return an
 *             object exposing add(item)
 */
function main(env, args) {
  var maxpage = 1;
  var catUrl = 'http://www.bestbuy.com/site/olstemplatemapper.jsp?id=pcat17080&type=page&qp=cabcat0100000%23%230%23%23wv~~cabcat0101000%23%23-1%23%23wv~~q466173746c696d69747067735f323236~~nf862%7C%7C53616d73756e67&list=y&nrp=15&sc=TVVideoSP&sp=-bestsellingsort+skuid&usc=abcat0100000';

  // Scrape first, then look up the destination list (same order as before).
  var found = grabProduct(catUrl, maxpage, env);
  var sink = args.get('links');

  var idx = 0;
  while (idx < found.size()) {
    sink.add(found.get(idx));
    idx += 1;
  }
}
10 | |
/**
 * Scrapes pages 1..maxpage of a category listing and returns the products
 * found on them.
 *
 * Each listing page is fetched from catUrl + '&cp=' + pageNumber. A failure
 * while loading a listing page is reported through env.error() and that
 * page is skipped. A failure while processing a single product is also
 * reported through env.error(), but only that product is skipped - the
 * remaining products on the page are still collected.
 *
 * @param catUrl  listing URL without the page parameter
 * @param maxpage number of the last listing page to fetch (1-based, inclusive)
 * @param env     sandbox helper providing newArrayList(), newHashMap(),
 *                newURL(), newJsoup() and error()
 * @returns the list created by env.newArrayList(), holding one map per product
 */
function grabProduct(catUrl, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var link = env.newURL(catUrl + '&cp=' + no);
      var doc = env.newJsoup().parse(link, 60000);
      var elements = doc.select('.hproduct');
      for (var i = 0; i < elements.size(); i++) {
        // Isolate failures per product: one unreachable detail page must
        // not discard the rest of the listing page.
        try {
          var item = grabProductItem(elements.get(i), link, env);
          if (item != null) {
            tag.add(item);
          }
        } catch (itemError) {
          env.error(itemError);
        }
      }
    } catch (e) {
      env.error(e);
    }
  }
  return tag;
}

/**
 * Builds the map for one '.hproduct' listing entry, fetching its detail page.
 *
 * @param element listing entry element
 * @param link    URL object of the listing page the entry was found on
 * @param env     sandbox helper (see grabProduct)
 * @returns a map with 'title' and 'url' plus whichever of 'attributes',
 *          'description', 'small-image', 'overview', 'specifications' and
 *          'price' were present; null when the entry has no name link
 */
function grabProductItem(element, link, env) {
  var child = element.select('.info-main .name a').first();
  if (child == null) {
    return null;
  }

  var listingBase = link + '';
  var title = child.text();
  var url = env.newURL(link, child.attr('href')) + '';
  var item = env.newHashMap();
  item.put('title', title);
  item.put('url', url);

  // Fragments taken from the listing page resolve against the listing URL.
  child = element.select('.attributes').first();
  if (child != null) {
    item.put('attributes', absolutizeFragment(child.html(), listingBase, env));
  }
  child = element.select('.description').first();
  if (child != null) {
    item.put('description', absolutizeFragment(child.html(), listingBase, env));
  }
  child = element.select('.image-col a.uri img.thumb').first();
  if (child != null) {
    item.put('small-image', env.newURL(link, child.attr('src')) + '');
  }

  // Fragments taken from the product page must resolve against the product
  // URL, not the listing URL, or path-relative links point to the wrong place.
  var cdoc = env.newJsoup().parse(env.newURL(url), 60000);
  child = cdoc.select('#esrbcontent').first();
  if (child != null) {
    item.put('overview', absolutizeFragment(child.parent().html(), url, env));
  }
  child = cdoc.select('#tabbed-specifications').first();
  if (child != null) {
    item.put('specifications', absolutizeFragment(child.html(), url, env));
  }

  // NOTE(review): '.prciest' looks like a typo, but it may mirror a class
  // name used by the site - confirm. When both selectors match, the value
  // from '.price' wins because it is stored last.
  child = cdoc.select('.prciest').first();
  if (child != null) {
    item.put('price', child.text());
  }
  child = cdoc.select('.price').first();
  if (child != null) {
    item.put('price', child.text());
  }
  return item;
}

/**
 * Re-parses an HTML fragment, makes its link and image URLs absolute via
 * buildURL(), and returns the resulting body HTML.
 *
 * @param html    HTML fragment
 * @param baseUrl string URL that relative references are resolved against
 * @param env     sandbox helper (see grabProduct)
 */
function absolutizeFragment(html, baseUrl, env) {
  var fragment = env.newJsoup().parse(html, baseUrl);
  buildURL(fragment, baseUrl, env);
  return fragment.select('body').first().html();
}
76 | |
/**
 * Rewrites, in place, the href of every anchor and the src of every image
 * in `doc` to an absolute URL resolved against `baseUrl`.
 *
 * Elements that do not carry the attribute at all are left untouched, so
 * no href/src pointing at the base page is invented for them.
 *
 * @param doc     parsed document (or fragment) to modify
 * @param baseUrl string URL used to resolve relative references
 * @param env     sandbox helper providing newURL()
 */
function buildURL(doc, baseUrl, env) {
  var base = env.newURL(baseUrl);
  absolutizeAttr(doc.select('a'), 'href', base, env);
  absolutizeAttr(doc.select('img'), 'src', base, env);
}

/**
 * Resolves attribute `attrName` of each element in `elements` against `base`.
 *
 * @param elements list-like object exposing size() and get(index)
 * @param attrName attribute holding the URL ('href' or 'src')
 * @param base     URL object that relative values are resolved against
 * @param env      sandbox helper providing newURL()
 */
function absolutizeAttr(elements, attrName, base, env) {
  for (var i = 0; i < elements.size(); i++) {
    var element = elements.get(i);
    if (!element.hasAttr(attrName)) {
      // A missing attribute reads as '' and would resolve to the base URL.
      continue;
    }
    try {
      var url = env.newURL(base, element.attr(attrName));
      element.attr(attrName, url + '');
    } catch (e) {
      // Deliberate best effort: a value that cannot be turned into a URL
      // is kept exactly as it was.
    }
  }
}
/**
 * Sandbox entry point.
 *
 * Scrapes one page of a fixed BestBuy category listing through
 * grabProduct() and appends every product found to the list stored
 * under the 'links' key of `args`.
 *
 * @param env  sandbox helper object, handed straight to grabProduct()
 * @param args argument container; args.get('links') must return an
 *             object exposing add(item)
 */
function main(env, args) {
  var catUrl = 'http://www.bestbuy.com/site/olstemplatemapper.jsp?id=pcat17080&type=page&qp=cabcat0100000%23%230%23%23wv~~cabcat0101000%23%23-1%23%23wv~~q466173746c696d69747067735f323236~~nf862%7C%7C53616d73756e67&list=y&nrp=15&sc=TVVideoSP&sp=-bestsellingsort+skuid&usc=abcat0100000';
  var maxpage = 1;
  var products = grabProduct(catUrl, maxpage, env);
  var links = args.get('links');
  for (var i = 0; i < products.size(); i++) {
    links.add(products.get(i));
  }
}

/**
 * Scrapes pages 1..maxpage of a category listing and returns the products
 * found on them.
 *
 * Each listing page is fetched from catUrl + '&cp=' + pageNumber. A failure
 * while loading a listing page is reported through env.error() and that
 * page is skipped. A failure while processing a single product is also
 * reported through env.error(), but only that product is skipped - the
 * remaining products on the page are still collected.
 *
 * @param catUrl  listing URL without the page parameter
 * @param maxpage number of the last listing page to fetch (1-based, inclusive)
 * @param env     sandbox helper providing newArrayList(), newHashMap(),
 *                newURL(), newJsoup() and error()
 * @returns the list created by env.newArrayList(), holding one map per product
 */
function grabProduct(catUrl, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var link = env.newURL(catUrl + '&cp=' + no);
      var doc = env.newJsoup().parse(link, 60000);
      var elements = doc.select('.hproduct');
      for (var i = 0; i < elements.size(); i++) {
        // Isolate failures per product: one unreachable detail page must
        // not discard the rest of the listing page.
        try {
          var item = grabProductItem(elements.get(i), link, env);
          if (item != null) {
            tag.add(item);
          }
        } catch (itemError) {
          env.error(itemError);
        }
      }
    } catch (e) {
      env.error(e);
    }
  }
  return tag;
}

/**
 * Builds the map for one '.hproduct' listing entry, fetching its detail page.
 *
 * @param element listing entry element
 * @param link    URL object of the listing page the entry was found on
 * @param env     sandbox helper (see grabProduct)
 * @returns a map with 'title' and 'url' plus whichever of 'attributes',
 *          'description', 'small-image', 'overview', 'specifications' and
 *          'price' were present; null when the entry has no name link
 */
function grabProductItem(element, link, env) {
  var child = element.select('.info-main .name a').first();
  if (child == null) {
    return null;
  }

  var listingBase = link + '';
  var title = child.text();
  var url = env.newURL(link, child.attr('href')) + '';
  var item = env.newHashMap();
  item.put('title', title);
  item.put('url', url);

  // Fragments taken from the listing page resolve against the listing URL.
  child = element.select('.attributes').first();
  if (child != null) {
    item.put('attributes', absolutizeFragment(child.html(), listingBase, env));
  }
  child = element.select('.description').first();
  if (child != null) {
    item.put('description', absolutizeFragment(child.html(), listingBase, env));
  }
  child = element.select('.image-col a.uri img.thumb').first();
  if (child != null) {
    item.put('small-image', env.newURL(link, child.attr('src')) + '');
  }

  // Fragments taken from the product page must resolve against the product
  // URL, not the listing URL, or path-relative links point to the wrong place.
  var cdoc = env.newJsoup().parse(env.newURL(url), 60000);
  child = cdoc.select('#esrbcontent').first();
  if (child != null) {
    item.put('overview', absolutizeFragment(child.parent().html(), url, env));
  }
  child = cdoc.select('#tabbed-specifications').first();
  if (child != null) {
    item.put('specifications', absolutizeFragment(child.html(), url, env));
  }

  // NOTE(review): '.prciest' looks like a typo, but it may mirror a class
  // name used by the site - confirm. When both selectors match, the value
  // from '.price' wins because it is stored last.
  child = cdoc.select('.prciest').first();
  if (child != null) {
    item.put('price', child.text());
  }
  child = cdoc.select('.price').first();
  if (child != null) {
    item.put('price', child.text());
  }
  return item;
}

/**
 * Re-parses an HTML fragment, makes its link and image URLs absolute via
 * buildURL(), and returns the resulting body HTML.
 *
 * @param html    HTML fragment
 * @param baseUrl string URL that relative references are resolved against
 * @param env     sandbox helper (see grabProduct)
 */
function absolutizeFragment(html, baseUrl, env) {
  var fragment = env.newJsoup().parse(html, baseUrl);
  buildURL(fragment, baseUrl, env);
  return fragment.select('body').first().html();
}

/**
 * Rewrites, in place, the href of every anchor and the src of every image
 * in `doc` to an absolute URL resolved against `baseUrl`.
 *
 * Elements that do not carry the attribute at all are left untouched, so
 * no href/src pointing at the base page is invented for them.
 *
 * @param doc     parsed document (or fragment) to modify
 * @param baseUrl string URL used to resolve relative references
 * @param env     sandbox helper providing newURL()
 */
function buildURL(doc, baseUrl, env) {
  var base = env.newURL(baseUrl);
  absolutizeAttr(doc.select('a'), 'href', base, env);
  absolutizeAttr(doc.select('img'), 'src', base, env);
}

/**
 * Resolves attribute `attrName` of each element in `elements` against `base`.
 *
 * @param elements list-like object exposing size() and get(index)
 * @param attrName attribute holding the URL ('href' or 'src')
 * @param base     URL object that relative values are resolved against
 * @param env      sandbox helper providing newURL()
 */
function absolutizeAttr(elements, attrName, base, env) {
  for (var i = 0; i < elements.size(); i++) {
    var element = elements.get(i);
    if (!element.hasAttr(attrName)) {
      // A missing attribute reads as '' and would resolve to the base URL.
      continue;
    }
    try {
      var url = env.newURL(base, element.attr(attrName));
      element.attr(attrName, url + '');
    } catch (e) {
      // Deliberate best effort: a value that cannot be turned into a URL
      // is kept exactly as it was.
    }
  }
}
No comments:
Post a Comment