Grab categories from BestBuy
Grab categories from BestBuy
- Create a JavaScript sandbox with jsoup support.
- Create the following JavaScript:
javascript
/**
 * Sandbox entry point: collects the entries produced by grabCategory(env)
 * and appends each of them, in order, to the list stored under the
 * 'links' key of `args`.
 *
 * Any exception raised along the way is passed to env.error() rather than
 * propagated to the caller.
 *
 * @param {Object} env  Sandbox helper object; used here for error() and
 *                      handed on to grabCategory().
 * @param {Object} args Argument container; args.get('links') must return
 *                      an object with an add() method.
 */
function main(env, args) {
  try {
    var categories = grabCategory(env);
    var links = args.get('links');
    for (var i = 0; i < categories.size(); i++) {
      links.add(categories.get(i));
    }
  } catch (e) {
    env.error(e);
  }
}
12 | |
/**
 * Returns the first element matching '.search .title' in `doc` whose
 * trimmed text is 'Brand', or null when there is none.
 *
 * @param {Object} env Sandbox helper object (newString() is used).
 * @param {Object} doc Parsed document exposing select().
 */
function findBrandTitle(env, doc) {
  var titles = doc.select('.search .title');
  for (var i = 0; i < titles.size(); i++) {
    var candidate = titles.get(i);
    // Loose equality is deliberate. NOTE(review): newString() presumably
    // returns a host (Java) string, which === would never match against a
    // script string - confirm before tightening this comparison.
    if (env.newString(candidate.text()).trim() == 'Brand') {
      return candidate;
    }
  }
  return null;
}

/**
 * Builds a list of "brand within category" links from bestbuy.com.
 *
 * Steps:
 *  1. Fetch the home page and collect the distinct navigation links
 *     ('#nav li.nav-pro ul li ul li a') whose href contains
 *     'http://www.bestbuy.com/site/'.
 *  2. For each such category page, locate the 'Brand' title; categories
 *     without one are skipped.
 *  3. If the Brand section has a '.seeall a' link, follow it and use the
 *     Brand section of that page instead.
 *  4. Emit one map per 'li a' link in the Brand section, with
 *     'title' = "<link text> | <category title>" and 'url' = the link
 *     resolved against the page it was found on.
 *
 * Relative hrefs are resolved against the page they were scraped from,
 * not against the home page, so document-relative links such as
 * "brands.jsp" point where a browser would take them.
 *
 * Any exception is passed to env.error(); whatever was collected up to
 * that point is still returned.
 *
 * @param {Object} env Sandbox helper object; newArrayList, newHashMap,
 *                     newURL, newJsoup, newString and error are used.
 * @returns {Object} The list created by env.newArrayList(), holding the
 *                   maps described above.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var home = env.newURL('http://www.bestbuy.com/site/index.jsp');
    // 60000: timeout argument - presumably milliseconds, as in jsoup's
    // parse(URL, int); confirm against the sandbox API.
    var homeDoc = env.newJsoup().parse(home, 60000);
    var navLinks = homeDoc.select('#nav li.nav-pro ul li ul li a');
    var seen = env.newArrayList();
    var topcat = env.newArrayList();
    for (var i = 0; i < navLinks.size(); i++) {
      var nav = navLinks.get(i);
      var href = nav.attr('href');
      // Every href is remembered, even ones rejected below, so a repeated
      // link is only ever examined once.
      if (seen.indexOf(href) >= 0) continue;
      seen.add(href);
      if (href.indexOf('http://www.bestbuy.com/site/') < 0) continue;
      var category = env.newHashMap();
      category.put('title', nav.text());
      category.put('url', href);
      topcat.add(category);
    }

    for (var c = 0; c < topcat.size(); c++) {
      var tc = topcat.get(c);
      // pageUrl always names the page `brands` was taken from; it is the
      // base for every relative href found in that section.
      var pageUrl = env.newURL(tc.get('url'));
      var heading = findBrandTitle(env, env.newJsoup().parse(pageUrl, 60000));
      if (heading == null) continue;
      var brands = heading.parent();

      var seeAll = brands.select('.seeall a').first();
      if (seeAll != null) {
        // The category page only shows part of the brand list; switch to
        // the full listing behind the "see all" link.
        pageUrl = env.newURL(pageUrl, seeAll.attr('href'));
        heading = findBrandTitle(env, env.newJsoup().parse(pageUrl, 60000));
        if (heading == null) continue;
        brands = heading.parent();
      }

      var brandLinks = brands.select('li a');
      for (var j = 0; j < brandLinks.size(); j++) {
        var brandLink = brandLinks.get(j);
        var item = env.newHashMap();
        item.put('title', brandLink.text() + ' | ' + tc.get('title'));
        item.put('url', env.newURL(pageUrl, brandLink.attr('href')));
        tag.add(item);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
/**
 * Sandbox entry point: collects the entries produced by grabCategory(env)
 * and appends each of them, in order, to the list stored under the
 * 'links' key of `args`.
 *
 * Any exception raised along the way is passed to env.error() rather than
 * propagated to the caller.
 *
 * @param {Object} env  Sandbox helper object; used here for error() and
 *                      handed on to grabCategory().
 * @param {Object} args Argument container; args.get('links') must return
 *                      an object with an add() method.
 */
function main(env, args) {
  try {
    var categories = grabCategory(env);
    var links = args.get('links');
    for (var i = 0; i < categories.size(); i++) {
      links.add(categories.get(i));
    }
  } catch (e) {
    env.error(e);
  }
}

/**
 * Returns the first element matching '.search .title' in `doc` whose
 * trimmed text is 'Brand', or null when there is none.
 *
 * @param {Object} env Sandbox helper object (newString() is used).
 * @param {Object} doc Parsed document exposing select().
 */
function findBrandTitle(env, doc) {
  var titles = doc.select('.search .title');
  for (var i = 0; i < titles.size(); i++) {
    var candidate = titles.get(i);
    // Loose equality is deliberate. NOTE(review): newString() presumably
    // returns a host (Java) string, which === would never match against a
    // script string - confirm before tightening this comparison.
    if (env.newString(candidate.text()).trim() == 'Brand') {
      return candidate;
    }
  }
  return null;
}

/**
 * Builds a list of "brand within category" links from bestbuy.com.
 *
 * Steps:
 *  1. Fetch the home page and collect the distinct navigation links
 *     ('#nav li.nav-pro ul li ul li a') whose href contains
 *     'http://www.bestbuy.com/site/'.
 *  2. For each such category page, locate the 'Brand' title; categories
 *     without one are skipped.
 *  3. If the Brand section has a '.seeall a' link, follow it and use the
 *     Brand section of that page instead.
 *  4. Emit one map per 'li a' link in the Brand section, with
 *     'title' = "<link text> | <category title>" and 'url' = the link
 *     resolved against the page it was found on.
 *
 * Relative hrefs are resolved against the page they were scraped from,
 * not against the home page, so document-relative links such as
 * "brands.jsp" point where a browser would take them.
 *
 * Any exception is passed to env.error(); whatever was collected up to
 * that point is still returned.
 *
 * @param {Object} env Sandbox helper object; newArrayList, newHashMap,
 *                     newURL, newJsoup, newString and error are used.
 * @returns {Object} The list created by env.newArrayList(), holding the
 *                   maps described above.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var home = env.newURL('http://www.bestbuy.com/site/index.jsp');
    // 60000: timeout argument - presumably milliseconds, as in jsoup's
    // parse(URL, int); confirm against the sandbox API.
    var homeDoc = env.newJsoup().parse(home, 60000);
    var navLinks = homeDoc.select('#nav li.nav-pro ul li ul li a');
    var seen = env.newArrayList();
    var topcat = env.newArrayList();
    for (var i = 0; i < navLinks.size(); i++) {
      var nav = navLinks.get(i);
      var href = nav.attr('href');
      // Every href is remembered, even ones rejected below, so a repeated
      // link is only ever examined once.
      if (seen.indexOf(href) >= 0) continue;
      seen.add(href);
      if (href.indexOf('http://www.bestbuy.com/site/') < 0) continue;
      var category = env.newHashMap();
      category.put('title', nav.text());
      category.put('url', href);
      topcat.add(category);
    }

    for (var c = 0; c < topcat.size(); c++) {
      var tc = topcat.get(c);
      // pageUrl always names the page `brands` was taken from; it is the
      // base for every relative href found in that section.
      var pageUrl = env.newURL(tc.get('url'));
      var heading = findBrandTitle(env, env.newJsoup().parse(pageUrl, 60000));
      if (heading == null) continue;
      var brands = heading.parent();

      var seeAll = brands.select('.seeall a').first();
      if (seeAll != null) {
        // The category page only shows part of the brand list; switch to
        // the full listing behind the "see all" link.
        pageUrl = env.newURL(pageUrl, seeAll.attr('href'));
        heading = findBrandTitle(env, env.newJsoup().parse(pageUrl, 60000));
        if (heading == null) continue;
        brands = heading.parent();
      }

      var brandLinks = brands.select('li a');
      for (var j = 0; j < brandLinks.size(); j++) {
        var brandLink = brandLinks.get(j);
        var item = env.newHashMap();
        item.put('title', brandLink.text() + ' | ' + tc.get('title'));
        item.put('url', env.newURL(pageUrl, brandLink.attr('href')));
        tag.add(item);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment