Grab categories from Amazon aStores
This task uses a JavaScript sandbox with jsoup support to grab categories from Amazon aStores.
Grab categories from Amazon aStores
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Script entry point: crawls the categories of one aStore and logs each of
 * them together with its parent.
 *
 * The category list comes from grabCategory(). Every entry is first indexed
 * by its 'node' id so that a second pass can resolve each entry's 'parent'
 * id to the parent's title. One env.info() message is emitted per category.
 * Any exception is reported through env.error() instead of propagating.
 *
 * Kept to var/function syntax, matching the rest of the script (the engine
 * running the sandbox is not visible from here).
 *
 * @param {Object} env  Sandbox environment; this function uses newHashMap(),
 *                      info() and error(), and passes env on to grabCategory().
 * @param {*} args      Not used.
 */
function main(env, args) {
    // Store identifier is fixed here; grabCategory() appends '-20' to it.
    var astore = 'paesia';
    try {
        var categories = grabCategory(astore, env);
        var total = categories.size();
        var byNode = env.newHashMap();
        var i;
        var cat;

        // Pass 1: index every category by node id.
        for (i = 0; i < total; i++) {
            cat = categories.get(i);
            byNode.put(cat.get('node'), cat);
        }

        // Pass 2: report each category; an unknown parent id (e.g. the ''
        // used for root categories) leaves the parent title empty.
        for (i = 0; i < total; i++) {
            cat = categories.get(i);
            var parentNode = cat.get('parent');
            var parentCat = byNode.get(parentNode);
            var parentTitle = parentCat != null ? parentCat.get('title') : '';
            env.info(
                '\r\nTitle: ' + cat.get('title') +
                '\r\nNode: ' + cat.get('node') +
                '\r\nParent: ' + parentNode +
                '\r\nParent Title: ' + parentTitle
            );
        }
    } catch (e) {
        env.error(e);
    }
}
31 | |
/**
 * Crawls an aStore and returns its categories as a flat list.
 *
 * The store home page supplies the root categories: every link matched by
 * '#searchbrowse a' whose href carries a 'node=' parameter. Each discovered
 * node is then visited in discovery order, and the links matched by
 * '#searchbrowse .indent a' on its page are recorded as its children.
 * A node id is recorded only once, the first time it is seen.
 *
 * Any exception (for example a failed page fetch) is reported through
 * env.error() and ends the crawl; whatever was collected so far is returned.
 *
 * Kept to var/function syntax, matching the rest of the script (the engine
 * running the sandbox is not visible from here).
 *
 * @param {string} astore  Store identifier; '-20' is appended to build the URL.
 * @param {Object} env     Sandbox environment; this function uses
 *                         newArrayList(), newHashMap(), newURL(), newJsoup()
 *                         and error().
 * @returns {Object} List created by env.newArrayList() holding one map per
 *                   category with keys 'title', 'node' and 'parent'
 *                   ('parent' is '' for root categories).
 */
function grabCategory(astore, env) {
    var baseUrl = 'http://astore.amazon.com/' + astore + '-20';
    var tag = env.newArrayList();
    // Doubles as the crawl queue and as the record of ids already seen.
    var nodelist = env.newArrayList();

    // Returns the value of the last 'node=' parameter in href, or null when
    // the link has no such parameter or its value is empty.
    function parseNodeId(href) {
        var pos = href.lastIndexOf('node=');
        if (pos < 0) return null;
        var node = href.substring(pos + 5);
        // Drop any following query parameters and any '#fragment'.
        var stops = ['&', '#'];
        for (var s = 0; s < stops.length; s++) {
            var cut = node.indexOf(stops[s]);
            if (cut >= 0) node = node.substring(0, cut);
        }
        // An empty id would collide with the '' used as the root parent marker.
        // String() is used for the check only, because href may be a host
        // (Java) string rather than a JS string -- assumption, confirm
        // against the sandbox.
        return String(node) === '' ? null : node;
    }

    // Appends one category per not-yet-seen node link and queues it for a visit.
    function collect(elements, parent) {
        for (var i = 0; i < elements.size(); i++) {
            var element = elements.get(i);
            var node = parseNodeId(element.attr('href'));
            if (node === null) continue;
            if (nodelist.indexOf(node) >= 0) continue;
            var item = env.newHashMap();
            item.put('title', element.text());
            item.put('node', node);
            item.put('parent', parent);
            tag.add(item);
            nodelist.add(node);
        }
    }

    try {
        // 60000 is passed as the second parse() argument; presumably a
        // timeout in milliseconds as in jsoup's Jsoup.parse(URL, int).
        var home = env.newJsoup().parse(env.newURL(baseUrl), 60000);
        collect(home.select('#searchbrowse a'), '');

        // nodelist grows while it is being walked, so children found on a
        // page are themselves visited later.
        for (var no = 0; no < nodelist.size(); no++) {
            var current = nodelist.get(no);
            var page = env.newJsoup().parse(env.newURL(baseUrl + '?node=' + current), 60000);
            collect(page.select('#searchbrowse .indent a'), current);
        }
    } catch (e) {
        env.error(e);
    }
    return tag;
}
/**
 * Script entry point: crawls the categories of one aStore and logs each of
 * them together with its parent.
 *
 * Every category returned by grabCategory() is first indexed by its 'node'
 * id so that a second pass can resolve each entry's 'parent' id to the
 * parent's title. One env.info() message is emitted per category. Any
 * exception is reported through env.error() instead of propagating.
 *
 * @param {Object} env  Sandbox environment; this function uses newHashMap(),
 *                      info() and error(), and passes env on to grabCategory().
 * @param {*} args      Not used.
 */
function main(env, args) {
    // Store identifier is fixed here; grabCategory() appends '-20' to it.
    var astore = 'paesia';
    try {
        var categories = grabCategory(astore, env);
        var total = categories.size();
        var byNode = env.newHashMap();
        var i;
        var cat;

        // Pass 1: index every category by node id.
        for (i = 0; i < total; i++) {
            cat = categories.get(i);
            byNode.put(cat.get('node'), cat);
        }

        // Pass 2: report each category; an unknown parent id (e.g. the ''
        // used for root categories) leaves the parent title empty.
        for (i = 0; i < total; i++) {
            cat = categories.get(i);
            var parentNode = cat.get('parent');
            var parentCat = byNode.get(parentNode);
            var parentTitle = parentCat != null ? parentCat.get('title') : '';
            env.info(
                '\r\nTitle: ' + cat.get('title') +
                '\r\nNode: ' + cat.get('node') +
                '\r\nParent: ' + parentNode +
                '\r\nParent Title: ' + parentTitle
            );
        }
    } catch (e) {
        env.error(e);
    }
}

/**
 * Crawls an aStore and returns its categories as a flat list.
 *
 * The store home page supplies the root categories: every link matched by
 * '#searchbrowse a' whose href carries a 'node=' parameter. Each discovered
 * node is then visited in discovery order, and the links matched by
 * '#searchbrowse .indent a' on its page are recorded as its children.
 * A node id is recorded only once, the first time it is seen.
 *
 * Any exception (for example a failed page fetch) is reported through
 * env.error() and ends the crawl; whatever was collected so far is returned.
 *
 * Kept to var/function syntax, matching the rest of the script (the engine
 * running the sandbox is not visible from here).
 *
 * @param {string} astore  Store identifier; '-20' is appended to build the URL.
 * @param {Object} env     Sandbox environment; this function uses
 *                         newArrayList(), newHashMap(), newURL(), newJsoup()
 *                         and error().
 * @returns {Object} List created by env.newArrayList() holding one map per
 *                   category with keys 'title', 'node' and 'parent'
 *                   ('parent' is '' for root categories).
 */
function grabCategory(astore, env) {
    var baseUrl = 'http://astore.amazon.com/' + astore + '-20';
    var tag = env.newArrayList();
    // Doubles as the crawl queue and as the record of ids already seen.
    var nodelist = env.newArrayList();

    // Returns the value of the last 'node=' parameter in href, or null when
    // the link has no such parameter or its value is empty.
    function parseNodeId(href) {
        var pos = href.lastIndexOf('node=');
        if (pos < 0) return null;
        var node = href.substring(pos + 5);
        // Drop any following query parameters and any '#fragment'.
        var stops = ['&', '#'];
        for (var s = 0; s < stops.length; s++) {
            var cut = node.indexOf(stops[s]);
            if (cut >= 0) node = node.substring(0, cut);
        }
        // An empty id would collide with the '' used as the root parent marker.
        // String() is used for the check only, because href may be a host
        // (Java) string rather than a JS string -- assumption, confirm
        // against the sandbox.
        return String(node) === '' ? null : node;
    }

    // Appends one category per not-yet-seen node link and queues it for a visit.
    function collect(elements, parent) {
        for (var i = 0; i < elements.size(); i++) {
            var element = elements.get(i);
            var node = parseNodeId(element.attr('href'));
            if (node === null) continue;
            if (nodelist.indexOf(node) >= 0) continue;
            var item = env.newHashMap();
            item.put('title', element.text());
            item.put('node', node);
            item.put('parent', parent);
            tag.add(item);
            nodelist.add(node);
        }
    }

    try {
        // 60000 is passed as the second parse() argument; presumably a
        // timeout in milliseconds as in jsoup's Jsoup.parse(URL, int).
        var home = env.newJsoup().parse(env.newURL(baseUrl), 60000);
        collect(home.select('#searchbrowse a'), '');

        // nodelist grows while it is being walked, so children found on a
        // page are themselves visited later.
        for (var no = 0; no < nodelist.size(); no++) {
            var current = nodelist.get(no);
            var page = env.newJsoup().parse(env.newURL(baseUrl + '?node=' + current), 60000);
            collect(page.select('#searchbrowse .indent a'), current);
        }
    } catch (e) {
        env.error(e);
    }
    return tag;
}
No comments:
Post a Comment