Grab categories from Amazon aStores
This task uses a JavaScript sandbox with jsoup support to grab categories from Amazon aStores.
Grab categories from Amazon aStores
- Create a JavaScript sandbox with jsoup support
- Create a JavaScript script as follows:
javascript
/**
 * Sandbox entry point: crawls the category tree of one Amazon aStore and
 * logs each category together with its parent.
 *
 * @param {Object} env  Sandbox helper object. This script relies on
 *                      env.newHashMap, env.newArrayList, env.newURL,
 *                      env.newJsoup, env.info and env.error.
 * @param {*} args      Not used by this script.
 */
function main(env, args) {
  var astore = 'paesia';
  try {
    var categories = grabCategory(astore, env);

    // Index every category by node id so a parent's title can be resolved
    // from the parent node id stored on each child.
    var byNode = env.newHashMap();
    for (var i = 0; i < categories.size(); i++) {
      var entry = categories.get(i);
      byNode.put(entry.get('node'), entry);
    }

    for (var j = 0; j < categories.size(); j++) {
      var cat = categories.get(j);
      var node = cat.get('node');
      var title = cat.get('title');
      var parentNode = cat.get('parent');

      // Top-level categories carry '' as parent, which has no map entry.
      var parentCat = byNode.get(parentNode);
      var parentTitle = '';
      if (parentCat != null) {
        parentTitle = parentCat.get('title');
      }

      var line = '';
      line += '\r\nTitle: ' + title;
      line += '\r\nNode: ' + node;
      line += '\r\nParent: ' + parentNode;
      line += '\r\nParent Title: ' + parentTitle;
      env.info(line);
    }
  } catch (e) {
    env.error(e);
  }
}

/**
 * Collects the categories of an aStore: first the links on the store's
 * landing page, then the indented sub-category links on the page of every
 * node discovered so far.
 *
 * Failures are reported through env.error and whatever was collected up to
 * that point is still returned.
 *
 * @param {string} astore  Store name; '-20' is appended to build the URL.
 * @param {Object} env     Sandbox helper object (see main).
 * @returns {Object} List created by env.newArrayList() holding one map per
 *                   category with the keys 'title', 'node' and 'parent'
 *                   ('' for categories found on the landing page).
 */
function grabCategory(astore, env) {
  var tag = env.newArrayList();
  try {
    var nodelist = env.newArrayList();
    var storeUrl = 'http://astore.amazon.com/' + astore + '-20';

    collectCategories(env, storeUrl, '#searchbrowse a', '', tag, nodelist);

    // nodelist grows while it is being walked, so nodes discovered on a
    // sub-page are themselves visited later, in discovery order.
    var no = 0;
    while (no < nodelist.size()) {
      var parent = nodelist.get(no);
      collectCategories(env, storeUrl + '?node=' + parent,
          '#searchbrowse .indent a', parent, tag, nodelist);
      no++;
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}

/**
 * Fetches one page and appends every not-yet-seen category link on it.
 *
 * Nodes already present in nodelist are skipped. This applies to the landing
 * page as well, so a node linked twice there is recorded (and later fetched)
 * only once.
 *
 * @param {Object} env         Sandbox helper object (see main).
 * @param {string} pageUrl     Address of the page to fetch.
 * @param {string} selector    Selector passed to the parsed document's select().
 * @param {*} parentNode       Value stored under 'parent' for each new item.
 * @param {Object} tag         Output list of category maps; appended to.
 * @param {Object} nodelist    List of node ids seen so far; appended to.
 */
function collectCategories(env, pageUrl, selector, parentNode, tag, nodelist) {
  var link = env.newURL(pageUrl);
  var doc = env.newJsoup().parse(link, 60000);
  var elements = doc.select(selector);

  for (var i = 0; i < elements.size(); i++) {
    var element = elements.get(i);
    var node = extractNodeId(element.attr('href'));
    if (node == null) continue; // not a category link
    if (nodelist.indexOf(node) >= 0) continue; // already recorded

    var item = env.newHashMap();
    item.put('title', element.text());
    item.put('node', node);
    item.put('parent', parentNode);
    tag.add(item);
    nodelist.add(node);
  }
}

/**
 * Extracts the value of the last 'node=' parameter of a link.
 *
 * @param {string} url  Value of a link's href attribute.
 * @returns {?string} Text after the last 'node=' up to the next '&' (or the
 *                    end of the string); null when 'node=' does not occur.
 */
function extractNodeId(url) {
  var pos = url.lastIndexOf('node=');
  if (pos < 0) return null;

  var node = url.substring(pos + 5); // 5 === 'node='.length
  var amp = node.indexOf('&');
  if (amp >= 0) {
    node = node.substring(0, amp);
  }
  return node;
}
No comments:
Post a Comment