Grab categories from Amazon aStores
This task uses a JavaScript sandbox with jsoup support to grab categories from Amazon aStores.
Grab categories from Amazon aStores
- Create a JavaScript sandbox with jsoup support
- Create the following JavaScript:
javascript
/**
 * Sandbox entry point: grabs the category list of the hard-coded aStore
 * ('paesia') and logs one report entry per category through env.info.
 *
 * Each entry shows the category title, its node id, the parent node id and
 * the parent's title (empty when the parent is not among the grabbed
 * categories).
 *
 * @param env  sandbox environment; this function uses newHashMap, info and
 *             error, and hands env on to grabCategory
 * @param args not used by this function
 */
function main(env, args) {
    var storeId = 'paesia';
    try {
        var found = grabCategory(storeId, env);
        var byNode = env.newHashMap();
        var entry;

        // Pass 1: index every category by node id. The index must be complete
        // before any parent lookup happens, hence two separate passes.
        var idx = 0;
        while (idx < found.size()) {
            entry = found.get(idx);
            byNode.put(entry.get('node'), entry);
            idx++;
        }

        // Pass 2: resolve each parent's title and emit the report entry.
        idx = 0;
        while (idx < found.size()) {
            entry = found.get(idx);
            var nodeId = entry.get('node');
            var name = entry.get('title');
            var parentId = entry.get('parent');
            var parentEntry = byNode.get(parentId);
            var parentName = parentEntry != null ? parentEntry.get('title') : '';

            // The leading empty element makes the message start with '\r\n'.
            var report = [
                '',
                'Title: ' + name,
                'Node: ' + nodeId,
                'Parent: ' + parentId,
                'Parent Title: ' + parentName
            ].join('\r\n');
            env.info(report);
            idx++;
        }
    } catch (e) {
        env.error(e);
    }
}
/**
 * Crawls an Amazon aStore and returns its categories as a flat list.
 *
 * The store home page (http://astore.amazon.com/<astore>-20) is parsed first;
 * every link matched by '#searchbrowse a' whose href carries a 'node='
 * parameter becomes a category with parent ''. Every category found so far is
 * then visited in discovery order (<store url>?node=<id>), and links matched
 * by '#searchbrowse .indent a' that were not seen before are recorded as
 * children of the visited category.
 *
 * Each node id is reported at most once, and links with an empty node id are
 * ignored (an empty id would collide with the '' "no parent" marker).
 *
 * @param astore aStore identifier without the '-20' suffix
 * @param env    sandbox environment; must provide newArrayList, newHashMap,
 *               newURL, newJsoup and error
 * @returns the list created by env.newArrayList, holding one map per category
 *          with the keys 'title', 'node' and 'parent'
 *
 * Never throws: any exception is reported through env.error and whatever was
 * collected up to that point is returned.
 */
function grabCategory(astore, env) {
    var tag = env.newArrayList();
    try {
        var nodelist = env.newArrayList();
        var storeUrl = 'http://astore.amazon.com/' + astore + '-20';

        // 60000 is passed as the timeout argument (presumably milliseconds,
        // as in jsoup's Jsoup.parse(URL, int) - confirm against the sandbox).
        var doc = env.newJsoup().parse(env.newURL(storeUrl), 60000);
        collectCategoryLinks(env, doc.select('#searchbrowse a'), '', tag, nodelist);

        // nodelist grows while it is being walked, so children discovered on
        // one page are themselves visited later in the same loop.
        for (var no = 0; no < nodelist.size(); no++) {
            var parent = nodelist.get(no);
            doc = env.newJsoup().parse(env.newURL(storeUrl + '?node=' + parent), 60000);
            collectCategoryLinks(env, doc.select('#searchbrowse .indent a'), parent, tag, nodelist);
        }
    } catch (e) {
        env.error(e);
    }
    return tag;
}

/**
 * Helper for grabCategory: appends one category map to `tag` (and its node id
 * to `nodelist`) for every link in `elements` that carries a new, non-empty
 * node id. `parent` is stored as the 'parent' value of each appended map.
 */
function collectCategoryLinks(env, elements, parent, tag, nodelist) {
    for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var node = parseNodeId(element.attr('href'));
        // String(...) keeps the emptiness check valid whether the host hands
        // back a script string or a wrapped host string.
        if (node === null || String(node) === '') continue;
        // Skip ids already recorded so no category is listed or crawled twice.
        if (nodelist.indexOf(node) >= 0) continue;
        var item = env.newHashMap();
        item.put('title', element.text());
        item.put('node', node);
        item.put('parent', parent);
        tag.add(item);
        nodelist.add(node);
    }
}

/**
 * Helper for grabCategory: returns the value of the last 'node=' parameter in
 * `url` (up to the next '&' or the end of the string), or null when the url
 * contains no 'node='.
 */
function parseNodeId(url) {
    var pos = url.lastIndexOf('node=');
    if (pos < 0) return null;
    var node = url.substring(pos + 5); // 5 === 'node='.length
    pos = node.indexOf('&');
    if (pos >= 0) {
        node = node.substring(0, pos);
    }
    return node;
}
No comments:
Post a Comment