Grab categories from Newegg
Grab categories from Newegg
- Create a JavaScript sandbox with jsoup support
- Create the following JavaScript:
javascript
/**
 * Sandbox entry point: appends every category entry produced by
 * grabCategory(env) to the collection stored under the 'links' key of args.
 *
 * @param env  sandbox helper object, passed through to grabCategory unchanged
 * @param args map-like object; args.get('links') must return a collection
 *             that exposes add()
 */
function main(env, args) {
  var target = args.get('links');
  var found = grabCategory(env);
  var idx = 0;
  // Copy element by element; the result only needs to expose size()/get().
  while (idx < found.size()) {
    target.add(found.get(idx));
    idx += 1;
  }
}
/**
 * Scrapes the Newegg site map page and returns one entry per sub-category.
 *
 * Every anchor matching 'h5 a.nolone' is treated as a top-level category.
 * The 'a.nolone' anchors inside the element that follows the anchor's parent
 * are treated as its sub-categories. Each sub-category becomes a map with:
 *   - 'title': "<sub-category> | <top-level category>", with a trailing
 *              " (...)" suffix removed from both names
 *   - 'url'  : the anchor's href resolved against the site map URL, as a string
 *
 * Any exception is reported through env.error() and whatever was collected
 * up to that point is returned, so the function itself does not throw for
 * fetch or parse failures.
 *
 * @param env sandbox helper; must provide newArrayList(), newHashMap(),
 *            newURL(), newJsoup() and error()
 * @returns the list created by env.newArrayList(), filled with the maps
 *          described above (empty when nothing could be scraped)
 */
function grabCategory(env) {
  // Cuts the text at its last " (" when a ")" appears somewhere after it;
  // presumably this removes the item count shown next to each category name.
  function stripCount(text) {
    var open = text.lastIndexOf(' (');
    var close = text.lastIndexOf(')');
    if (open >= 0 && close >= 0 && open < close) {
      return text.substring(0, open);
    }
    return text;
  }

  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://www.newegg.com/Info/SiteMap.aspx');
    // Second argument is presumably the fetch timeout in milliseconds.
    var doc = env.newJsoup().parse(link, 60000);
    var elements = doc.select('h5 a.nolone');
    for (var i = 0; i < elements.size(); i++) {
      var element = elements.get(i);
      var topcat = stripCount(element.text());

      // A heading without a following sibling has no sub-category list.
      // Skip it instead of letting a null dereference abort the whole scrape
      // and drop every category that comes after it.
      var holder = element.parent();
      var listing = holder == null ? null : holder.nextElementSibling();
      if (listing == null) {
        continue;
      }

      var children = listing.select('a.nolone');
      for (var j = 0; j < children.size(); j++) {
        var child = children.get(j);
        var title = stripCount(child.text()) + ' | ' + topcat;
        // Appending '' converts the resolved URL object into a plain string.
        var url = env.newURL(link, child.attr('href')) + '';
        var item = env.newHashMap();
        item.put('title', title);
        item.put('url', url);
        tag.add(item);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment