Grab categories from Newegg
Grab categories from Newegg
- Create a JavaScript sandbox with jsoup support
- Create a JavaScript script as follows:
javascript
/**
 * Sandbox entry point: appends every category item found by grabCategory
 * to the list stored under the 'links' key of `args`.
 *
 * @param env  Sandbox helper object; this script relies on its newArrayList,
 *             newHashMap, newURL, newJsoup and error methods.
 * @param args Argument container; `args.get('links')` must return a list
 *             exposing `add(item)`.
 */
function main(env, args) {
  var links = args.get('links');
  var cats = grabCategory(env);
  for (var i = 0; i < cats.size(); i++) {
    links.add(cats.get(i));
  }
}

/**
 * Removes a trailing " (...)" group from a label, e.g. "Laptops (5)" becomes
 * "Laptops" (presumably an item count on the sitemap -- confirm against the
 * live page). The text is cut at the last " (" whenever a ")" occurs after
 * it; otherwise the label is returned unchanged.
 *
 * Only lastIndexOf/substring are used so the helper works on both JavaScript
 * strings and the string objects handed back by the host.
 *
 * @param text Label to clean.
 * @returns The label without the trailing parenthesised group.
 */
function stripCountSuffix(text) {
  var open = text.lastIndexOf(' (');
  var close = text.lastIndexOf(')');
  if (open >= 0 && open < close) {
    return text.substring(0, open);
  }
  return text;
}

/**
 * Downloads the Newegg sitemap page and builds one item per sub-category link.
 *
 * Each 'h5 a.nolone' anchor is treated as a top-level category; the
 * 'a.nolone' anchors inside the element that follows the anchor's parent are
 * its children. Every child yields a map with:
 *   - 'title': "<child label> | <top-level label>" (labels cleaned with
 *              stripCountSuffix)
 *   - 'url':   the child's href resolved against the sitemap URL, as a string
 *
 * Any exception is reported through env.error and whatever was collected up
 * to that point is returned.
 *
 * @param env Sandbox helper object (see main).
 * @returns List created by env.newArrayList() holding the item maps.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://www.newegg.com/Info/SiteMap.aspx');
    // 60000 is passed as the second argument of parse (the fetch timeout, in ms).
    var doc = env.newJsoup().parse(link, 60000);
    var elements = doc.select('h5 a.nolone');
    for (var i = 0; i < elements.size(); i++) {
      var element = elements.get(i);
      var topcat = stripCountSuffix(element.text());

      // A heading with nothing after it has no children to list. Skip it
      // instead of letting a null dereference abort the whole crawl.
      var container = element.parent().nextElementSibling();
      if (container == null) {
        continue;
      }

      var children = container.select('a.nolone');
      for (var j = 0; j < children.size(); j++) {
        var child = children.get(j);
        var title = stripCountSuffix(child.text()) + ' | ' + topcat;
        // Concatenating with '' turns the resolved URL object into a string.
        var url = env.newURL(link, child.attr('href')) + '';
        var item = env.newHashMap();
        item.put('title', title);
        item.put('url', url);
        tag.add(item);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment