Grab categories from Newegg
  
Grab categories from Newegg
  - Create a JavaScript sandbox with jsoup support.
  - Create the JavaScript as follows:
 
    javascript
    
    
/**
 * Sandbox entry point: gathers the category entries produced by
 * grabCategory and appends each of them to the 'links' collection
 * obtained from args.
 *
 * @param env  sandbox environment object; passed through to grabCategory
 * @param args argument container; args.get('links') must return a
 *             collection exposing add()
 */
function main(env, args) {
  var output = args.get('links');
  var categories = grabCategory(env);
  var index = 0;
  // size() is re-read on every pass, exactly as an index-based for loop would.
  while (index < categories.size()) {
    output.add(categories.get(index));
    index += 1;
  }
}
/**
 * Scrapes the Newegg site map page and builds one entry per sub-category
 * link found under each top-level category heading.
 *
 * Each entry is a map created with env.newHashMap() holding:
 *   'title' - "<sub-category> | <top-level category>", with a trailing
 *             " (...)" group removed from both labels
 *   'url'   - the link's href resolved against the site map URL, as a string
 *
 * Any exception raised while fetching or parsing is reported through
 * env.error() and the entries collected so far are returned.
 *
 * @param env sandbox environment providing newArrayList, newURL, newJsoup,
 *            newHashMap and error
 * @returns the list created by env.newArrayList(), possibly empty
 */
function grabCategory(env) {
  // Cuts the label at its last " (" when a ")" occurs after that position;
  // otherwise returns the label unchanged. Only lastIndexOf/substring are
  // used — presumably text() may hand back a Java string in the sandbox, and
  // both methods exist on either string type (TODO confirm).
  function stripSuffix(label) {
    var open = label.lastIndexOf(' (');
    var close = label.lastIndexOf(')');
    if (open >= 0 && open < close) {
      return label.substring(0, open);
    }
    return label;
  }

  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://www.newegg.com/Info/SiteMap.aspx');
    // Second argument is the fetch timeout (presumably milliseconds).
    var doc = env.newJsoup().parse(link, 60000);
    var elements = doc.select('h5 a.nolone');
    for (var i = 0; i < elements.size(); i++) {
      var element = elements.get(i);
      var topcat = stripSuffix(element.text());

      // The sub-category links live in the element that follows the heading.
      // A heading with no following element has nothing to contribute; skip
      // it rather than letting a null dereference abort the whole scrape.
      var listing = element.parent().nextElementSibling();
      if (listing == null) {
        continue;
      }

      var children = listing.select('a.nolone');
      for (var j = 0; j < children.size(); j++) {
        var child = children.get(j);
        var title = stripSuffix(child.text()) + ' | ' + topcat;
        // Resolve relative hrefs against the site map URL; + '' forces a string.
        var url = env.newURL(link, child.attr('href')) + '';
        var item = env.newHashMap();
        item.put('title', title);
        item.put('url', url);
        tag.add(item);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
    
  
 
No comments:
Post a Comment