Monday 16 April 2012

Grab products from Newegg

Grab products from Newegg
This task uses a JavaScript sandbox with jsoup support to grab products from Newegg.
Grab products from Newegg
  1. Create a JavaScript sandbox with jsoup support
  2. Create the following JavaScript
javascript
/**
 * Script entry point (presumably invoked by the sandbox — confirm with the
 * sandbox documentation).
 *
 * Scrapes a hard-coded Newegg category, limited to a single result page, and
 * appends every product returned by grabProduct to the collection stored
 * under the 'links' key of args.
 *
 * @param env  sandbox helper object; passed through to grabProduct untouched
 * @param args argument container; args.get('links') must return a collection
 *             that supports add()
 */
function main(env, args) {
  var categoryUrl = 'http://www.newegg.com/Store/SubCategory.aspx?SubCategory=11&name=Controller-Panels';
  var pageLimit = 1;

  // Look up the output collection first, then run the scrape.
  var output = args.get('links');
  var products = grabProduct(categoryUrl, pageLimit, env);

  // Copy the scraped products into the output collection one at a time.
  var index = 0;
  while (index < products.size()) {
    output.add(products.get(index));
    index += 1;
  }
}

/**
 * Scrapes product listings from a paginated category page.
 *
 * For every page number from 1 to maxpage the URL catUrl + '&Page=' + no is
 * fetched, every '.itemCell' element that is not a 'featuredProduct' is turned
 * into a map of product fields, and the maps are collected into a list.
 *
 * A failure while processing a page is reported through env.error and the
 * scrape continues with the next page (best effort).
 *
 * @param catUrl  category URL; '&Page=<n>' is appended to it for each page
 * @param maxpage number of the last page to fetch (pages start at 1; a value
 *                below 1 fetches nothing)
 * @param env     sandbox helper providing newArrayList, newHashMap, newURL,
 *                newJsoup, newString and error
 * @return the list created by env.newArrayList(), holding one map per product
 */
function grabProduct(catUrl, maxpage, env) {
  // Second argument of parse(); presumably a timeout in milliseconds, as in
  // jsoup's Jsoup.parse(URL, int) — confirm against the sandbox wrapper.
  var timeoutMs = 60000;
  var products = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var pageUrl = env.newURL(catUrl + '&Page=' + no);
      var listing = env.newJsoup().parse(pageUrl, timeoutMs);
      var cells = listing.select('.itemCell');
      for (var i = 0; i < cells.size(); i++) {
        var cell = cells.get(i);
        if (cell.hasClass('featuredProduct')) {
          continue;
        }
        products.add(readListingCell(cell, pageUrl, timeoutMs, env));
      }
    } catch (e) {
      env.error(e);
    }
  }
  return products;
}

/**
 * Builds the field map for one listing cell: 'url', 'title', 'price' and
 * 'small-image' from the listing itself, plus the detail-page fields when the
 * cell links to a product page. Each field is only set when its element exists.
 */
function readListingCell(cell, pageUrl, timeoutMs, env) {
  var item = env.newHashMap();
  var productUrl = null;

  var node = cell.select('.itemText .wrapper a').first();
  if (node != null) {
    // Links on the listing page are resolved against the listing page URL.
    productUrl = env.newURL(pageUrl, node.attr('href')) + '';
    item.put('url', productUrl);
  }

  node = cell.select('.itemText .wrapper a span').first();
  if (node != null) {
    item.put('title', node.text());
  }

  node = cell.select('.itemAction .itemPricing .priceFinal').first();
  if (node != null) {
    var price = env.newString(node.text());
    // Drop the 'Now: ' label some prices are prefixed with.
    if (price.startsWith('Now: ')) {
      price = price.substring(5);
    }
    item.put('price', price + '');
  }

  node = cell.select('.itemGraphics .itemImage img').first();
  if (node != null) {
    item.put('small-image', env.newURL(pageUrl, node.attr('src')) + '');
  }

  // Without a product link there is no detail page to fetch; skip it instead
  // of building a URL from null and logging a spurious error.
  if (productUrl != null) {
    try {
      addDetailFields(item, productUrl, timeoutMs, env);
    } catch (e) {
      // A broken detail page must not lose the listing data already gathered.
      env.error(e);
    }
  }
  return item;
}

/**
 * Fetches the product detail page and adds 'description', 'large-images',
 * 'small-images' and 'large-image' to item. Image URLs found on the detail
 * page are resolved against the detail page URL, not the listing page URL.
 */
function addDetailFields(item, productUrl, timeoutMs, env) {
  var detailUrl = env.newURL(productUrl);
  var detail = env.newJsoup().parse(detailUrl, timeoutMs);

  var node = detail.select('#Specs').first();
  if (node != null) {
    item.put('description', node.html());
  }

  // Thumbnail URLs carry a '$S35$' marker; the same URL with '$S300W$' or
  // '$S125W$' substituted is stored as the large / small variant.
  var thumbMarker = /\$S35\$/g;
  var thumbs = detail.select('.navThumbs a.noLine img');
  var largeImages = [];
  var smallImages = [];
  for (var j = 0; j < thumbs.size(); j++) {
    var src = env.newURL(detailUrl, thumbs.get(j).attr('src')) + '';
    largeImages.push(src.replace(thumbMarker, '$S300W$'));
    smallImages.push(src.replace(thumbMarker, '$S125W$'));
  }
  // One URL per line; an empty string when the page has no thumbnails.
  item.put('large-images', largeImages.join('\n'));
  item.put('small-images', smallImages.join('\n'));

  node = detail.select('.mainSlide img').first();
  if (node != null) {
    item.put('large-image', env.newURL(detailUrl, node.attr('src')) + '');
  }
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment