Sunday 15 April 2012

Grab categories from BestBuy

Grab categories from BestBuy
This task uses a JavaScript sandbox with jsoup support to grab categories from BestBuy.
Grab categories from BestBuy
  1. Create a JavaScript sandbox with jsoup support.
  2. Create the following JavaScript:
javascript
/**
 * Sandbox entry point: gathers the scraped category entries and appends
 * each of them to the list stored under the 'links' key of `args`.
 *
 * Any exception raised along the way is handed to `env.error` rather than
 * propagated to the caller.
 *
 * @param {Object} env - Sandbox environment; passed through to
 *   grabCategory and used here for error reporting (`env.error`).
 * @param {Object} args - Argument container exposing `get(key)`; the value
 *   under 'links' must expose `add(item)`.
 */
function main(env, args) {
  try {
    // Scrape first, then look up the output list (same order as before,
    // so a missing 'links' entry is only noticed after scraping).
    var found = grabCategory(env);
    var sink = args.get('links');
    var idx = 0;
    while (idx < found.size()) {
      sink.add(found.get(idx));
      idx += 1;
    }
  } catch (failure) {
    env.error(failure);
  }
}

/**
 * Scrapes the BestBuy navigation menu and, for every top-level category
 * page found there, collects the links listed under its "Brand" section.
 *
 * Steps:
 *   1. Fetch the home page and gather unique navigation links whose href
 *      contains 'http://www.bestbuy.com/site/'.
 *   2. For each such category page, locate the '.search .title' element
 *      whose trimmed text is 'Brand' and take its parent as the brand
 *      section. If that section has a '.seeall a' link, follow it and use
 *      the Brand section of that page instead.
 *   3. Emit one entry per 'li a' link inside the brand section.
 *
 * Errors are reported through `env.error` and never thrown. A failure on
 * the home page ends the scrape; a failure on a single category page only
 * skips that category, so the remaining categories are still collected.
 *
 * @param {Object} env - Sandbox environment. This function relies on its
 *   `newArrayList`, `newHashMap`, `newURL`, `newJsoup`, `newString` and
 *   `error` members (presumably thin wrappers over Java/jsoup objects —
 *   confirm against the sandbox).
 * @returns {Object} The list created by `env.newArrayList()`, holding one
 *   map per brand link with keys 'title' ("<brand text> | <category
 *   title>") and 'url' (the object returned by `env.newURL`).
 */
function grabCategory(env) {
  var HOME_URL = 'http://www.bestbuy.com/site/index.jsp';
  var CATEGORY_PREFIX = 'http://www.bestbuy.com/site/';
  // Presumably milliseconds, as in jsoup's parse(URL, timeoutMillis) —
  // confirm against the sandbox wrapper.
  var FETCH_TIMEOUT = 60000;

  // Downloads and parses the page behind the given URL object.
  function fetchPage(pageUrl) {
    return env.newJsoup().parse(pageUrl, FETCH_TIMEOUT);
  }

  // Returns the parent of the first '.search .title' element whose trimmed
  // text is 'Brand', or null when the page has no such element.
  function findBrandSection(doc) {
    var titles = doc.select('.search .title');
    for (var k = 0; k < titles.size(); k++) {
      var candidate = titles.get(k);
      // Loose equality kept on purpose: trim() may yield a host string
      // object rather than a JS primitive (TODO confirm), in which case a
      // strict comparison would never match.
      if (env.newString(candidate.text()).trim() == 'Brand') {
        return candidate.parent();
      }
    }
    return null;
  }

  // Collects the unique navigation links that point into the site section
  // as a list of {title, url} maps (url kept as the raw href value).
  function collectTopCategories(homeDoc) {
    var anchors = homeDoc.select('#nav li.nav-pro ul li ul li a');
    var seen = env.newArrayList();
    var found = env.newArrayList();
    for (var k = 0; k < anchors.size(); k++) {
      var anchor = anchors.get(k);
      var href = anchor.attr('href');
      if (seen.indexOf(href) >= 0) {
        continue;
      }
      seen.add(href);
      if (href.indexOf(CATEGORY_PREFIX) < 0) {
        continue;
      }
      var entry = env.newHashMap();
      entry.put('title', anchor.text());
      entry.put('url', href);
      found.add(entry);
    }
    return found;
  }

  // Appends one {title, url} map per brand link of the given category to
  // `out`. Does nothing when no Brand section can be found.
  function appendBrandLinks(category, out) {
    var pageUrl = env.newURL(category.get('url'));
    var brands = findBrandSection(fetchPage(pageUrl));
    if (brands == null) {
      return;
    }
    var seeAll = brands.select('.seeall a').first();
    if (seeAll != null) {
      // Resolve against the page the link was found on, so path- and
      // query-relative hrefs end up at the right location.
      pageUrl = env.newURL(pageUrl, seeAll.attr('href'));
      brands = findBrandSection(fetchPage(pageUrl));
      if (brands == null) {
        return;
      }
    }
    var anchors = brands.select('li a');
    for (var k = 0; k < anchors.size(); k++) {
      var anchor = anchors.get(k);
      var item = env.newHashMap();
      item.put('title', anchor.text() + ' | ' + category.get('title'));
      item.put('url', env.newURL(pageUrl, anchor.attr('href')));
      out.add(item);
    }
  }

  var tag = env.newArrayList();
  try {
    var topcat = collectTopCategories(fetchPage(env.newURL(HOME_URL)));
    for (var i = 0; i < topcat.size(); i++) {
      // Isolate failures per category: one unreachable or malformed page
      // must not discard every category that comes after it.
      try {
        appendBrandLinks(topcat.get(i), tag);
      } catch (pageError) {
        env.error(pageError);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment