Tuesday 22 May 2012

Grab categories from HP Shopping

Grab categories from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab categories from HP Shopping.
Grab categories from HP Shopping
  1. Create a JavaScript sandbox with jsoup support
  2. Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: appends every category found by grabCategory to the
 * caller-supplied 'links' list.
 *
 * @param env  sandbox environment, passed through to grabCategory unchanged
 * @param args argument map; its 'links' entry is the list that receives the results
 */
function main(env, args) {
  var output = args.get('links');
  var categories = grabCategory(env);
  var index = 0;
  // Copy element by element, using only the size/get/add methods of the two lists.
  while (index < categories.size()) {
    output.add(categories.get(index));
    index += 1;
  }
}

/**
 * Fetches the HP Shopping home page and collects its category pages.
 *
 * The page embeds a script that assigns an object literal to `surveyInitData`.
 * The text between that assignment and the following `$('body').hpOnSiteExit({`
 * call is evaluated, and every entry of `surveyData[0].configPages` whose
 * `pageType` is 'category' is turned into a map with the keys 'title' and 'url'.
 *
 * Failures are never thrown to the caller: they are reported through env.info
 * (expected markers or data missing) or env.error (anything that throws), and
 * whatever was collected so far is returned.
 *
 * @param env sandbox environment; this function uses its newArrayList, newURL,
 *            newJsoup, newHashMap, info and error methods
 * @returns the list created by env.newArrayList(), one map per category
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://shopping.hp.com');
    var conn = env.newJsoup().connect(link).userAgent('Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
    // NOTE(review): 60000 is presumably a timeout in milliseconds (jsoup convention) - confirm.
    var html = conn.timeout(60000).execute().body();

    // The data of interest sits between these two markers in the page's inline script.
    var pat1 = 'var surveyInitData =';
    var pat2 = "$('body').hpOnSiteExit({";
    var pos1 = html.indexOf(pat1);
    if (pos1 < 0) {
      env.info('S1: javascript code not found');
      return tag;
    }
    var pos2 = html.indexOf(pat2, pos1);
    if (pos2 < 0) {
      env.info('S2: javascript code not found');
      return tag;
    }
    var js = html.substring(pos1 + pat1.length, pos2);

    var obj = null;
    // SECURITY(review): this evaluates text downloaded over plain http with full
    // access to this function's scope. It is kept because the embedded data is a
    // JavaScript object literal that may not be strict JSON; consider an isolated
    // evaluation scope or a tolerant parser if the host provides one.
    eval('obj = ' + js);

    // Validate the shape step by step so a changed page layout produces a clear
    // message instead of an opaque TypeError from a chained property access.
    var surveys = obj ? obj.surveyData : null;
    var first = surveys && surveys.length > 0 ? surveys[0] : null;
    var pages = first ? first.configPages : null;
    if (!pages) {
      env.info('S3: category pages not found');
      return tag;
    }

    for (var i = 0; i < pages.length; i++) {
      var pg = pages[i];
      // Skip holes/nulls so one bad entry does not discard the categories after it.
      if (!pg || pg.pageType !== 'category') continue;
      var it = env.newHashMap();
      it.put('title', pg.pageName);
      it.put('url', pg.fullpath);
      tag.add(it);
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment