Grab categories from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab categories from HP Shopping.
Grab categories from HP Shopping
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: appends every category found by grabCategory(env)
 * to the list stored under the 'links' key of args.
 *
 * @param env  sandbox environment, passed through to grabCategory.
 * @param args argument container; args.get('links') must return an object
 *             with an add(item) method.
 */
function main(env, args) {
  var target = args.get('links');
  var categories = grabCategory(env);
  var idx = 0;
  // Copy element by element using the list's size()/get() accessors.
  while (idx < categories.size()) {
    target.add(categories.get(idx));
    idx += 1;
  }
}
/**
 * Fetches the HP Shopping home page and extracts its category pages from the
 * inline `surveyInitData` script.
 *
 * The page text between the marker `var surveyInitData =` and the following
 * `$('body').hpOnSiteExit({` call is evaluated as JavaScript. Each entry of
 * `surveyData[0].configPages` whose `pageType` is 'category' becomes a map
 * with the keys 'title' (from pageName) and 'url' (from fullpath).
 *
 * Failures never propagate to the caller: a missing marker or a missing page
 * list is reported through env.info, an exception through env.error, and in
 * every case the entries collected so far (possibly none) are returned.
 *
 * @param env sandbox environment; must provide newArrayList, newHashMap,
 *            newURL, newJsoup, info and error.
 * @returns the list created by env.newArrayList(), holding one map per
 *          category page.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://shopping.hp.com');
    // NOTE(review): env.newJsoup() is presumably the Jsoup entry point, so
    // timeout(60000) would be in milliseconds - confirm against the sandbox.
    var conn = env.newJsoup().connect(link).userAgent('Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
    var html = conn.timeout(60000).execute().body();

    // The category data sits between these two markers in the page source.
    var pat1 = 'var surveyInitData =';
    var pat2 = "$('body').hpOnSiteExit({";
    var pos1 = html.indexOf(pat1);
    if (pos1 < 0) {
      env.info('S1: javascript code not found');
      return tag;
    }
    var pos2 = html.indexOf(pat2, pos1);
    if (pos2 < 0) {
      env.info('S2: javascript code not found');
      return tag;
    }
    var js = html.substring(pos1 + pat1.length, pos2);

    // SECURITY: this executes text downloaded over plain HTTP as code, so a
    // tampered response can run arbitrary script inside the sandbox.
    // NOTE(review): if the payload turns out to be strict JSON, strip the
    // trailing ';' and switch to JSON.parse.
    var obj = null;
    eval('obj = ' + js);

    // Guard the expected shape explicitly instead of relying on a TypeError
    // reaching the catch block below.
    var survey = obj && obj.surveyData && obj.surveyData[0];
    var pages = survey && survey.configPages;
    if (!pages) {
      env.info('S3: category pages not found');
      return tag;
    }

    for (var i = 0; i < pages.length; i++) {
      var pg = pages[i];
      if (pg.pageType !== 'category') continue;
      var it = env.newHashMap();
      it.put('title', pg.pageName);
      it.put('url', pg.fullpath);
      tag.add(it);
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment