Grab categories from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab categories from HP Shopping.
Grab categories from HP Shopping
- Create a JavaScript sandbox with jsoup support
- Create the following JavaScript:
javascript
/**
 * Entry point: collects the HP Shopping categories and appends each one to
 * the list stored under the 'links' key of args.
 *
 * @param env  environment object, handed unchanged to grabCategory
 * @param args argument holder; only args.get('links') is read, and the list
 *             it returns receives one add() call per category found
 */
function main(env, args) {
  var links = args.get('links');
  var categories = grabCategory(env);
  var idx = 0;
  while (idx < categories.size()) {
    links.add(categories.get(idx));
    idx += 1;
  }
}

/**
 * Fetches the HP Shopping home page, cuts out the script text located between
 * the 'var surveyInitData =' marker and the "$('body').hpOnSiteExit({" marker,
 * evaluates it, and converts every configPages entry whose pageType is
 * 'category' into a map with the keys 'title' and 'url'.
 *
 * Exceptions raised while fetching, evaluating or converting are passed to
 * env.error; the entries collected up to that point are still returned.
 *
 * @param env environment supplying newArrayList, newURL, newJsoup,
 *            newHashMap, info and error
 * @returns the list created by env.newArrayList(); it stays empty when either
 *          marker is missing (reported through env.info as S1 / S2)
 */
function grabCategory(env) {
  var found = env.newArrayList();
  try {
    var homeUrl = env.newURL('http://shopping.hp.com');
    var request = env
      .newJsoup()
      .connect(homeUrl)
      .userAgent('Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
    var page = request.timeout(60000).execute().body();

    var startMarker = 'var surveyInitData =';
    var endMarker = "$('body').hpOnSiteExit({";

    // The end marker is only looked for after the start marker's position.
    var startAt = page.indexOf(startMarker);
    var endAt = startAt < 0 ? -1 : page.indexOf(endMarker, startAt);
    if (endAt < 0) {
      env.info(startAt < 0 ? 'S1: javascript code not found' : 'S2: javascript code not found');
      return found;
    }

    var snippet = page.substring(startAt + startMarker.length, endAt);

    // Direct eval: the evaluated text assigns to the local `obj`, so this
    // variable must keep exactly this name.
    // NOTE(review): this executes text downloaded over plain HTTP. If the
    // fragment turns out to be plain JSON, a non-executing parser would be
    // safer - confirm against the live page before changing.
    var obj = null;
    eval('obj = ' + snippet);

    var configPages = obj.surveyData[0].configPages;
    for (var p = 0; p < configPages.length; p++) {
      var pageInfo = configPages[p];
      if (pageInfo.pageType == 'category') {
        var entry = env.newHashMap();
        entry.put('title', pageInfo.pageName);
        entry.put('url', pageInfo.fullpath);
        found.add(entry);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return found;
}
No comments:
Post a Comment