Grab categories from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab categories from HP Shopping.
Grab categories from HP Shopping
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: append every category found by grabCategory to the
 * caller-supplied 'links' list.
 *
 * @param {Object} env Sandbox helper object; passed through to grabCategory.
 * @param {Object} args Argument map; the entry stored under 'links' is the
 *     list that receives the category items.
 */
function main(env, args) {
  var links = args.get('links');
  var cats = grabCategory(env);
  var count = cats.size();
  for (var idx = 0; idx < count; idx++) {
    links.add(cats.get(idx));
  }
}
8 | |
/**
 * Download the HP Shopping home page and collect its category pages.
 *
 * The text between the `var surveyInitData =` marker and the following
 * `$('body').hpOnSiteExit({` call is evaluated as the right-hand side of an
 * assignment. Every entry of `surveyData[0].configPages` whose `pageType` is
 * 'category' becomes one map with the keys 'title' and 'url'.
 *
 * Failures never propagate to the caller: a missing marker or missing data
 * is reported through env.info, any thrown exception through env.error, and
 * whatever was collected so far is returned.
 *
 * @param {Object} env Sandbox helper supplying newArrayList, newHashMap,
 *     newURL, newJsoup, info and error.
 * @returns {Object} The list created by env.newArrayList(), holding one map
 *     per category page.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://shopping.hp.com');
    var conn = env.newJsoup().connect(link).userAgent('Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
    var html = conn.timeout(60000).execute().body();

    // Locate the inline script fragment that carries the page configuration.
    var pat1 = 'var surveyInitData =';
    var pat2 = "$('body').hpOnSiteExit({";
    var pos1 = html.indexOf(pat1);
    if (pos1 < 0) {
      env.info('S1: javascript code not found');
      return tag;
    }
    var pos2 = html.indexOf(pat2, pos1);
    if (pos2 < 0) {
      env.info('S2: javascript code not found');
      return tag;
    }

    var js = html.substring(pos1 + pat1.length, pos2);
    var obj = null;
    // SECURITY: this executes script text downloaded from the remote page.
    // It is kept because the fragment is a JavaScript literal (possibly with
    // trailing statements), not strict JSON. Only run this inside a sandbox
    // that grants the evaluated code no sensitive capabilities.
    eval('obj = ' + js);

    // Report a missing structure explicitly instead of relying on a
    // TypeError from dereferencing undefined.
    var survey = obj && obj.surveyData && obj.surveyData[0];
    var pages = survey && survey.configPages;
    if (!pages) {
      env.info('S3: category pages not found');
      return tag;
    }

    for (var i = 0; i < pages.length; i++) {
      var pg = pages[i];
      // Skip holes/nulls so one bad entry does not drop the categories
      // that follow it.
      if (!pg || pg.pageType !== 'category') continue;
      var it = env.newHashMap();
      it.put('title', pg.pageName);
      it.put('url', pg.fullpath);
      tag.add(it);
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
/**
 * Sandbox entry point: append every category found by grabCategory to the
 * caller-supplied 'links' list.
 *
 * @param {Object} env Sandbox helper object; passed through to grabCategory.
 * @param {Object} args Argument map; the entry stored under 'links' is the
 *     list that receives the category items.
 */
function main(env, args) {
  var links = args.get('links');
  var cats = grabCategory(env);
  for (var i = 0; i < cats.size(); i++) {
    links.add(cats.get(i));
  }
}

/**
 * Download the HP Shopping home page and collect its category pages.
 *
 * The text between the `var surveyInitData =` marker and the following
 * `$('body').hpOnSiteExit({` call is evaluated as the right-hand side of an
 * assignment. Every entry of `surveyData[0].configPages` whose `pageType` is
 * 'category' becomes one map with the keys 'title' and 'url'.
 *
 * Failures never propagate to the caller: a missing marker or missing data
 * is reported through env.info, any thrown exception through env.error, and
 * whatever was collected so far is returned.
 *
 * @param {Object} env Sandbox helper supplying newArrayList, newHashMap,
 *     newURL, newJsoup, info and error.
 * @returns {Object} The list created by env.newArrayList(), holding one map
 *     per category page.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var link = env.newURL('http://shopping.hp.com');
    var conn = env.newJsoup().connect(link).userAgent('Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101');
    var html = conn.timeout(60000).execute().body();

    // Locate the inline script fragment that carries the page configuration.
    var pat1 = 'var surveyInitData =';
    var pat2 = "$('body').hpOnSiteExit({";
    var pos1 = html.indexOf(pat1);
    if (pos1 < 0) {
      env.info('S1: javascript code not found');
      return tag;
    }
    var pos2 = html.indexOf(pat2, pos1);
    if (pos2 < 0) {
      env.info('S2: javascript code not found');
      return tag;
    }

    var js = html.substring(pos1 + pat1.length, pos2);
    var obj = null;
    // SECURITY: this executes script text downloaded from the remote page.
    // It is kept because the fragment is a JavaScript literal (possibly with
    // trailing statements), not strict JSON. Only run this inside a sandbox
    // that grants the evaluated code no sensitive capabilities.
    eval('obj = ' + js);

    // Report a missing structure explicitly instead of relying on a
    // TypeError from dereferencing undefined.
    var survey = obj && obj.surveyData && obj.surveyData[0];
    var pages = survey && survey.configPages;
    if (!pages) {
      env.info('S3: category pages not found');
      return tag;
    }

    for (var j = 0; j < pages.length; j++) {
      var pg = pages[j];
      // Skip holes/nulls so one bad entry does not drop the categories
      // that follow it.
      if (!pg || pg.pageType !== 'category') continue;
      var it = env.newHashMap();
      it.put('title', pg.pageName);
      it.put('url', pg.fullpath);
      tag.add(it);
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment