Grab products from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab products from HP Shopping.
Grab products from HP Shopping
- Create a JavaScript sandbox with jsoup support
- Create the following JavaScript
javascript
// NOTE: kept in ES5 style (var, function declarations) because the script is
// executed by an embedded sandbox whose engine version is not visible here.

/** User-agent header sent with every page request. */
var GRAB_USER_AGENT = 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101';

/** Value passed to the connection's timeout() for every page request. */
var GRAB_TIMEOUT_MS = 60000;

/**
 * Sandbox entry point: grabs the products of the HP Pavilion laptop category
 * and appends each of them to the list found under the 'links' argument.
 *
 * @param {Object} env  Sandbox environment; this script uses its newJsoup(),
 *                      newArrayList(), newHashMap() and error() methods.
 * @param {Object} args Argument map; args.get('links') must return a list
 *                      exposing add().
 */
function main(env, args) {
  var catUrl = 'http://shopping.hp.com/en_US/home-office/-/products/Laptops/HP%20Pavilion';
  var links = args.get('links');
  var prods = grabProduct(catUrl, env);
  for (var i = 0; i < prods.size(); i++) {
    links.add(prods.get(i));
  }
}

/**
 * Grabs every product listed on a category page and on the further pages
 * linked from its pagination container.
 *
 * A failure while loading or parsing one page is reported through
 * env.error() and does not prevent the other pages from being processed.
 *
 * @param {string} catUrl URL of the first page of the category.
 * @param {Object} env    Sandbox environment (see main()).
 * @returns {Object} List created by env.newArrayList() holding one map per
 *   product with the keys 'image-list', 'image', 'title', 'url', 'desc'
 *   and 'price'.
 */
function grabProduct(catUrl, env) {
  var products = env.newArrayList();
  // NOTE: the category page is requested once here to read the pagination
  // and once more in the loop below, because it is also the first page URL.
  var pageUrls = collectPageUrls(catUrl, env);

  for (var i = 0; i < pageUrls.size(); i++) {
    try {
      var doc = fetchDocument(env, pageUrls.get(i));
      var buckets = doc.select('.listing-page-bucket');
      for (var j = 0; j < buckets.size(); j++) {
        var product = extractProduct(buckets.get(j), env);
        if (product != null) {
          products.add(product);
        }
      }
    } catch (e) {
      env.error(e);
    }
  }
  return products;
}

/**
 * Downloads and parses one page using the shared user agent and timeout.
 *
 * @param {Object} env Sandbox environment providing newJsoup().
 * @param {string} url Address of the page to load.
 * @returns {Object} The parsed document returned by the connection's get().
 */
function fetchDocument(env, url) {
  return env.newJsoup()
    .connect(url)
    .userAgent(GRAB_USER_AGENT)
    .timeout(GRAB_TIMEOUT_MS)
    .get();
}

/**
 * Builds the list of listing pages to scrape: the category URL itself
 * followed by the targets of its pagination links.
 *
 * Errors are reported through env.error(); the returned list then contains
 * whatever was collected up to that point (at least the category URL).
 *
 * @param {string} catUrl URL of the first page of the category.
 * @param {Object} env    Sandbox environment.
 * @returns {Object} List created by env.newArrayList() holding page URLs.
 */
function collectPageUrls(catUrl, env) {
  var urls = env.newArrayList();
  urls.add(catUrl);
  try {
    var doc = fetchDocument(env, catUrl);
    var root = doc.select('.pagination-results-container').first();
    if (root != null) {
      var anchors = root.select('a');
      // The final anchor is never visited - presumably it is the "next page"
      // control; TODO confirm against the page markup.
      for (var i = 0; i < anchors.size() - 1; i++) {
        var anchor = anchors.get(i);
        if (anchor.hasClass('option') || anchor.hasClass('pngFix')) {
          continue;
        }
        // Resolve against the document's base URI so a relative href can be
        // fetched later; keep the raw attribute when resolution yields nothing.
        var href = anchor.absUrl('href');
        if (href == null || (href + '').length === 0) {
          href = anchor.attr('href');
        }
        urls.add(href);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return urls;
}

/**
 * Converts one '.listing-page-bucket' element into a product map.
 *
 * @param {Object} bucket Element wrapping a single product listing.
 * @param {Object} env    Sandbox environment providing newHashMap().
 * @returns {Object|null} Map with the keys 'image-list', 'image', 'title',
 *   'url', 'desc' and 'price', or null when the bucket has no title link.
 */
function extractProduct(bucket, env) {
  // 'image-list' is every swatch image source joined by newlines; 'image' is
  // the source of the last swatch whose style is not exactly 'display:none'.
  var swatches = bucket.select('.color-selector-img img.pngFix');
  var imageList = '';
  var image = '';
  for (var k = 0; k < swatches.size(); k++) {
    var swatch = swatches.get(k);
    if (imageList.length > 0) {
      imageList += '\n';
    }
    imageList += swatch.attr('src') + '';
    if (swatch.attr('style') + '' !== 'display:none') {
      image = swatch.attr('src');
    }
  }

  var titleLink = bucket.select('.product-specs h3 a').first();
  if (titleLink == null) {
    return null;
  }

  // The description is read from the second element after the rating.
  // Missing siblings leave it empty instead of throwing, so one sparse
  // listing no longer aborts the rest of its page.
  var desc = '';
  var rating = bucket.select('.product-specs .rating').first();
  if (rating != null) {
    var afterRating = rating.nextElementSibling();
    var descNode = afterRating != null ? afterRating.nextElementSibling() : null;
    if (descNode != null) {
      desc = descNode.text();
    }
  }

  // Prefer '#start-price'; fall back to '.price-value'.
  var priceNode = bucket.select('#start-price').first();
  if (priceNode == null) {
    priceNode = bucket.select('.price-value').first();
  }
  var price = priceNode != null ? priceNode.text() : '';

  var item = env.newHashMap();
  item.put('image-list', imageList);
  item.put('image', image);
  item.put('title', titleLink.text());
  item.put('url', titleLink.attr('href'));
  item.put('desc', desc);
  item.put('price', price);
  return item;
}