Grab products from HP Shopping
This task uses a JavaScript sandbox with jsoup support to grab products from HP Shopping.
Grab products from HP Shopping
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: scrapes the hard-coded HP Pavilion laptop category and
 * appends every product record to the caller-supplied 'links' collection.
 *
 * @param {Object} env  Sandbox environment; handed through to grabProduct unchanged.
 * @param {Object} args Argument container; args.get('links') must return a
 *                      collection exposing add(item).
 */
function main(env, args) {
  var categoryUrl = 'http://shopping.hp.com/en_US/home-office/-/products/Laptops/HP%20Pavilion';
  // The output collection is looked up before scraping starts.
  var sink = args.get('links');
  var products = grabProduct(categoryUrl, env);

  // Copy the records across one at a time using the list-style get/size accessors.
  var index = 0;
  while (index < products.size()) {
    sink.add(products.get(index));
    index += 1;
  }
}
/**
 * Scrape product records from a category listing page and the pages linked
 * from its pagination container.
 *
 * Steps:
 *   1. Fetch catUrl and collect the hrefs of the anchors inside
 *      '.pagination-results-container' (catUrl itself is always first).
 *   2. Fetch every collected URL and turn each '.listing-page-bucket' element
 *      that has a title link into one record.
 *
 * Failures are reported through env.error and never propagate: a failed
 * pagination lookup still leaves catUrl to be scraped, and a failed page is
 * skipped while the remaining pages are still processed.
 *
 * @param {string} catUrl URL of the category listing page.
 * @param {Object} env    Sandbox environment providing newArrayList(),
 *                        newHashMap(), newJsoup() and error(e).
 * @returns {Object} List created by env.newArrayList() holding one map
 *                   (from env.newHashMap()) per product, with the keys
 *                   'image-list', 'image', 'title', 'url', 'desc' and 'price'.
 */
function grabProduct(catUrl, env) {
  var USER_AGENT = 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101';
  var TIMEOUT_MS = 60000;

  // Fetch and parse a single page with the shared user agent and timeout.
  function fetchDocument(url) {
    return env.newJsoup().connect(url).userAgent(USER_AGENT).timeout(TIMEOUT_MS).get();
  }

  // Build the list of listing pages to scrape, starting with catUrl itself.
  function collectPageUrls() {
    var urls = env.newArrayList();
    urls.add(catUrl);
    try {
      var root = fetchDocument(catUrl).select('.pagination-results-container').first();
      if (root != null) {
        var anchors = root.select('a');
        // NOTE(review): the last anchor is deliberately left out, as in the
        // original loop bound; presumably it is a "next page" link that
        // duplicates a numbered one - confirm against the live markup.
        for (var i = 0; i < anchors.size() - 1; i++) {
          var anchor = anchors.get(i);
          if (anchor.hasClass('option') || anchor.hasClass('pngFix')) continue;
          urls.add(anchor.attr('href'));
        }
      }
    } catch (e) {
      env.error(e);
    }
    return urls;
  }

  // Convert one listing bucket into a record map; returns null when the
  // bucket has no title link and therefore is not treated as a product.
  function extractProduct(bucket) {
    var titleLink = bucket.select('.product-specs h3 a').first();
    if (titleLink == null) return null;

    // Every colour-variant image goes into the newline-separated list; the
    // last one whose style is not exactly 'display:none' is the main image.
    var images = bucket.select('.color-selector-img img.pngFix');
    var imageList = '';
    var image = '';
    for (var k = 0; k < images.size(); k++) {
      var img = images.get(k);
      if (imageList.length > 0) imageList += '\n';
      imageList += String(img.attr('src'));
      if (String(img.attr('style')) !== 'display:none') {
        image = img.attr('src');
      }
    }

    // The description is read from the second element sibling after the
    // rating. Each hop is null-checked: a product without those siblings
    // used to throw here, which aborted the rest of the page.
    var desc = '';
    var rating = bucket.select('.product-specs .rating').first();
    if (rating != null) {
      var afterRating = rating.nextElementSibling();
      var descNode = afterRating != null ? afterRating.nextElementSibling() : null;
      if (descNode != null) {
        desc = descNode.text();
      }
    }

    // Prefer '#start-price' and fall back to '.price-value'.
    var price = '';
    var priceNode = bucket.select('#start-price').first();
    if (priceNode == null) {
      priceNode = bucket.select('.price-value').first();
    }
    if (priceNode != null) {
      price = priceNode.text();
    }

    var record = env.newHashMap();
    record.put('image-list', imageList);
    record.put('image', image);
    record.put('title', titleLink.text());
    record.put('url', titleLink.attr('href'));
    record.put('desc', desc);
    record.put('price', price);
    return record;
  }

  var products = env.newArrayList();
  var pageUrls = collectPageUrls();
  for (var p = 0; p < pageUrls.size(); p++) {
    try {
      var buckets = fetchDocument(pageUrls.get(p)).select('.listing-page-bucket');
      for (var b = 0; b < buckets.size(); b++) {
        var product = extractProduct(buckets.get(b));
        if (product != null) products.add(product);
      }
    } catch (e) {
      // One broken page must not stop the remaining pages from being scraped.
      env.error(e);
    }
  }
  return products;
}