Grab products from BestBuy
Grab products from BestBuy
- Create a JavaScript sandbox with jsoup support
- Create the following JavaScript:
javascript
/**
 * Script entry point: scrapes the hard-coded category listing and appends
 * every scraped product to the list stored under 'links' in args.
 *
 * @param env  sandbox helper object; passed through unchanged to grabProduct.
 * @param args map-like object; args.get('links') must return a list with add().
 */
function main(env, args) {
  var categoryUrl = 'http://www.bestbuy.com/site/olstemplatemapper.jsp?id=pcat17080&type=page&qp=cabcat0100000%23%230%23%23wv~~cabcat0101000%23%23-1%23%23wv~~q466173746c696d69747067735f323236~~nf862%7C%7C53616d73756e67&list=y&nrp=15&sc=TVVideoSP&sp=-bestsellingsort+skuid&usc=abcat0100000';
  var lastPage = 1; // only the first listing page is scraped
  var found = grabProduct(categoryUrl, lastPage, env);
  var output = args.get('links');

  // Copy element by element; the result is a list object with size()/get(),
  // not a native array.
  var idx = 0;
  while (idx < found.size()) {
    output.add(found.get(idx));
    idx += 1;
  }
}
/**
 * Scrapes product entries from pages 1..maxpage of a category listing.
 *
 * For every '.hproduct' element on a listing page the product's own page is
 * fetched as well, and the data found is collected into one map per product.
 *
 * Failure handling: a listing page that cannot be fetched is reported through
 * env.error and skipped. A single product that fails (for example its detail
 * page cannot be fetched) is also reported and skipped, and the remaining
 * products on the same page are still processed.
 *
 * @param catUrl  listing URL as a string; '&cp=<page number>' is appended.
 * @param maxpage last page number to fetch (pages start at 1).
 * @param env     sandbox helper providing newArrayList, newHashMap, newURL,
 *                newJsoup and error.
 * @returns the list created by env.newArrayList(), holding one map per
 *          product with keys 'title' and 'url' and, when the corresponding
 *          markup exists, 'attributes', 'description', 'small-image',
 *          'overview', 'specifications' and 'price'.
 */
function grabProduct(catUrl, maxpage, env) {
  // Re-parses an HTML fragment against baseUrl, makes its links and image
  // sources absolute via buildURL, and returns the resulting body HTML.
  function absolutizedHtml(fragment, baseUrl) {
    var fragmentDoc = env.newJsoup().parse(fragment, baseUrl);
    buildURL(fragmentDoc, baseUrl, env);
    return fragmentDoc.select('body').first().html();
  }

  // Builds the map for one listing element; returns null when the element has
  // no name link. Throws if the product page cannot be loaded.
  function scrapeProduct(element, link, pageUrl) {
    var child = element.select('.info-main .name a').first();
    if (child == null) return null;

    var title = child.text();
    var url = env.newURL(link, child.attr('href')) + '';
    var item = env.newHashMap();
    item.put('title', title);
    item.put('url', url);

    // Fragments taken from the listing page resolve against the listing URL.
    child = element.select('.attributes').first();
    if (child != null) {
      item.put('attributes', absolutizedHtml(child.html(), pageUrl));
    }
    child = element.select('.description').first();
    if (child != null) {
      item.put('description', absolutizedHtml(child.html(), pageUrl));
    }
    child = element.select('.image-col a.uri img.thumb').first();
    if (child != null) {
      item.put('small-image', env.newURL(link, child.attr('src')) + '');
    }

    // Fragments taken from the product page resolve against the product URL,
    // since relative references there are relative to that page.
    var detailDoc = env.newJsoup().parse(env.newURL(url), 60000);
    child = detailDoc.select('#esrbcontent').first();
    if (child != null) {
      item.put('overview', absolutizedHtml(child.parent().html(), url));
    }
    child = detailDoc.select('#tabbed-specifications').first();
    if (child != null) {
      item.put('specifications', absolutizedHtml(child.html(), url));
    }

    // '.price' is checked last, so it wins when both selectors match.
    // NOTE(review): '.prciest' looks like it could be a typo - confirm against
    // the page markup before changing it.
    child = detailDoc.select('.prciest').first();
    if (child != null) {
      item.put('price', child.text());
    }
    child = detailDoc.select('.price').first();
    if (child != null) {
      item.put('price', child.text());
    }
    return item;
  }

  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    var link;
    var pageUrl;
    var elements;
    var count;
    try {
      link = env.newURL(catUrl + '&cp=' + no);
      pageUrl = link + '';
      elements = env.newJsoup().parse(link, 60000).select('.hproduct');
      count = elements.size();
    } catch (e) {
      env.error(e);
      continue;
    }

    for (var i = 0; i < count; i++) {
      // Per-product guard: one broken product must not discard the rest of
      // the page.
      try {
        var item = scrapeProduct(elements.get(i), link, pageUrl);
        if (item != null) {
          tag.add(item);
        }
      } catch (e) {
        env.error(e);
      }
    }
  }
  return tag;
}
/**
 * Rewrites, in place, the 'href' of every <a> and the 'src' of every <img> in
 * doc to the value produced by resolving it against baseUrl.
 *
 * Elements that do not carry the attribute are left untouched, so a plain
 * named anchor or a source-less image is not given a link to the page itself.
 * A value that env.newURL rejects keeps its original text.
 *
 * @param doc     parsed document offering select(); its elements must offer
 *                hasAttr(name), attr(name) and attr(name, value).
 * @param baseUrl base URL as a string.
 * @param env     sandbox helper providing newURL.
 */
function buildURL(doc, baseUrl, env) {
  var base = env.newURL(baseUrl);

  // Resolves attrName on every element matching selector.
  function absolutize(selector, attrName) {
    var elements = doc.select(selector);
    for (var i = 0; i < elements.size(); i++) {
      var element = elements.get(i);
      try {
        if (element.hasAttr(attrName)) {
          var resolved = env.newURL(base, element.attr(attrName));
          element.attr(attrName, resolved + '');
        }
      } catch (e) {
        // Deliberate best-effort: a value that cannot be resolved is kept
        // as written instead of aborting the whole rewrite.
      }
    }
  }

  absolutize('a', 'href');
  absolutize('img', 'src');
}
No comments:
Post a Comment