Grab products from BestBuy
Grab products from BestBuy
- Create a JavaScript sandbox with jsoup support.
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: scrapes the hard-coded BestBuy category listing and
 * appends every product found to the caller-supplied `links` collection.
 *
 * @param env  Sandbox helper object. This script relies on its `newURL`,
 *             `newJsoup`, `newArrayList`, `newHashMap` and `error` methods.
 * @param args Argument container; `args.get('links')` must return a
 *             collection exposing `add(item)`.
 */
function main(env, args) {
  var catUrl = 'http://www.bestbuy.com/site/olstemplatemapper.jsp?id=pcat17080&type=page&qp=cabcat0100000%23%230%23%23wv~~cabcat0101000%23%23-1%23%23wv~~q466173746c696d69747067735f323236~~nf862%7C%7C53616d73756e67&list=y&nrp=15&sc=TVVideoSP&sp=-bestsellingsort+skuid&usc=abcat0100000';
  var maxpage = 1;

  var products = grabProduct(catUrl, maxpage, env);
  var links = args.get('links');
  for (var i = 0; i < products.size(); i++) {
    links.add(products.get(i));
  }
}

/**
 * Walks listing pages 1..maxpage of a category and collects one map per
 * product (see scrapeProduct for the keys).
 *
 * Errors are reported through `env.error` and never thrown to the caller:
 * a failure loading a listing page skips that page, and a failure while
 * scraping a single product skips only that product, so the remaining
 * products on the page are still collected.
 *
 * @param catUrl  Category listing URL; '&cp=<page number>' is appended to it.
 * @param maxpage Number of the last listing page to fetch (pages start at 1).
 * @param env     Sandbox helper object.
 * @returns The list created by `env.newArrayList()`, filled with product maps.
 */
function grabProduct(catUrl, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var link = env.newURL(catUrl + '&cp=' + no);
      var doc = env.newJsoup().parse(link, 60000);
      var elements = doc.select('.hproduct');
      for (var i = 0; i < elements.size(); i++) {
        // Isolate each product: one broken detail page must not discard the
        // rest of the listing page.
        try {
          var item = scrapeProduct(elements.get(i), link, env);
          if (item != null) {
            tag.add(item);
          }
        } catch (e) {
          env.error(e);
        }
      }
    } catch (e) {
      env.error(e);
    }
  }
  return tag;
}

/**
 * Builds the map for one '.hproduct' listing entry, fetching the product's
 * detail page for the overview, specifications and price.
 *
 * Keys written: 'title', 'url', and, when the matching element exists,
 * 'attributes', 'description', 'small-image', 'overview', 'specifications'
 * and 'price'.
 *
 * @param element Listing entry element.
 * @param link    URL object of the listing page, used as the base for
 *                relative links.
 * @param env     Sandbox helper object.
 * @returns The map created by `env.newHashMap()`, or null when the entry has
 *          no product-name link.
 */
function scrapeProduct(element, link, env) {
  var child = element.select('.info-main .name a').first();
  if (child == null) {
    return null;
  }

  var title = child.text();
  var url = env.newURL(link, child.attr('href')) + '';
  var item = env.newHashMap();
  item.put('title', title);
  item.put('url', url);

  child = element.select('.attributes').first();
  if (child != null) {
    item.put('attributes', renderFragment(child.html(), link, env));
  }

  child = element.select('.description').first();
  if (child != null) {
    item.put('description', renderFragment(child.html(), link, env));
  }

  child = element.select('.image-col a.uri img.thumb').first();
  if (child != null) {
    item.put('small-image', env.newURL(link, child.attr('src')) + '');
  }

  // The remaining fields come from the product's own detail page.
  var cdoc = env.newJsoup().parse(env.newURL(url), 60000);

  child = cdoc.select('#esrbcontent').first();
  if (child != null) {
    // The overview is the markup of the element that contains #esrbcontent.
    item.put('overview', renderFragment(child.parent().html(), link, env));
  }

  child = cdoc.select('#tabbed-specifications').first();
  if (child != null) {
    item.put('specifications', renderFragment(child.html(), link, env));
  }

  // When both selectors match, the '.price' text overwrites the '.prciest' one.
  // NOTE(review): '.prciest' may be a misspelling or a real class name on the
  // site - confirm against the live markup before changing it.
  child = cdoc.select('.prciest').first();
  if (child != null) {
    item.put('price', child.text());
  }
  child = cdoc.select('.price').first();
  if (child != null) {
    item.put('price', child.text());
  }

  return item;
}

/**
 * Re-parses an HTML fragment, makes its link and image URLs absolute against
 * the listing page URL, and returns the resulting body markup.
 *
 * @param html HTML fragment to process.
 * @param link Listing page URL (URL object or string).
 * @param env  Sandbox helper object.
 * @returns Inner HTML of the re-parsed fragment's body.
 */
function renderFragment(html, link, env) {
  var base = link + '';
  var fragment = env.newJsoup().parse(html, base);
  buildURL(fragment, base, env);
  return fragment.select('body').first().html();
}

/**
 * Rewrites, in place, every `a[href]` and `img[src]` in `doc` to an absolute
 * URL resolved against `baseUrl`.
 *
 * Only elements that actually carry the attribute are touched. Selecting
 * every `a`/`img` would resolve an empty string for elements without the
 * attribute and, assuming the usual URL-resolution rules, point them at the
 * page itself.
 *
 * @param doc     Parsed document whose elements are modified.
 * @param baseUrl Base URL as a string.
 * @param env     Sandbox helper object.
 */
function buildURL(doc, baseUrl, env) {
  var base = env.newURL(baseUrl);
  absolutizeAttribute(doc, 'a', 'href', base, env);
  absolutizeAttribute(doc, 'img', 'src', base, env);
}

/**
 * Resolves one URL-valued attribute on all matching elements against `base`.
 *
 * @param doc      Parsed document to search.
 * @param tagName  Element name to select, e.g. 'a'.
 * @param attrName Attribute holding the URL, e.g. 'href'.
 * @param base     Base URL object.
 * @param env      Sandbox helper object.
 */
function absolutizeAttribute(doc, tagName, attrName, base, env) {
  var elements = doc.select(tagName + '[' + attrName + ']');
  for (var i = 0; i < elements.size(); i++) {
    var element = elements.get(i);
    try {
      var url = env.newURL(base, element.attr(attrName));
      element.attr(attrName, url + '');
    } catch (e) {
      // Deliberate best effort: a value that cannot be resolved as a URL is
      // left exactly as it was.
    }
  }
}
No comments:
Post a Comment