Grab categories from BestBuy
Grab categories from BestBuy
- Create a JavaScript sandbox with jsoup support.
- Create the following JavaScript:
javascript
/**
 * Sandbox entry point.
 *
 * Calls grabCategory(env) and appends every entry it returns, in order, to
 * the list stored under the 'links' key of `args`.
 *
 * @param env  Sandbox helper object; only `env.error` is used directly here.
 * @param args Map-like object; `args.get('links')` must yield a list with `add`.
 *
 * Nothing is returned. Any exception raised along the way is handed to
 * `env.error` instead of propagating to the caller.
 */
function main(env, args) {
  try {
    var found = grabCategory(env);
    var output = args.get('links');
    var index = 0;
    while (index < found.size()) {
      output.add(found.get(index));
      index += 1;
    }
  } catch (err) {
    env.error(err);
  }
}
/**
 * Loads and parses the page at `url` through the sandbox's jsoup wrapper.
 *
 * The second argument to `parse` is 60000 for every request; this is
 * presumably a timeout in milliseconds (jsoup's parse(URL, int) takes one),
 * but the wrapper returned by env.newJsoup() is opaque here - TODO confirm.
 */
function fetchDocument(env, url) {
  return env.newJsoup().parse(url, 60000);
}

/**
 * Returns the parent element of the first '.search .title' node whose trimmed
 * text is 'Brand', or null when the document has no such node.
 */
function findBrandSection(env, doc) {
  var titles = doc.select('.search .title');
  for (var i = 0; i < titles.size(); i++) {
    var candidate = titles.get(i);
    // Loose `==` is deliberate: env.newString(...) presumably yields a host
    // (Java) string, which `===` would never match against a JS literal.
    // Verify against the sandbox before tightening this comparison.
    if (env.newString(candidate.text()).trim() == 'Brand') {
      return candidate.parent();
    }
  }
  return null;
}

/**
 * Scrapes the BestBuy navigation menu and, for every category page that has
 * a 'Brand' facet, collects one entry per brand link.
 *
 * Steps:
 *   1. Parse the home page and gather the unique navigation links whose href
 *      contains 'http://www.bestbuy.com/site/'.
 *   2. For each such category, parse its page and locate the 'Brand' facet.
 *      If the facet has a '.seeall a' link, follow it and use the 'Brand'
 *      facet of that page instead.
 *   3. Emit a map for every 'li a' inside the facet, with
 *      'title' = '<brand text> | <category title>' and
 *      'url'   = the brand href resolved via env.newURL.
 *
 * Relative hrefs are resolved against the page they were found on (not the
 * home page), so page-relative links on nested category pages resolve
 * correctly. Absolute and root-relative hrefs are unaffected.
 *
 * @param env Sandbox helper object providing newArrayList, newHashMap,
 *            newURL, newJsoup, newString and error.
 * @returns The list created by env.newArrayList(), filled with the maps
 *          described above. If any step throws, the exception is passed to
 *          env.error and whatever was collected so far is returned.
 */
function grabCategory(env) {
  var tag = env.newArrayList();
  try {
    var homeUrl = env.newURL('http://www.bestbuy.com/site/index.jsp');
    var navLinks = fetchDocument(env, homeUrl).select('#nav li.nav-pro ul li ul li a');

    // Pass 1: unique, on-site category links from the navigation menu.
    var seenUrls = env.newArrayList();
    var categories = env.newArrayList();
    for (var i = 0; i < navLinks.size(); i++) {
      var navLink = navLinks.get(i);
      var navUrl = navLink.attr('href');
      if (seenUrls.contains(navUrl)) continue;
      seenUrls.add(navUrl);
      if (navUrl.indexOf('http://www.bestbuy.com/site/') < 0) continue;
      var category = env.newHashMap();
      category.put('title', navLink.text());
      category.put('url', navUrl);
      categories.add(category);
    }

    // Pass 2: brand links of every category that exposes a 'Brand' facet.
    for (var c = 0; c < categories.size(); c++) {
      var current = categories.get(c);
      // pageUrl always names the page the facet was read from; it is the
      // base for resolving every href found inside that facet.
      var pageUrl = env.newURL(current.get('url'));
      var brands = findBrandSection(env, fetchDocument(env, pageUrl));
      if (brands == null) continue;

      // A 'see all' link leads to the full brand list; prefer that page.
      var seeAll = brands.select('.seeall a').first();
      if (seeAll != null) {
        pageUrl = env.newURL(pageUrl, seeAll.attr('href'));
        brands = findBrandSection(env, fetchDocument(env, pageUrl));
        if (brands == null) continue;
      }

      var brandLinks = brands.select('li a');
      for (var b = 0; b < brandLinks.size(); b++) {
        var brandLink = brandLinks.get(b);
        var entry = env.newHashMap();
        entry.put('title', brandLink.text() + ' | ' + current.get('title'));
        entry.put('url', env.newURL(pageUrl, brandLink.attr('href')));
        tag.add(entry);
      }
    }
  } catch (e) {
    env.error(e);
  }
  return tag;
}
No comments:
Post a Comment