Grab search results from Yahoo
Grab search results from Yahoo
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
var g_env;
function main(p_env, p_args) {
g_env = p_env;
g_env.info('Starting');
run();
g_env.info('Ending');
}
function run() {
try {
var query = 'lucene';
for (var pn = 1; pn <= 10; pn++) {
var res = grab(query, pn);
for (var i = 0; i < res.size(); i++) {
var it = res.get(i);
var title = it.get('title');
var link = it.get('link');
var no = (pn - 1) * 10 + i + 1;
g_env.info(no + ' | ' + title + ' | ' + link);
}
}
} catch (e) {
g_env.error(e);
}
}
function grab(query, pageno) {
var tag = g_env.newArrayList();
try {
var url = 'http://search.yahoo.com/search?p=' + g_env.encodeURL(query, 'UTF-8') + '&pstart=1&b=' + ((pageno - 1) * 10 + 1);
var conn = g_env.newJsoup().connect(url);
conn.userAgent('Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1');
conn.timeout(60000);
var doc = conn.get();
var nodes = doc.select('#web .res');
for (var i = 0; i < nodes.size(); i++) {
var node = nodes.get(i);
var child = node.select('a.yschttl').first();
var title = child.text();
var link = child.attr('href');
var pos = link.indexOf('**');
if (pos >= 0) {
link = link.substring(pos + 2);
link = g_env.decodeURL(link, 'UTF-8');
}
var it = g_env.newHashMap();
it.put('title', title);
it.put('link', link);
tag.add(it);
}
} catch (e) {
g_env.error(e);
}
return tag;
}
No comments:
Post a Comment