Grab search results from Yahoo
Grab search results from Yahoo
- Create a JavaScript sandbox with jsoup support.
- Create the JavaScript as follows:
javascript
/**
 * Sandbox environment object received by main(). It is stored globally so
 * that run() and grab() can use it without it being passed around.
 */
var g_env;

/**
 * Script entry point: remembers the environment, then runs the search and
 * logs a start and an end marker around it.
 *
 * @param {Object} p_env  Environment object. This script calls its info,
 *     error, newArrayList, newHashMap, encodeURL, decodeURL and newJsoup
 *     methods.
 * @param {*} p_args  Not used by this script.
 */
function main(p_env, p_args) {
  g_env = p_env;
  g_env.info('Starting');
  run();
  g_env.info('Ending');
}

/**
 * Fetches result pages 1..10 for the query 'lucene' and logs every hit as
 * "<number> | <title> | <link>". Any exception is reported via g_env.error.
 */
function run() {
  try {
    var query = 'lucene';
    for (var pn = 1; pn <= 10; pn++) {
      var res = grab(query, pn);
      for (var i = 0; i < res.size(); i++) {
        var it = res.get(i);
        var title = it.get('title');
        var link = it.get('link');
        // Number hits by page position, assuming 10 hits per page.
        var no = (pn - 1) * 10 + i + 1;
        g_env.info(no + ' | ' + title + ' | ' + link);
      }
    }
  } catch (e) {
    g_env.error(e);
  }
}

/**
 * Downloads one Yahoo result page and extracts title and link of each hit.
 *
 * @param {string} query   Search terms; URL-encoded as UTF-8 before use.
 * @param {number} pageno  1-based page number; converted to the 'b' offset
 *     (1, 11, 21, ...) of the request URL.
 * @returns {Object} List created by g_env.newArrayList() holding one map
 *     (from g_env.newHashMap()) per hit, with keys 'title' and 'link'. When
 *     the request or parsing fails, the error is reported via g_env.error and
 *     the hits collected so far (possibly none) are returned.
 */
function grab(query, pageno) {
  var tag = g_env.newArrayList();
  try {
    var url = 'http://search.yahoo.com/search?p=' + g_env.encodeURL(query, 'UTF-8') +
        '&pstart=1&b=' + ((pageno - 1) * 10 + 1);
    var conn = g_env.newJsoup().connect(url);
    conn.userAgent('Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1');
    conn.timeout(60000);
    var doc = conn.get();
    var nodes = doc.select('#web .res');
    for (var i = 0; i < nodes.size(); i++) {
      var node = nodes.get(i);
      var child = node.select('a.yschttl').first();
      if (child == null) {
        // first() yields null when the node has no title anchor. Skip just
        // this node instead of letting the exception discard the remaining
        // hits of the page.
        continue;
      }
      var title = child.text();
      var link = child.attr('href');
      // When the href contains '**', keep only the URL-decoded part after it
      // (presumably the real target inside a redirect link; confirm).
      var pos = link.indexOf('**');
      if (pos >= 0) {
        link = link.substring(pos + 2);
        link = g_env.decodeURL(link, 'UTF-8');
      }
      var it = g_env.newHashMap();
      it.put('title', title);
      it.put('link', link);
      tag.add(it);
    }
  } catch (e) {
    g_env.error(e);
  }
  return tag;
}
No comments:
Post a Comment