Grab jobs from Stack Overflow Careers
This task uses Java and jsoup to grab job postings from Stack Overflow Careers.
Grab jobs from Stack Overflow Careers
- Create the LinkEntry class as follows
- Create the StackoverflowCareers class as follows
- Call the StackoverflowCareers.grab() method as follows
Call StackoverflowCareers.grab() method
// Grab only the first listing page, then log each job prefixed with its
// 1-based position in the result list.
List<LinkEntry> jobs = StackoverflowCareers.grab(1);
int position = 0;
for (LinkEntry job : jobs) {
    position++;
    logger.info("\r\n(" + position + ")\r\n" + job.toString());
}
// LinkEntry class
/**
 * Plain holder for one scraped job posting: its URL, title and description.
 *
 * <p>The fields are public and mutable because callers assign them directly
 * after constructing an instance; each one defaults to the empty string.
 */
public class LinkEntry {

    /** Address of the job page. */
    public String url = "";

    /** Title of the job posting. */
    public String title = "";

    /** Description text of the job posting. */
    public String desc = "";

    /**
     * Renders the entry as a multi-line block.
     *
     * @return a string that starts with a CRLF, followed by the {@code Url:},
     *         {@code Title:} and {@code Desc:} lines, each terminated by CRLF
     */
    @Override
    public String toString() {
        final String eol = "\r\n";
        StringBuilder text = new StringBuilder(eol);
        text.append("Url: ").append(url).append(eol);
        text.append("Title: ").append(title).append(eol);
        text.append("Desc: ").append(desc).append(eol);
        return text.toString();
    }
}
// StackoverflowCareers class
public class StackoverflowCareers { public static List<LinkEntry> grab(int maxpage) { List<LinkEntry> tag = new ArrayList<LinkEntry>(); for (int no = 1; no <= maxpage; no++) { String link = "http://careers.stackoverflow.com/jobs"; if (no > 1) { link += "?pg=" + no; } try { Document doc = Jsoup.parse(new URL(link), 60000); Elements elements = doc.select(".job"); Element element = null; for (int i = 0; i < elements.size(); i++) { element = elements.get(i); Element child = element.select(".title").first(); if (child == null) continue; String url = child.attr("href"); if (!url.startsWith("/jobs/")) continue; int pos = url.lastIndexOf("?"); if (pos >= 0) url = url.substring(0, pos); url = "http://careers.stackoverflow.com" + url; try { Document cdoc = Jsoup.parse(new URL(url), 60000); child = cdoc.select("#title").first(); if (child == null) continue; String title = child.text(); child = cdoc.select(".jobdetail .description").first(); if (child == null) continue; String desc = child.text().trim(); if (desc.startsWith("Job Description")) { desc = desc.substring(15).trim(); } String head = ""; child = cdoc.select("#hed .employer").first(); if (child != null) { head = child.text(); } child = cdoc.select("#hed .location").first(); if (child != null) { if (head.length() > 0) head += "\r\n"; head += child.text(); } desc = head + "\r\n" + desc; LinkEntry job = new LinkEntry(); job.url = url; job.title = title; job.desc = desc; tag.add(job); } catch (Exception e) { logger.error("", e); } } } catch (Exception e) { logger.error("", e); } } return tag; } }
No comments:
Post a Comment