Saturday, 7 April 2012

Grab jobs from Stack Overflow Careers

Grab jobs from Stack Overflow Careers
This task uses Java and jsoup to grab jobs from Stack Overflow Careers.
Grab jobs from Stack Overflow Careers
  1. Create the LinkEntry class as follows
  2. Create the StackoverflowCareers class as follows
  3. Call the StackoverflowCareers.grab() method as follows
Call StackoverflowCareers.grab() method
// Grab the first listing page only, then log every job with its 1-based position.
List<LinkEntry> links = StackoverflowCareers.grab(1);

int position = 0;
for (LinkEntry entry : links) {
    position++;
    logger.info("\r\n(" + position + ")\r\n" + entry.toString());
}
    
LinkEntry class
/**
 * Plain holder for one grabbed link: its URL, its title and a free-text description.
 * All three fields default to the empty string and are written directly by callers.
 */
public class LinkEntry {

    /** Line terminator placed before the first line and after every line of {@link #toString()}. */
    private static final String EOL = "\r\n";

    public String url = "";
    public String title = "";
    public String desc = "";

    /**
     * Renders the entry as a block of text that starts with a line break and then lists
     * the {@code Url}, {@code Title} and {@code Desc} values, one per line.
     *
     * @return the multi-line text form of this entry
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(EOL);
        text.append("Url: ").append(url).append(EOL);
        text.append("Title: ").append(title).append(EOL);
        text.append("Desc: ").append(desc).append(EOL);
        return text.toString();
    }

}
    
StackoverflowCareers class
public class StackoverflowCareers {

    public static List<LinkEntry> grab(int maxpage) {
        List<LinkEntry> tag = new ArrayList<LinkEntry>();
        for (int no = 1; no <= maxpage; no++) {
            String link = "http://careers.stackoverflow.com/jobs";
            if (no > 1) {
                link += "?pg=" + no;
            }
            try {
                Document doc = Jsoup.parse(new URL(link), 60000);
                Elements elements =  doc.select(".job");
                Element element = null;
                for (int i = 0; i < elements.size(); i++) {
                    element = elements.get(i);
                    Element child = element.select(".title").first();
                    if (child == null) continue;
                    String url = child.attr("href");
                    if (!url.startsWith("/jobs/")) continue;
                    int pos = url.lastIndexOf("?");
                    if (pos >= 0) url = url.substring(0, pos);
                    url = "http://careers.stackoverflow.com" + url;
                    try {
                        Document cdoc = Jsoup.parse(new URL(url), 60000);
                        child = cdoc.select("#title").first();
                        if (child == null) continue;
                        String title = child.text();
                        child = cdoc.select(".jobdetail .description").first();
                        if (child == null) continue;
                        String desc = child.text().trim();
                        if (desc.startsWith("Job Description")) {
                            desc = desc.substring(15).trim();
                        }
                        String head = "";
                        child = cdoc.select("#hed .employer").first();
                        if (child != null) {
                            head = child.text();
                        }
                        child = cdoc.select("#hed .location").first();
                        if (child != null) {
                            if (head.length() > 0) head += "\r\n";
                            head += child.text();
                        }
                        desc = head + "\r\n" + desc;
                        LinkEntry job = new LinkEntry();
                        job.url = url;
                        job.title = title;
                        job.desc = desc;
                        tag.add(job);
                    } catch (Exception e) {
                        logger.error("", e);
                    }
                }
            } catch (Exception e) {
                logger.error("", e);
            }
        }
        return tag;
    }
     
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment