Grab jobs from Stack Overflow Careers
This task uses Java and jsoup to grab jobs from Stack Overflow Careers.
Grab jobs from Stack Overflow Careers
- Create the LinkEntry class as follows
- Create the StackoverflowCareers class as follows
- Call the StackoverflowCareers.grab() method as follows
Call StackoverflowCareers.grab() method
// Grab the first listing page only, then log every job with a 1-based ordinal.
List<LinkEntry> links = StackoverflowCareers.grab(1);
int ordinal = 0;
for (LinkEntry entry : links) {
    ordinal++;
    logger.info("\r\n(" + ordinal + ")\r\n" + entry.toString());
}
LinkEntry class
/**
 * Simple mutable holder for one link: its URL, title and description.
 * All fields default to the empty string.
 */
public class LinkEntry {
    /** Address of the entry. */
    public String url = "";
    /** Title of the entry. */
    public String title = "";
    /** Free-text description of the entry. */
    public String desc = "";

    /**
     * Renders the entry as a multi-line block: a leading CRLF followed by
     * one CRLF-terminated "Url:", "Title:" and "Desc:" line.
     *
     * @return the formatted text
     */
    @Override
    public String toString() {
        final String eol = "\r\n";
        StringBuilder text = new StringBuilder(eol);
        text.append("Url: ").append(url).append(eol);
        text.append("Title: ").append(title).append(eol);
        text.append("Desc: ").append(desc).append(eol);
        return text.toString();
    }
}
StackoverflowCareers class
public class StackoverflowCareers {
public static List<LinkEntry> grab(int maxpage) {
List<LinkEntry> tag = new ArrayList<LinkEntry>();
for (int no = 1; no <= maxpage; no++) {
String link = "http://careers.stackoverflow.com/jobs";
if (no > 1) {
link += "?pg=" + no;
}
try {
Document doc = Jsoup.parse(new URL(link), 60000);
Elements elements = doc.select(".job");
Element element = null;
for (int i = 0; i < elements.size(); i++) {
element = elements.get(i);
Element child = element.select(".title").first();
if (child == null) continue;
String url = child.attr("href");
if (!url.startsWith("/jobs/")) continue;
int pos = url.lastIndexOf("?");
if (pos >= 0) url = url.substring(0, pos);
url = "http://careers.stackoverflow.com" + url;
try {
Document cdoc = Jsoup.parse(new URL(url), 60000);
child = cdoc.select("#title").first();
if (child == null) continue;
String title = child.text();
child = cdoc.select(".jobdetail .description").first();
if (child == null) continue;
String desc = child.text().trim();
if (desc.startsWith("Job Description")) {
desc = desc.substring(15).trim();
}
String head = "";
child = cdoc.select("#hed .employer").first();
if (child != null) {
head = child.text();
}
child = cdoc.select("#hed .location").first();
if (child != null) {
if (head.length() > 0) head += "\r\n";
head += child.text();
}
desc = head + "\r\n" + desc;
LinkEntry job = new LinkEntry();
job.url = url;
job.title = title;
job.desc = desc;
tag.add(job);
} catch (Exception e) {
logger.error("", e);
}
}
} catch (Exception e) {
logger.error("", e);
}
}
return tag;
}
}
No comments:
Post a Comment