Grab jobs from Stack Overflow Careers
Grab jobs from Stack Overflow Careers
- Create the LinkEntry class as follows
- Create the StackoverflowCareers class as follows
- Call the StackoverflowCareers.grab() method as follows
Call StackoverflowCareers.grab() method
// Grab the first listing page, then log every job with its 1-based position.
List<LinkEntry> links = StackoverflowCareers.grab(1);
int position = 0;
for (LinkEntry entry : links) {
    position++;
    logger.info("\r\n(" + position + ")\r\n" + entry.toString());
}
1 | List<LinkEntry> links = StackoverflowCareers.grab(1); |
2 | |
3 | for (int i = 0; i < links.size(); i++) { |
4 | logger.info("\r\n(" + (i + 1) + ")\r\n" + links.get(i).toString()); |
5 | } |
// Grab the first listing page, then log every job with its 1-based position.
List<LinkEntry> links = StackoverflowCareers.grab(1);
int position = 0;
for (LinkEntry entry : links) {
    position++;
    logger.info("\r\n(" + position + ")\r\n" + entry.toString());
}
LinkEntry class
/**
 * Plain data holder for one scraped job posting.
 *
 * <p>The fields are public and mutable because {@code StackoverflowCareers.grab}
 * fills them in by direct assignment after constructing the entry.
 */
public class LinkEntry {

    /** Link to the job posting; empty until assigned. */
    public String url = "";

    /** Title of the job posting; empty until assigned. */
    public String title = "";

    /** Description text of the job posting; empty until assigned. */
    public String desc = "";

    /**
     * Formats the entry for logging.
     *
     * @return a leading CRLF followed by the {@code Url:}, {@code Title:} and
     *         {@code Desc:} lines, each terminated by CRLF
     */
    @Override
    public String toString() {
        return "\r\n"
                + "Url: " + url + "\r\n"
                + "Title: " + title + "\r\n"
                + "Desc: " + desc + "\r\n";
    }
}
1 | public class LinkEntry { |
2 | |
3 | public String url = ""; |
4 | public String title = ""; |
5 | public String desc = ""; |
6 | |
7 | public String toString() { |
8 | String tag = "\r\n"; |
9 | tag += "Url: " + url + "\r\n"; |
10 | tag += "Title: " + title + "\r\n"; |
11 | tag += "Desc: " + desc + "\r\n"; |
12 | return tag; |
13 | } |
14 | |
15 | } |
/**
 * Plain data holder for one scraped job posting.
 *
 * <p>The fields are public and mutable because {@code StackoverflowCareers.grab}
 * fills them in by direct assignment after constructing the entry.
 */
public class LinkEntry {

    /** Link to the job posting; empty until assigned. */
    public String url = "";

    /** Title of the job posting; empty until assigned. */
    public String title = "";

    /** Description text of the job posting; empty until assigned. */
    public String desc = "";

    /**
     * Formats the entry for logging.
     *
     * @return a leading CRLF followed by the {@code Url:}, {@code Title:} and
     *         {@code Desc:} lines, each terminated by CRLF
     */
    @Override
    public String toString() {
        return "\r\n"
                + "Url: " + url + "\r\n"
                + "Title: " + title + "\r\n"
                + "Desc: " + desc + "\r\n";
    }
}
StackoverflowCareers class
public class StackoverflowCareers {
public static List<LinkEntry> grab(int maxpage) {
List<LinkEntry> tag = new ArrayList<LinkEntry>();
for (int no = 1; no <= maxpage; no++) {
String link = "http://careers.stackoverflow.com/jobs";
if (no > 1) {
link += "?pg=" + no;
}
try {
Document doc = Jsoup.parse(new URL(link), 60000);
Elements elements = doc.select(".job");
Element element = null;
for (int i = 0; i < elements.size(); i++) {
element = elements.get(i);
Element child = element.select(".title").first();
if (child == null) continue;
String url = child.attr("href");
if (!url.startsWith("/jobs/")) continue;
int pos = url.lastIndexOf("?");
if (pos >= 0) url = url.substring(0, pos);
url = "http://careers.stackoverflow.com" + url;
try {
Document cdoc = Jsoup.parse(new URL(url), 60000);
child = cdoc.select("#title").first();
if (child == null) continue;
String title = child.text();
child = cdoc.select(".jobdetail .description").first();
if (child == null) continue;
String desc = child.text().trim();
if (desc.startsWith("Job Description")) {
desc = desc.substring(15).trim();
}
String head = "";
child = cdoc.select("#hed .employer").first();
if (child != null) {
head = child.text();
}
child = cdoc.select("#hed .location").first();
if (child != null) {
if (head.length() > 0) head += "\r\n";
head += child.text();
}
desc = head + "\r\n" + desc;
LinkEntry job = new LinkEntry();
job.url = url;
job.title = title;
job.desc = desc;
tag.add(job);
} catch (Exception e) {
logger.error("", e);
}
}
} catch (Exception e) {
logger.error("", e);
}
}
return tag;
}
}
1 | public class StackoverflowCareers { |
2 | |
3 | public static List<LinkEntry> grab(int maxpage) { |
4 | List<LinkEntry> tag = new ArrayList<LinkEntry>(); |
5 | for (int no = 1; no <= maxpage; no++) { |
6 | String link = "http://careers.stackoverflow.com/jobs"; |
7 | if (no > 1) { |
8 | link += "?pg=" + no; |
9 | } |
10 | try { |
11 | Document doc = Jsoup.parse(new URL(link), 60000); |
12 | Elements elements = doc.select(".job"); |
13 | Element element = null; |
14 | for (int i = 0; i < elements.size(); i++) { |
15 | element = elements.get(i); |
16 | Element child = element.select(".title").first(); |
17 | if (child == null) continue; |
18 | String url = child.attr("href"); |
19 | if (!url.startsWith("/jobs/")) continue; |
20 | int pos = url.lastIndexOf("?"); |
21 | if (pos >= 0) url = url.substring(0, pos); |
22 | url = "http://careers.stackoverflow.com" + url; |
23 | try { |
24 | Document cdoc = Jsoup.parse(new URL(url), 60000); |
25 | child = cdoc.select("#title").first(); |
26 | if (child == null) continue; |
27 | String title = child.text(); |
28 | child = cdoc.select(".jobdetail .description").first(); |
29 | if (child == null) continue; |
30 | String desc = child.text().trim(); |
31 | if (desc.startsWith("Job Description")) { |
32 | desc = desc.substring(15).trim(); |
33 | } |
34 | String head = ""; |
35 | child = cdoc.select("#hed .employer").first(); |
36 | if (child != null) { |
37 | head = child.text(); |
38 | } |
39 | child = cdoc.select("#hed .location").first(); |
40 | if (child != null) { |
41 | if (head.length() > 0) head += "\r\n"; |
42 | head += child.text(); |
43 | } |
44 | desc = head + "\r\n" + desc; |
45 | LinkEntry job = new LinkEntry(); |
46 | job.url = url; |
47 | job.title = title; |
48 | job.desc = desc; |
49 | tag.add(job); |
50 | } catch (Exception e) { |
51 | logger.error("", e); |
52 | } |
53 | } |
54 | } catch (Exception e) { |
55 | logger.error("", e); |
56 | } |
57 | } |
58 | return tag; |
59 | } |
60 | |
61 | } |
public class StackoverflowCareers {
public static List<LinkEntry> grab(int maxpage) {
List<LinkEntry> tag = new ArrayList<LinkEntry>();
for (int no = 1; no <= maxpage; no++) {
String link = "http://careers.stackoverflow.com/jobs";
if (no > 1) {
link += "?pg=" + no;
}
try {
Document doc = Jsoup.parse(new URL(link), 60000);
Elements elements = doc.select(".job");
Element element = null;
for (int i = 0; i < elements.size(); i++) {
element = elements.get(i);
Element child = element.select(".title").first();
if (child == null) continue;
String url = child.attr("href");
if (!url.startsWith("/jobs/")) continue;
int pos = url.lastIndexOf("?");
if (pos >= 0) url = url.substring(0, pos);
url = "http://careers.stackoverflow.com" + url;
try {
Document cdoc = Jsoup.parse(new URL(url), 60000);
child = cdoc.select("#title").first();
if (child == null) continue;
String title = child.text();
child = cdoc.select(".jobdetail .description").first();
if (child == null) continue;
String desc = child.text().trim();
if (desc.startsWith("Job Description")) {
desc = desc.substring(15).trim();
}
String head = "";
child = cdoc.select("#hed .employer").first();
if (child != null) {
head = child.text();
}
child = cdoc.select("#hed .location").first();
if (child != null) {
if (head.length() > 0) head += "\r\n";
head += child.text();
}
desc = head + "\r\n" + desc;
LinkEntry job = new LinkEntry();
job.url = url;
job.title = title;
job.desc = desc;
tag.add(job);
} catch (Exception e) {
logger.error("", e);
}
}
} catch (Exception e) {
logger.error("", e);
}
}
return tag;
}
}
No comments:
Post a Comment