Grab jobs from vWorker
- Create the vWorker class as follows
- Call the vWorker.grab() method as follows
Call vWorker.grab() method
vWorker class
1 | List<vWorker.Job> jobs = vWorker.grab(1); |
2 | for (int i = 0; i < jobs.size(); i++) { |
3 | vWorker.Job job = jobs.get(i); |
4 | logger.info(job.toString()); |
5 | } |
// Read one listing page of jobs and log every job that was found.
List<vWorker.Job> jobs = vWorker.grab(1);
for (vWorker.Job job : jobs) {
    logger.info(job.toString());
}
1 | public class vWorker { |
2 | |
3 | private static Logger logger = Logger.getLogger(vWorker.class); |
4 | |
5 | public static List<Job> grab(int maxpage) { |
6 | List<Job> jobs = new ArrayList<Job>(); |
7 | try { |
8 | boolean stop = false; |
9 | int pageno = 0; |
10 | String link = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2"; |
11 | while (!stop) { |
12 | Document doc = Jsoup.parse(new URL(link), 60000); |
13 | Elements elements = null; |
14 | Element element = null; |
15 | |
16 | elements = doc.select("a"); |
17 | for (int i = 0; i < elements.size(); i++) { |
18 | element = elements.get(i); |
19 | String val = element.attr("href"); |
20 | String pattern = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId="; |
21 | int pos = val.indexOf(pattern); |
22 | if (pos < 0) continue; |
23 | String code = val.substring(pos + pattern.length()); |
24 | String title = element.text(); |
25 | if (!element.parent().tagName().equals("font")) continue; |
26 | if (!element.parent().parent().tagName().equals("td")) continue; |
27 | Node parent = element.parent().parent().parent().nextSibling().nextSibling(); |
28 | Document pdoc = Jsoup.parse(parent.outerHtml()); |
29 | String shortDesc = pdoc.text(); |
30 | Job job = new Job(); |
31 | job.id = code; |
32 | job.title = title; |
33 | job.shortDesc = shortDesc; |
34 | jobs.add(job); |
35 | } |
36 | |
37 | elements = doc.select("input[name=cmdNextPage]"); |
38 | if (elements.size() == 0) { |
39 | stop = true; |
40 | } else { |
41 | element = elements.get(0); |
42 | String val = element.attr("onclick"); |
43 | int pos = val.indexOf("action='"); |
44 | if (pos < 0) { |
45 | stop = true; |
46 | continue; |
47 | } |
48 | val = val.substring(pos + 8); |
49 | pos = val.lastIndexOf("'"); |
50 | if (pos < 0) { |
51 | stop = true; |
52 | continue; |
53 | } |
54 | val = val.substring(0, pos); |
55 | link = val; |
56 | pageno++; |
57 | if (pageno >= maxpage) { |
58 | stop = true; |
59 | } |
60 | } |
61 | } |
62 | |
63 | for (int i = 0; i < jobs.size(); i++) { |
64 | Job job = jobs.get(i); |
65 | link = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=" + job.id + "&intProjectTab_TabId=1"; |
66 | Document doc = Jsoup.parse(new URL(link), 60000); |
67 | Elements elements = null; |
68 | Element element = null; |
69 | element = doc.select("#idTabstripContent").first(); |
70 | if (element != null) { |
71 | elements = element.select("td"); |
72 | String[] fields = new String[] { "Non-action ratio:", "Employer security verifications:", "Approved on:", "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:", "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:", "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):", "Accepted english fluency(ies):", "ExpertRating requirement:", "Project management:" }; |
73 | List<String> fieldList = new ArrayList<String>(); |
74 | for (int j = 0; j < fields.length; j++) { |
75 | fieldList.add(fields[j]); |
76 | } |
77 | for (int j = 0; j < elements.size(); j++) { |
78 | String field = elements.get(j).text(); |
79 | String value = ""; |
80 | if (j + 1 < elements.size()) { |
81 | value = elements.get(j + 1).text(); |
82 | } |
83 | if (fieldList.indexOf(field) >= 0) { |
84 | job.data.put(field, value); |
85 | } |
86 | } |
87 | elements = doc.select("b"); |
88 | for (int j = 0; j < elements.size(); j++) { |
89 | element = elements.get(j); |
90 | String name = element.text().trim(); |
91 | if ("Brief summary:".equals(name)) { |
92 | element = element.nextElementSibling().nextElementSibling(); |
93 | if (element.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) { |
94 | element = element.nextElementSibling(); |
95 | } |
96 | job.longDesc = element.html(); |
97 | } |
98 | } |
99 | } |
100 | } |
101 | } catch (Exception e) { |
102 | logger.error("", e); |
103 | } |
104 | return jobs; |
105 | } |
106 | |
107 | public static class Job { |
108 | public String id = ""; |
109 | public String title = ""; |
110 | public String shortDesc = ""; |
111 | public String longDesc = ""; |
112 | public Map<String, String> data = new HashMap<String, String>(); |
113 | |
114 | public String toString() { |
115 | String tag = "\r\n"; |
116 | tag += "Id: " + id + "\r\n"; |
117 | tag += "Title: " + title + "\r\n"; |
118 | tag += "Short Desc: " + shortDesc + "\r\n"; |
119 | tag += "Long Desc: " + longDesc + "\r\n"; |
120 | for (String key : data.keySet()) { |
121 | tag += key + " " + data.get(key) + "\r\n"; |
122 | } |
123 | return tag; |
124 | } |
125 | } |
126 | } |
/**
 * Scrapes open bid requests ("jobs") from the vWorker web site.
 *
 * <p>The scrape runs in two phases: the paginated listing is walked to collect each job's id,
 * title and short description, then every job's detail page is fetched to fill in the labelled
 * detail fields and the long description.
 */
public class vWorker {

    private static final Logger logger = Logger.getLogger(vWorker.class);

    /** Timeout, in milliseconds, passed to every {@code Jsoup.parse(URL, int)} call. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** First page of the job listing. */
    private static final String LISTING_URL = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2";

    /** Substring of an anchor's {@code href} that marks a link to a job; the job id follows it. */
    private static final String JOB_LINK_PATTERN = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";

    private static final String DETAIL_URL_PREFIX = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";
    private static final String DETAIL_URL_SUFFIX = "&intProjectTab_TabId=1";

    /** Marker that precedes the next-page address inside the "next page" button's onclick. */
    private static final String ACTION_MARKER = "action='";

    /** Labels on the detail page whose neighbouring cell is stored in {@link Job#data}. */
    private static final String[] DETAIL_FIELDS = { "Non-action ratio:", "Employer security verifications:", "Approved on:", "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:", "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:", "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):", "Accepted english fluency(ies):", "ExpertRating requirement:", "Project management:" };

    /**
     * Collects jobs from the listing and enriches each one from its detail page.
     *
     * <p>This method never throws. If reading the listing fails, the error is logged and the jobs
     * collected so far are returned without details. If a single detail page fails, the error is
     * logged and the remaining jobs are still processed.
     *
     * @param maxpage maximum number of listing pages to read; at least one page is always read
     * @return the jobs that were found, never {@code null}
     */
    public static List<Job> grab(int maxpage) {
        List<Job> jobs = new ArrayList<Job>();
        try {
            collectListing(jobs, maxpage);
        } catch (Exception e) {
            logger.error("Failed to read the vWorker job listing", e);
            return jobs;
        }
        for (Job job : jobs) {
            try {
                loadDetails(job);
            } catch (Exception e) {
                // A broken detail page must not cost the other jobs their details.
                logger.error("Failed to read details for vWorker job " + job.id, e);
            }
        }
        return jobs;
    }

    /** Walks the listing pages, appending every job found to {@code jobs}. */
    private static void collectListing(List<Job> jobs, int maxpage) throws java.io.IOException {
        URL pageUrl = new URL(LISTING_URL);
        int pagesRead = 0;
        while (pageUrl != null) {
            Document doc = Jsoup.parse(pageUrl, TIMEOUT_MILLIS);
            for (Element anchor : doc.select("a")) {
                Job job = parseListingAnchor(anchor);
                if (job != null) {
                    jobs.add(job);
                }
            }
            pagesRead++;
            if (pagesRead >= maxpage) {
                break;
            }
            String next = nextPageLink(doc);
            // Resolving against the current page accepts relative as well as absolute links.
            pageUrl = (next == null) ? null : new URL(pageUrl, next);
        }
    }

    /** Builds a job from a listing anchor, or returns {@code null} if it is not a job title link. */
    private static Job parseListingAnchor(Element anchor) {
        String href = anchor.attr("href");
        int pos = href.indexOf(JOB_LINK_PATTERN);
        if (pos < 0) {
            return null;
        }
        // Only anchors nested as <td><font><a> are accepted as job title links.
        Element font = anchor.parent();
        if (font == null || !font.tagName().equals("font")) {
            return null;
        }
        Element cell = font.parent();
        if (cell == null || !cell.tagName().equals("td")) {
            return null;
        }

        Job job = new Job();
        job.id = href.substring(pos + JOB_LINK_PATTERN.length());
        job.title = anchor.text();
        job.shortDesc = shortDescriptionAfter(cell.parent());
        return job;
    }

    /**
     * Returns the text of the node two siblings after {@code row} (the parent of the title cell,
     * presumably its table row), or an empty string when that node does not exist.
     */
    private static String shortDescriptionAfter(Element row) {
        Node node = (row == null) ? null : row.nextSibling();
        if (node != null) {
            node = node.nextSibling();
        }
        if (node == null) {
            return "";
        }
        return Jsoup.parse(node.outerHtml()).text();
    }

    /**
     * Extracts the next-page address from the {@code cmdNextPage} button's onclick attribute.
     *
     * @return the address, or {@code null} when there is no usable next-page link
     */
    private static String nextPageLink(Document doc) {
        Element button = doc.select("input[name=cmdNextPage]").first();
        if (button == null) {
            return null;
        }
        String onclick = button.attr("onclick");
        int start = onclick.indexOf(ACTION_MARKER);
        if (start < 0) {
            return null;
        }
        String rest = onclick.substring(start + ACTION_MARKER.length());
        int end = rest.lastIndexOf("'");
        if (end <= 0) {
            // No closing quote, or an empty address: treat as "no further page".
            return null;
        }
        return rest.substring(0, end);
    }

    /** Fetches the job's detail page and fills in {@link Job#data} and {@link Job#longDesc}. */
    private static void loadDetails(Job job) throws java.io.IOException {
        URL detailUrl = new URL(DETAIL_URL_PREFIX + job.id + DETAIL_URL_SUFFIX);
        Document doc = Jsoup.parse(detailUrl, TIMEOUT_MILLIS);
        Element content = doc.select("#idTabstripContent").first();
        if (content == null) {
            return;
        }

        // Each known label cell is paired with the cell that follows it.
        Elements cells = content.select("td");
        for (int i = 0; i < cells.size(); i++) {
            String label = cells.get(i).text();
            if (!isDetailField(label)) {
                continue;
            }
            String value = (i + 1 < cells.size()) ? cells.get(i + 1).text() : "";
            job.data.put(label, value);
        }

        for (Element bold : doc.select("b")) {
            if (!"Brief summary:".equals(bold.text().trim())) {
                continue;
            }
            // The summary is the second element sibling after the heading; if that element
            // holds the "NoWorkInAdvance" anchor, the summary is the one after it.
            Element body = bold.nextElementSibling();
            if (body != null) {
                body = body.nextElementSibling();
            }
            if (body != null && body.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) {
                body = body.nextElementSibling();
            }
            if (body != null) {
                job.longDesc = body.html();
            }
        }
    }

    /** Returns whether {@code label} is one of the detail labels worth storing. */
    private static boolean isDetailField(String label) {
        for (String field : DETAIL_FIELDS) {
            if (field.equals(label)) {
                return true;
            }
        }
        return false;
    }

    /** One scraped job. All fields default to empty values and are filled in by {@link #grab}. */
    public static class Job {
        /** Job id taken from the listing link. */
        public String id = "";
        /** Link text of the job in the listing. */
        public String title = "";
        /** Text of the description found next to the job in the listing. */
        public String shortDesc = "";
        /** HTML of the "Brief summary" section of the detail page. */
        public String longDesc = "";
        /** Detail-page values keyed by their label, e.g. {@code "Deadline:"}. */
        public Map<String, String> data = new HashMap<String, String>();

        /** Returns a multi-line, CRLF-separated dump of all fields, starting with a blank line. */
        @Override
        public String toString() {
            StringBuilder tag = new StringBuilder("\r\n");
            tag.append("Id: ").append(id).append("\r\n");
            tag.append("Title: ").append(title).append("\r\n");
            tag.append("Short Desc: ").append(shortDesc).append("\r\n");
            tag.append("Long Desc: ").append(longDesc).append("\r\n");
            for (Map.Entry<String, String> entry : data.entrySet()) {
                tag.append(entry.getKey()).append(" ").append(entry.getValue()).append("\r\n");
            }
            return tag.toString();
        }
    }
}
No comments:
Post a Comment