Grab jobs from vWorker
- Create the vWorker class as shown below
- Call the vWorker.grab() method as shown below
Call vWorker.grab() method
// Fetch a single listing page (maxpage = 1) and log every job found on it.
List<vWorker.Job> jobs = vWorker.grab(1);
for (vWorker.Job job : jobs) {
    logger.info(job.toString());
}
vWorker class
/**
 * Scrapes bid requests ("jobs") from the vworker.com listing pages and then
 * enriches each job with data from its own detail page.
 *
 * <p>All parsing is done with jsoup against the HTML layout the site served when
 * this class was written; every selector and marker string below is tied to that
 * layout.
 */
public class vWorker {
    private static final Logger logger = Logger.getLogger(vWorker.class);

    /** Connect/read timeout, in milliseconds, passed to every jsoup fetch. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** First page of the bid request listing. */
    private static final String LIST_URL = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2";

    /** Substring of an anchor's href that marks it as a link to a job; the job id follows it. */
    private static final String JOB_LINK_PATTERN = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";

    /** Detail page URL is DETAIL_URL_PREFIX + job id + DETAIL_URL_SUFFIX. */
    private static final String DETAIL_URL_PREFIX = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";
    private static final String DETAIL_URL_SUFFIX = "&intProjectTab_TabId=1";

    /** Marker that precedes the next-page link inside the button's onclick script. */
    private static final String ACTION_MARKER = "action='";

    /** Labels of the detail-page table cells whose following cell is stored in {@link Job#data}. */
    private static final List<String> DETAIL_FIELDS = java.util.Arrays.asList(
            "Non-action ratio:", "Employer security verifications:", "Approved on:",
            "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:",
            "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:",
            "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):",
            "Accepted english fluency(ies):", "ExpertRating requirement:",
            "Project management:");

    /**
     * Collects jobs from the listing and loads the detail page of each one.
     *
     * <p>Failures are logged, never thrown. A failure while crawling the listing
     * keeps the jobs found so far; a failure on one detail page leaves that job
     * with whatever was already filled in and does not affect the other jobs.
     *
     * @param maxpage maximum number of listing pages to read; the first page is
     *                always read, even when this value is zero or negative
     * @return the jobs found, in listing order; never {@code null}
     */
    public static List<Job> grab(int maxpage) {
        List<Job> jobs = new ArrayList<Job>();
        try {
            collectListings(jobs, maxpage);
        } catch (Exception e) {
            logger.error("Failed to read the vWorker bid request listing", e);
        }
        for (Job job : jobs) {
            // Isolate each detail fetch so one broken page cannot discard the rest.
            try {
                loadDetails(job);
            } catch (Exception e) {
                logger.error("Failed to read details of vWorker job " + job.id, e);
            }
        }
        return jobs;
    }

    /** Walks the listing pages, appending one Job per recognised job link. */
    private static void collectListings(List<Job> jobs, int maxpage) throws java.io.IOException {
        URL page = new URL(LIST_URL);
        int pageno = 0;
        while (page != null) {
            Document doc = Jsoup.parse(page, TIMEOUT_MILLIS);
            for (Element anchor : doc.select("a")) {
                Job job = toJob(anchor);
                if (job != null) {
                    jobs.add(job);
                }
            }
            pageno++;
            String next = (pageno < maxpage) ? nextPageLink(doc) : null;
            // Resolve against the current page so a relative link works as well as an absolute one.
            page = (next == null) ? null : new URL(page, next);
        }
    }

    /**
     * Builds a Job from a listing anchor, or returns null when the anchor is not a
     * job link sitting inside {@code <td><font>...</font></td>}.
     */
    private static Job toJob(Element anchor) {
        String href = anchor.attr("href");
        int pos = href.indexOf(JOB_LINK_PATTERN);
        if (pos < 0) {
            return null;
        }
        Element font = anchor.parent();
        if (font == null || !font.tagName().equals("font")) {
            return null;
        }
        Element cell = font.parent();
        if (cell == null || !cell.tagName().equals("td")) {
            return null;
        }
        Job job = new Job();
        // Everything after the pattern is taken as the id, exactly as found in the href.
        job.id = href.substring(pos + JOB_LINK_PATTERN.length());
        job.title = anchor.text();
        job.shortDesc = shortDescription(cell);
        return job;
    }

    /**
     * Returns the text of the node two siblings after the parent of {@code cell},
     * or "" when that node does not exist.
     * NOTE(review): presumably the parent is the title row, the first sibling is
     * whitespace and the second is the description row - confirm against live HTML.
     */
    private static String shortDescription(Element cell) {
        Node node = cell.parent();
        for (int hops = 0; hops < 2 && node != null; hops++) {
            node = node.nextSibling();
        }
        if (node == null) {
            return "";
        }
        return Jsoup.parse(node.outerHtml()).text();
    }

    /**
     * Extracts the next-page link from the onclick script of the
     * {@code cmdNextPage} input, or returns null when there is no such input or
     * no non-empty link between {@code action='} and the last single quote.
     */
    private static String nextPageLink(Document doc) {
        Element button = doc.select("input[name=cmdNextPage]").first();
        if (button == null) {
            return null;
        }
        String onclick = button.attr("onclick");
        int start = onclick.indexOf(ACTION_MARKER);
        if (start < 0) {
            return null;
        }
        String rest = onclick.substring(start + ACTION_MARKER.length());
        int end = rest.lastIndexOf("'");
        if (end <= 0) {
            // No closing quote, or an empty link: nothing usable to follow.
            return null;
        }
        return rest.substring(0, end);
    }

    /** Fetches the job's detail page and fills in {@code job.data} and {@code job.longDesc}. */
    private static void loadDetails(Job job) throws java.io.IOException {
        URL url = new URL(DETAIL_URL_PREFIX + job.id + DETAIL_URL_SUFFIX);
        Document doc = Jsoup.parse(url, TIMEOUT_MILLIS);
        Element tab = doc.select("#idTabstripContent").first();
        if (tab == null) {
            return;
        }

        // A cell whose text is a known label is paired with the text of the cell after it.
        Elements cells = tab.select("td");
        for (int j = 0; j < cells.size(); j++) {
            String field = cells.get(j).text();
            if (!DETAIL_FIELDS.contains(field)) {
                continue;
            }
            String value = (j + 1 < cells.size()) ? cells.get(j + 1).text() : "";
            job.data.put(field, value);
        }

        for (Element bold : doc.select("b")) {
            if (!"Brief summary:".equals(bold.text().trim())) {
                continue;
            }
            Element body = bold.nextElementSibling();
            if (body != null) {
                body = body.nextElementSibling();
            }
            // An element holding the NoWorkInAdvance anchor is skipped; the description follows it.
            if (body != null && body.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) {
                body = body.nextElementSibling();
            }
            if (body != null) {
                job.longDesc = body.html();
            }
        }
    }

    /** One bid request: listing data plus whatever could be read from its detail page. */
    public static class Job {
        /** Text following the job link pattern in the listing href. */
        public String id = "";
        /** Text of the listing link. */
        public String title = "";
        /** Text taken from the listing, see {@code shortDescription}. */
        public String shortDesc = "";
        /** Inner HTML of the element found after the "Brief summary:" label. */
        public String longDesc = "";
        /** Detail-page label to value; insertion-ordered so output follows page order. */
        public Map<String, String> data = new java.util.LinkedHashMap<String, String>();

        /** Renders every field on its own CRLF-terminated line, starting with a blank line. */
        @Override
        public String toString() {
            StringBuilder tag = new StringBuilder("\r\n");
            tag.append("Id: ").append(id).append("\r\n");
            tag.append("Title: ").append(title).append("\r\n");
            tag.append("Short Desc: ").append(shortDesc).append("\r\n");
            tag.append("Long Desc: ").append(longDesc).append("\r\n");
            for (Map.Entry<String, String> entry : data.entrySet()) {
                tag.append(entry.getKey()).append(" ").append(entry.getValue()).append("\r\n");
            }
            return tag.toString();
        }
    }
}
No comments:
Post a Comment