Monday, 20 February 2012

Grab jobs from vWorker

Grab jobs from vWorker
This task uses Java and jsoup to grab jobs from vWorker.
Grab jobs from vWorker
  1. Create the vWorker class as follows
  2. Call the vWorker.grab() method as follows
Call vWorker.grab() method
// Fetch a single listing page worth of jobs and log each one.
List<vWorker.Job> jobs = vWorker.grab(1);
for (vWorker.Job job : jobs) {
    logger.info(job.toString());
}
    
vWorker class
/**
 * Scrapes bid requests ("jobs") from the vWorker web site with jsoup.
 *
 * <p>The scrape runs in two phases: the paginated listing is read to collect
 * each job's id, title and short description, then every job's detail page is
 * fetched to fill in the long description and a set of named attributes.
 */
public class vWorker {

    private static final Logger logger = Logger.getLogger(vWorker.class);

    /** URL of the first listing page; later pages come from the "next page" button. */
    private static final String FIRST_PAGE_URL = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2";

    /** Fragment of a job link's href; whatever follows it is used as the job id. */
    private static final String JOB_LINK_MARKER = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";

    /** Text preceding the target URL inside the next-page button's onclick handler. */
    private static final String ACTION_MARKER = "action='";

    /** Timeout, in milliseconds, passed to every jsoup page fetch. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** Labels of the detail-page table cells whose following cell is stored in {@link Job#data}. */
    private static final String[] DETAIL_FIELDS = { "Non-action ratio:", "Employer security verifications:", "Approved on:", "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:", "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:", "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):", "Accepted english fluency(ies):", "ExpertRating requirement:", "Project management:" };

    /**
     * Grabs jobs from the listing and enriches each one from its detail page.
     *
     * <p>Failures are logged rather than thrown. If the listing cannot be read,
     * the jobs collected so far are returned without details. If a single
     * detail page cannot be read, that job keeps its listing data and the
     * remaining jobs are still processed.
     *
     * @param maxpage maximum number of listing pages to read; the first page is
     *                always read, even when this value is zero or negative
     * @return the jobs found, never {@code null}
     */
    public static List<Job> grab(int maxpage) {
        List<Job> jobs = new ArrayList<Job>();
        try {
            collectListings(jobs, maxpage);
        } catch (Exception e) {
            logger.error("Failed to read the job listing; returning " + jobs.size() + " job(s) without details", e);
            return jobs;
        }

        for (int i = 0; i < jobs.size(); i++) {
            Job job = jobs.get(i);
            try {
                loadDetails(job);
            } catch (Exception e) {
                // One broken detail page must not discard the details of every later job.
                logger.error("Failed to read details of job " + job.id, e);
            }
        }
        return jobs;
    }

    /** Walks the listing pages, appending one Job per recognised job link. */
    private static void collectListings(List<Job> jobs, int maxpage) throws Exception {
        String link = FIRST_PAGE_URL;
        int pageno = 0;
        while (link != null) {
            Document doc = Jsoup.parse(new URL(link), TIMEOUT_MILLIS);
            Elements anchors = doc.select("a");
            for (int i = 0; i < anchors.size(); i++) {
                Job job = toJob(anchors.get(i));
                if (job != null) {
                    jobs.add(job);
                }
            }
            pageno++;
            if (pageno >= maxpage) {
                link = null;
            } else {
                link = nextPageLink(doc);
            }
        }
    }

    /** Builds a Job from a listing anchor, or returns null when the anchor is not a job link. */
    private static Job toJob(Element anchor) {
        String href = anchor.attr("href");
        int pos = href.indexOf(JOB_LINK_MARKER);
        if (pos < 0) {
            return null;
        }
        // Only links nested as <td><font><a> are accepted as job titles.
        Element font = anchor.parent();
        if (font == null || !font.tagName().equals("font")) {
            return null;
        }
        Element cell = font.parent();
        if (cell == null || !cell.tagName().equals("td")) {
            return null;
        }
        Job job = new Job();
        job.id = href.substring(pos + JOB_LINK_MARKER.length());
        job.title = anchor.text();
        job.shortDesc = shortDescription(cell);
        return job;
    }

    /**
     * Returns the text of the node two siblings after the title cell's parent,
     * or an empty string when that node does not exist.
     */
    private static String shortDescription(Element cell) {
        Node node = cell.parent();
        for (int hop = 0; hop < 2 && node != null; hop++) {
            node = node.nextSibling();
        }
        if (node == null) {
            return "";
        }
        // Re-parse the node's HTML so that only its visible text is kept.
        return Jsoup.parse(node.outerHtml()).text();
    }

    /** Extracts the next listing URL from the "cmdNextPage" button, or null when there is none. */
    private static String nextPageLink(Document doc) {
        Element button = doc.select("input[name=cmdNextPage]").first();
        if (button == null) {
            return null;
        }
        String onclick = button.attr("onclick");
        int start = onclick.indexOf(ACTION_MARKER);
        if (start < 0) {
            return null;
        }
        String rest = onclick.substring(start + ACTION_MARKER.length());
        int end = rest.lastIndexOf("'");
        if (end < 0) {
            return null;
        }
        return rest.substring(0, end);
    }

    /** Fetches a job's detail page and fills in {@code data} and {@code longDesc}. */
    private static void loadDetails(Job job) throws Exception {
        String link = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=" + job.id + "&intProjectTab_TabId=1";
        Document doc = Jsoup.parse(new URL(link), TIMEOUT_MILLIS);
        Element content = doc.select("#idTabstripContent").first();
        if (content == null) {
            return;
        }

        // A cell whose text is a known label is paired with the text of the next cell.
        Elements cells = content.select("td");
        for (int j = 0; j < cells.size(); j++) {
            String field = cells.get(j).text();
            if (!isDetailField(field)) {
                continue;
            }
            String value = "";
            if (j + 1 < cells.size()) {
                value = cells.get(j + 1).text();
            }
            job.data.put(field, value);
        }

        Elements bolds = doc.select("b");
        for (int j = 0; j < bolds.size(); j++) {
            Element label = bolds.get(j);
            if (!"Brief summary:".equals(label.text().trim())) {
                continue;
            }
            // The summary is read from the second element sibling after the label.
            Element body = label.nextElementSibling();
            if (body != null) {
                body = body.nextElementSibling();
            }
            // Skip the element that holds the "NoWorkInAdvance" anchor.
            if (body != null && body.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) {
                body = body.nextElementSibling();
            }
            if (body != null) {
                job.longDesc = body.html();
            }
        }
    }

    /** Tells whether the given cell text is one of the labels in DETAIL_FIELDS. */
    private static boolean isDetailField(String text) {
        for (int i = 0; i < DETAIL_FIELDS.length; i++) {
            if (DETAIL_FIELDS[i].equals(text)) {
                return true;
            }
        }
        return false;
    }

    /** One scraped job; every field defaults to an empty value rather than null. */
    public static class Job {
        /** Text following the job-link marker in the listing href. */
        public String id = "";
        /** Link text of the job in the listing. */
        public String title = "";
        /** Text found next to the job title in the listing. */
        public String shortDesc = "";
        /** Inner HTML of the "Brief summary:" section of the detail page. */
        public String longDesc = "";
        /** Detail-page attributes keyed by their label, including the trailing colon. */
        public Map<String, String> data = new HashMap<String, String>();

        /** Returns a multi-line, CRLF-separated dump of all fields for logging. */
        @Override
        public String toString() {
            StringBuilder tag = new StringBuilder("\r\n");
            tag.append("Id: ").append(id).append("\r\n");
            tag.append("Title: ").append(title).append("\r\n");
            tag.append("Short Desc: ").append(shortDesc).append("\r\n");
            tag.append("Long Desc: ").append(longDesc).append("\r\n");
            for (Map.Entry<String, String> entry : data.entrySet()) {
                tag.append(entry.getKey()).append(" ").append(entry.getValue()).append("\r\n");
            }
            return tag.toString();
        }
    }
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment