Monday, 20 February 2012

Grab jobs from vWorker

Grab jobs from vWorker
This task uses Java and jsoup to grab jobs from vWorker.
Grab jobs from vWorker
  1. Create the vWorker class as follows
  2. Call the vWorker.grab() method as follows
Call vWorker.grab() method
1List<vWorker.Job> jobs = vWorker.grab(1);
2for (int i = 0; i < jobs.size(); i++) {
3 vWorker.Job job = jobs.get(i);
4 logger.info(job.toString());
5}
// Fetch at most one listing page, then log every job that was scraped.
List<vWorker.Job> jobs = vWorker.grab(1);
for (vWorker.Job job : jobs) {
    logger.info(job.toString());
}
vWorker class
1public class vWorker {
2
3 private static Logger logger = Logger.getLogger(vWorker.class);
4
5 public static List<Job> grab(int maxpage) {
6 List<Job> jobs = new ArrayList<Job>();
7 try {
8 boolean stop = false;
9 int pageno = 0;
10 String link = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2";
11 while (!stop) {
12 Document doc = Jsoup.parse(new URL(link), 60000);
13 Elements elements = null;
14 Element element = null;
15
16 elements = doc.select("a");
17 for (int i = 0; i < elements.size(); i++) {
18 element = elements.get(i);
19 String val = element.attr("href");
20 String pattern = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";
21 int pos = val.indexOf(pattern);
22 if (pos < 0) continue;
23 String code = val.substring(pos + pattern.length());
24 String title = element.text();
25 if (!element.parent().tagName().equals("font")) continue;
26 if (!element.parent().parent().tagName().equals("td")) continue;
27 Node parent = element.parent().parent().parent().nextSibling().nextSibling();
28 Document pdoc = Jsoup.parse(parent.outerHtml());
29 String shortDesc = pdoc.text();
30 Job job = new Job();
31 job.id = code;
32 job.title = title;
33 job.shortDesc = shortDesc;
34 jobs.add(job);
35 }
36
37 elements = doc.select("input[name=cmdNextPage]");
38 if (elements.size() == 0) {
39 stop = true;
40 } else {
41 element = elements.get(0);
42 String val = element.attr("onclick");
43 int pos = val.indexOf("action='");
44 if (pos < 0) {
45 stop = true;
46 continue;
47 }
48 val = val.substring(pos + 8);
49 pos = val.lastIndexOf("'");
50 if (pos < 0) {
51 stop = true;
52 continue;
53 }
54 val = val.substring(0, pos);
55 link = val;
56 pageno++;
57 if (pageno >= maxpage) {
58 stop = true;
59 }
60 }
61 }
62
63 for (int i = 0; i < jobs.size(); i++) {
64 Job job = jobs.get(i);
65 link = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=" + job.id + "&intProjectTab_TabId=1";
66 Document doc = Jsoup.parse(new URL(link), 60000);
67 Elements elements = null;
68 Element element = null;
69 element = doc.select("#idTabstripContent").first();
70 if (element != null) {
71 elements = element.select("td");
72 String[] fields = new String[] { "Non-action ratio:", "Employer security verifications:", "Approved on:", "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:", "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:", "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):", "Accepted english fluency(ies):", "ExpertRating requirement:", "Project management:" };
73 List<String> fieldList = new ArrayList<String>();
74 for (int j = 0; j < fields.length; j++) {
75 fieldList.add(fields[j]);
76 }
77 for (int j = 0; j < elements.size(); j++) {
78 String field = elements.get(j).text();
79 String value = "";
80 if (j + 1 < elements.size()) {
81 value = elements.get(j + 1).text();
82 }
83 if (fieldList.indexOf(field) >= 0) {
84 job.data.put(field, value);
85 }
86 }
87 elements = doc.select("b");
88 for (int j = 0; j < elements.size(); j++) {
89 element = elements.get(j);
90 String name = element.text().trim();
91 if ("Brief summary:".equals(name)) {
92 element = element.nextElementSibling().nextElementSibling();
93 if (element.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) {
94 element = element.nextElementSibling();
95 }
96 job.longDesc = element.html();
97 }
98 }
99 }
100 }
101 } catch (Exception e) {
102 logger.error("", e);
103 }
104 return jobs;
105 }
106
107 public static class Job {
108 public String id = "";
109 public String title = "";
110 public String shortDesc = "";
111 public String longDesc = "";
112 public Map<String, String> data = new HashMap<String, String>();
113
114 public String toString() {
115 String tag = "\r\n";
116 tag += "Id: " + id + "\r\n";
117 tag += "Title: " + title + "\r\n";
118 tag += "Short Desc: " + shortDesc + "\r\n";
119 tag += "Long Desc: " + longDesc + "\r\n";
120 for (String key : data.keySet()) {
121 tag += key + " " + data.get(key) + "\r\n";
122 }
123 return tag;
124 }
125 }
126}
/**
 * Scrapes job postings ("bid requests") from vWorker.com with jsoup.
 *
 * <p>The scrape runs in two phases: first the paginated listing is walked to
 * collect each job's id, title and short description; then every job's detail
 * page is fetched to fill in its attribute table and long description.
 *
 * <p>All parsing relies on the markup the site served when this was written
 * (anchors wrapped in {@code font} inside {@code td}, a
 * {@code cmdNextPage} button, an {@code #idTabstripContent} container).
 * Elements that are missing are skipped instead of aborting the run.
 */
public class vWorker {

    private static final Logger logger = Logger.getLogger(vWorker.class);

    /** Connect/read timeout, in milliseconds, handed to jsoup for every page fetch. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** First page of the job listing. */
    private static final String LIST_URL = "http://www.vworker.com/RentACoder/DotNet/misc/BidRequests/ShowBidRequests.aspx?lngBidRequestListType=3&optSortTitle=2&lngBidRequestCategoryId=-1&txtMaxNumberOfEntriesPerPage=10&optBidRequestPhase=2&lngSortColumn=-6&blnModeVerbose=True&optBiddingExpiration=1&intTabSelectedId=2";

    /** Href fragment identifying a link to a job; the job id is whatever follows it. */
    private static final String JOB_HREF_PATTERN = "/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";

    /** Detail page URL is {@code DETAIL_URL_PREFIX + id + DETAIL_URL_SUFFIX}. */
    private static final String DETAIL_URL_PREFIX = "http://vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=";
    private static final String DETAIL_URL_SUFFIX = "&intProjectTab_TabId=1";

    /** Marker that opens the "action='...'" part of the next-page button's onclick handler. */
    private static final String ACTION_MARKER = "action='";

    /**
     * Labels of the detail-page table cells that are copied into {@link Job#data}.
     * Built once instead of per job. Fully qualified JDK names are used so this
     * class needs no imports beyond the ones it already had.
     */
    private static final List<String> DETAIL_FIELDS = java.util.Collections.unmodifiableList(
            java.util.Arrays.asList(
                    "Non-action ratio:", "Employer security verifications:", "Approved on:",
                    "Bidding closes:", "Viewed (by workers):", "Deadline:", "Phase:",
                    "Sourcing type:", "Payment Model:", "Max Accepted Bid:", "Expert Guarantee:",
                    "Estimated size:", "Bidding type:", "Accepted bidder economy type(s):",
                    "Accepted english fluency(ies):", "ExpertRating requirement:",
                    "Project management:"));

    /**
     * Grabs jobs from the vWorker listing and enriches each with its detail page.
     *
     * <p>This method never throws: failures are logged and whatever was collected
     * up to that point is returned. A failure while loading one job's details
     * does not prevent the remaining jobs from being loaded.
     *
     * @param maxpage maximum number of listing pages to read; the first page is
     *                always fetched, even when this value is zero or negative
     * @return the scraped jobs in listing order, never {@code null}
     */
    public static List<Job> grab(int maxpage) {
        List<Job> jobs = new ArrayList<Job>();
        try {
            collectListings(jobs, maxpage);
        } catch (Exception e) {
            // Boundary catch: keep the jobs gathered before the failure.
            logger.error("Failed to read the vWorker job listing; continuing with "
                    + jobs.size() + " job(s) collected so far", e);
        }
        for (Job job : jobs) {
            try {
                loadDetails(job);
            } catch (Exception e) {
                // One broken detail page must not discard the details of the others.
                logger.error("Failed to load details for job " + job.id, e);
            }
        }
        return jobs;
    }

    /** Walks the paginated listing and appends every job found to {@code jobs}. */
    private static void collectListings(List<Job> jobs, int maxpage) throws java.io.IOException {
        URL pageUrl = new URL(LIST_URL);
        int pageno = 0;
        while (true) {
            Document doc = Jsoup.parse(pageUrl, TIMEOUT_MILLIS);

            Elements anchors = doc.select("a");
            for (int i = 0; i < anchors.size(); i++) {
                Job job = toJob(anchors.get(i));
                if (job != null) {
                    jobs.add(job);
                }
            }

            String next = nextPageLink(doc);
            if (next == null) {
                return;
            }
            pageno++;
            if (pageno >= maxpage) {
                return;
            }
            // Resolve against the current page so a relative link works too;
            // an absolute link is used as-is.
            pageUrl = new URL(pageUrl, next);
        }
    }

    /**
     * Converts a listing anchor into a job, or returns {@code null} when the
     * anchor is not a job title link (wrong href or not wrapped in font/td).
     */
    private static Job toJob(Element anchor) {
        String href = anchor.attr("href");
        int pos = href.indexOf(JOB_HREF_PATTERN);
        if (pos < 0) {
            return null;
        }
        Element font = anchor.parent();
        if (font == null || !font.tagName().equals("font")) {
            return null;
        }
        Element cell = font.parent();
        if (cell == null || !cell.tagName().equals("td")) {
            return null;
        }

        // The short description sits two sibling nodes after the title's row.
        Element row = cell.parent();
        Node descNode = (row == null) ? null : row.nextSibling();
        if (descNode != null) {
            descNode = descNode.nextSibling();
        }

        Job job = new Job();
        job.id = href.substring(pos + JOB_HREF_PATTERN.length());
        job.title = anchor.text();
        if (descNode != null) {
            // Re-parse the node's HTML to obtain its plain text.
            job.shortDesc = Jsoup.parse(descNode.outerHtml()).text();
        }
        return job;
    }

    /**
     * Extracts the next-page link from the {@code cmdNextPage} button's onclick
     * handler: the text between {@code action='} and the last single quote.
     *
     * @return the link as written in the page, or {@code null} if there is no
     *         next-page button or its handler has an unexpected form
     */
    private static String nextPageLink(Document doc) {
        Elements buttons = doc.select("input[name=cmdNextPage]");
        if (buttons.size() == 0) {
            return null;
        }
        String onclick = buttons.get(0).attr("onclick");
        int start = onclick.indexOf(ACTION_MARKER);
        if (start < 0) {
            return null;
        }
        String tail = onclick.substring(start + ACTION_MARKER.length());
        int end = tail.lastIndexOf("'");
        if (end < 0) {
            return null;
        }
        return tail.substring(0, end);
    }

    /** Fetches a job's detail page and fills in {@code job.data} and {@code job.longDesc}. */
    private static void loadDetails(Job job) throws java.io.IOException {
        URL url = new URL(DETAIL_URL_PREFIX + job.id + DETAIL_URL_SUFFIX);
        Document doc = Jsoup.parse(url, TIMEOUT_MILLIS);

        Element content = doc.select("#idTabstripContent").first();
        if (content == null) {
            return;
        }

        // Attribute table: a cell holding a known label is followed by its value cell.
        Elements cells = content.select("td");
        for (int j = 0; j < cells.size(); j++) {
            String field = cells.get(j).text();
            if (!DETAIL_FIELDS.contains(field)) {
                continue;
            }
            String value = (j + 1 < cells.size()) ? cells.get(j + 1).text() : "";
            job.data.put(field, value);
        }

        // Long description: second element sibling after the "Brief summary:" label,
        // or the one after that when the second is the "NoWorkInAdvance" notice.
        Elements bolds = doc.select("b");
        for (int j = 0; j < bolds.size(); j++) {
            Element label = bolds.get(j);
            if (!"Brief summary:".equals(label.text().trim())) {
                continue;
            }
            Element body = label.nextElementSibling();
            if (body != null) {
                body = body.nextElementSibling();
            }
            if (body != null && body.html().indexOf("<a name=\"NoWorkInAdvance\">") >= 0) {
                body = body.nextElementSibling();
            }
            if (body != null) {
                job.longDesc = body.html();
            }
        }
    }

    /**
     * A scraped job posting. Fields are public and mutable; they default to
     * empty values so a partially scraped job never exposes {@code null}.
     */
    public static class Job {
        /** Job id as it appears after {@code lngBidRequestId=} in the listing link. */
        public String id = "";
        /** Link text of the job on the listing page. */
        public String title = "";
        /** Plain-text summary taken from the listing page. */
        public String shortDesc = "";
        /** HTML of the description taken from the detail page. */
        public String longDesc = "";
        /** Detail-page attributes keyed by their label, e.g. {@code "Deadline:"}. */
        public Map<String, String> data = new HashMap<String, String>();

        /**
         * Renders the job as CRLF-separated lines: id, title, both descriptions,
         * then one {@code label value} line per entry of {@link #data}.
         */
        @Override
        public String toString() {
            StringBuilder tag = new StringBuilder("\r\n");
            tag.append("Id: ").append(id).append("\r\n");
            tag.append("Title: ").append(title).append("\r\n");
            tag.append("Short Desc: ").append(shortDesc).append("\r\n");
            tag.append("Long Desc: ").append(longDesc).append("\r\n");
            for (Map.Entry<String, String> entry : data.entrySet()) {
                tag.append(entry.getKey()).append(" ").append(entry.getValue()).append("\r\n");
            }
            return tag.toString();
        }
    }
}

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment