Grab vBulletin pages in logged-in mode
This task uses a JavaScript sandbox with jsoup support to grab vBulletin pages in logged-in mode.
Grab vBulletin pages in logged-in mode
- Create a JavaScript sandbox with jsoup support
- Create the JavaScript as follows:
javascript
/**
 * Sandbox entry point: logs in to the SitePoint vBulletin forum, fetches the
 * subscription page with the session cookies and appends one
 * {title, link} map per subscribed thread to the `links` argument.
 *
 * Failures are reported through `env.error` rather than thrown.
 *
 * @param env  sandbox environment; this function uses its newJsoup, newURL,
 *             newHashMap and error methods.
 * @param args task arguments; `args.get('links')` must return a list with add().
 */
function main(env, args) {
  var USER_AGENT = 'Mozilla/5.0 (Windows NT x.y; rv:10.0.1) Gecko/20100101 Firefox/10.0.1';
  var links = args.get('links');
  // Fill in the forum credentials before running the task.
  var username = '';
  var password = '';
  try {
    var login = env.newJsoup().connect('http://www.sitepoint.com/forums/login.php?do=login');
    login.userAgent(USER_AGENT);
    login.data('do', 'login');
    login.data('vb_login_username', username);
    login.data('vb_login_password', password);
    // Same transformation the vBulletin login form applies before hashing.
    login.data('vb_login_md5password', hex_md5(str_to_ent(trim(password))));
    login.data('securitytoken', 'guest');
    login.data('cookieuser', '1');
    var reply = login.post();

    // The confirmation box is the only success signal that is checked.
    var notice = reply.select('.standard_error .restore').first();
    if (notice == null || !notice.text().startsWith('Thank you for logging in')) {
      env.error('Cannot login!');
      return;
    }
    var cookies = login.getCookies();

    var url = env.newURL('http://www.sitepoint.com/forums/subscription.php');
    var page = env.newJsoup().connect(url);
    page.userAgent(USER_AGENT);
    page.cookies(cookies);
    var doc = page.get();

    var rows = doc.select('#threads .threadbit');
    for (var i = 0; i < rows.size(); i++) {
      var title = rows.get(i).select('.threadtitle .title').first();
      if (title == null) {
        // A row without a title link must not abort the remaining rows.
        continue;
      }
      var item = env.newHashMap();
      item.put('title', title.text());
      // Resolve the (possibly relative) href against the page URL.
      item.put('link', env.newURL(url, title.attr('href')) + '');
      links.add(item);
    }
  } catch (e) {
    env.error(e);
  }
}
48 | |
// Configuration read by the MD5 helpers in this script.
var hexcase = 0;  // binl2hex output: falsy = lowercase digits, truthy = uppercase
var b64pad = "";  // string binl2b64 appends for each padding position
var chrsz = 8;    // bits taken from each input character by str2binl
/**
 * MD5 digest of a string, returned as hexadecimal text.
 * Only the low `chrsz` bits of each character are hashed (see str2binl).
 */
function hex_md5(s) {
  return binl2hex(core_md5(str2binl(s), s.length * chrsz));
}
/**
 * MD5 digest of a string, returned as base-64 text (padding per `b64pad`).
 * Only the low `chrsz` bits of each character are hashed (see str2binl).
 */
function b64_md5(s) {
  return binl2b64(core_md5(str2binl(s), s.length * chrsz));
}
/**
 * MD5 digest of a string, returned as a raw string with one character per
 * `chrsz` bits of digest.
 */
function str_md5(s) {
  return binl2str(core_md5(str2binl(s), s.length * chrsz));
}
/**
 * HMAC-MD5 of `data` under `key`, returned as hexadecimal text.
 */
function hex_hmac_md5(key, data) {
  return binl2hex(core_hmac_md5(key, data));
}
/**
 * HMAC-MD5 of `data` under `key`, returned as base-64 text.
 */
function b64_hmac_md5(key, data) {
  return binl2b64(core_hmac_md5(key, data));
}
/**
 * HMAC-MD5 of `data` under `key`, returned as a raw string.
 */
function str_hmac_md5(key, data) {
  return binl2str(core_hmac_md5(key, data));
}
/**
 * Computes the MD5 of an array of little-endian 32-bit words.
 *
 * NOTE: the padding is written into `x` itself, so the caller's array is
 * modified.
 *
 * @param x   message as an array of 32-bit words
 * @param len message length in bits
 * @returns array of four 32-bit words holding the digest
 */
function core_md5(x, len) {
  // Append the 1 bit and store the bit length in the last block.
  x[len >> 5] |= 128 << ((len) % 32);
  x[(((len + 64) >>> 9) << 4) + 14] = len;

  var a = 1732584193;
  var b = -271733879;
  var c = -1732584194;
  var d = 271733878;

  for (var i = 0; i < x.length; i += 16) {
    var olda = a;
    var oldb = b;
    var oldc = c;
    var oldd = d;

    a = md5_ff(a, b, c, d, x[i + 0], 7, -680876936);
    d = md5_ff(d, a, b, c, x[i + 1], 12, -389564586);
    c = md5_ff(c, d, a, b, x[i + 2], 17, 606105819);
    b = md5_ff(b, c, d, a, x[i + 3], 22, -1044525330);
    a = md5_ff(a, b, c, d, x[i + 4], 7, -176418897);
    d = md5_ff(d, a, b, c, x[i + 5], 12, 1200080426);
    c = md5_ff(c, d, a, b, x[i + 6], 17, -1473231341);
    b = md5_ff(b, c, d, a, x[i + 7], 22, -45705983);
    a = md5_ff(a, b, c, d, x[i + 8], 7, 1770035416);
    d = md5_ff(d, a, b, c, x[i + 9], 12, -1958414417);
    c = md5_ff(c, d, a, b, x[i + 10], 17, -42063);
    b = md5_ff(b, c, d, a, x[i + 11], 22, -1990404162);
    a = md5_ff(a, b, c, d, x[i + 12], 7, 1804603682);
    d = md5_ff(d, a, b, c, x[i + 13], 12, -40341101);
    c = md5_ff(c, d, a, b, x[i + 14], 17, -1502002290);
    b = md5_ff(b, c, d, a, x[i + 15], 22, 1236535329);

    a = md5_gg(a, b, c, d, x[i + 1], 5, -165796510);
    d = md5_gg(d, a, b, c, x[i + 6], 9, -1069501632);
    c = md5_gg(c, d, a, b, x[i + 11], 14, 643717713);
    b = md5_gg(b, c, d, a, x[i + 0], 20, -373897302);
    a = md5_gg(a, b, c, d, x[i + 5], 5, -701558691);
    d = md5_gg(d, a, b, c, x[i + 10], 9, 38016083);
    c = md5_gg(c, d, a, b, x[i + 15], 14, -660478335);
    b = md5_gg(b, c, d, a, x[i + 4], 20, -405537848);
    a = md5_gg(a, b, c, d, x[i + 9], 5, 568446438);
    d = md5_gg(d, a, b, c, x[i + 14], 9, -1019803690);
    c = md5_gg(c, d, a, b, x[i + 3], 14, -187363961);
    b = md5_gg(b, c, d, a, x[i + 8], 20, 1163531501);
    a = md5_gg(a, b, c, d, x[i + 13], 5, -1444681467);
    d = md5_gg(d, a, b, c, x[i + 2], 9, -51403784);
    c = md5_gg(c, d, a, b, x[i + 7], 14, 1735328473);
    b = md5_gg(b, c, d, a, x[i + 12], 20, -1926607734);

    a = md5_hh(a, b, c, d, x[i + 5], 4, -378558);
    d = md5_hh(d, a, b, c, x[i + 8], 11, -2022574463);
    c = md5_hh(c, d, a, b, x[i + 11], 16, 1839030562);
    b = md5_hh(b, c, d, a, x[i + 14], 23, -35309556);
    a = md5_hh(a, b, c, d, x[i + 1], 4, -1530992060);
    d = md5_hh(d, a, b, c, x[i + 4], 11, 1272893353);
    c = md5_hh(c, d, a, b, x[i + 7], 16, -155497632);
    b = md5_hh(b, c, d, a, x[i + 10], 23, -1094730640);
    a = md5_hh(a, b, c, d, x[i + 13], 4, 681279174);
    d = md5_hh(d, a, b, c, x[i + 0], 11, -358537222);
    c = md5_hh(c, d, a, b, x[i + 3], 16, -722521979);
    b = md5_hh(b, c, d, a, x[i + 6], 23, 76029189);
    a = md5_hh(a, b, c, d, x[i + 9], 4, -640364487);
    d = md5_hh(d, a, b, c, x[i + 12], 11, -421815835);
    c = md5_hh(c, d, a, b, x[i + 15], 16, 530742520);
    b = md5_hh(b, c, d, a, x[i + 2], 23, -995338651);

    a = md5_ii(a, b, c, d, x[i + 0], 6, -198630844);
    d = md5_ii(d, a, b, c, x[i + 7], 10, 1126891415);
    c = md5_ii(c, d, a, b, x[i + 14], 15, -1416354905);
    b = md5_ii(b, c, d, a, x[i + 5], 21, -57434055);
    a = md5_ii(a, b, c, d, x[i + 12], 6, 1700485571);
    d = md5_ii(d, a, b, c, x[i + 3], 10, -1894986606);
    c = md5_ii(c, d, a, b, x[i + 10], 15, -1051523);
    b = md5_ii(b, c, d, a, x[i + 1], 21, -2054922799);
    a = md5_ii(a, b, c, d, x[i + 8], 6, 1873313359);
    d = md5_ii(d, a, b, c, x[i + 15], 10, -30611744);
    c = md5_ii(c, d, a, b, x[i + 6], 15, -1560198380);
    b = md5_ii(b, c, d, a, x[i + 13], 21, 1309151649);
    a = md5_ii(a, b, c, d, x[i + 4], 6, -145523070);
    d = md5_ii(d, a, b, c, x[i + 11], 10, -1120210379);
    c = md5_ii(c, d, a, b, x[i + 2], 15, 718787259);
    b = md5_ii(b, c, d, a, x[i + 9], 21, -343485551);

    // Fold this block's result into the running state.
    a = safe_add(a, olda);
    b = safe_add(b, oldb);
    c = safe_add(c, oldc);
    d = safe_add(d, oldd);
  }
  return [a, b, c, d];
}
/**
 * Step shared by all four MD5 rounds: adds a, q, x and t with 32-bit
 * wrap-around, rotates the sum left by s bits, then adds b.
 */
function md5_cmn(q, a, b, x, s, t) {
  return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s), b);
}
/** MD5 round 1 step: mixes with (b AND c) OR (NOT b AND d). */
function md5_ff(a, b, c, d, x, s, t) {
  return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
}
/** MD5 round 2 step: mixes with (b AND d) OR (c AND NOT d). */
function md5_gg(a, b, c, d, x, s, t) {
  return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
}
/** MD5 round 3 step: mixes with b XOR c XOR d. */
function md5_hh(a, b, c, d, x, s, t) {
  return md5_cmn(b ^ c ^ d, a, b, x, s, t);
}
/** MD5 round 4 step: mixes with c XOR (b OR NOT d). */
function md5_ii(a, b, c, d, x, s, t) {
  return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
}
/**
 * Computes HMAC-MD5 of `data` under `key`.
 *
 * @param key  key string; hashed first when it packs into more than 16 words
 * @param data message string
 * @returns array of four 32-bit words holding the MAC
 */
function core_hmac_md5(key, data) {
  var bkey = str2binl(key);
  if (bkey.length > 16) {
    bkey = core_md5(bkey, key.length * chrsz);
  }

  var ipad = Array(16);
  var opad = Array(16);
  for (var i = 0; i < 16; i++) {
    // Missing key words are undefined and behave as 0 under XOR.
    ipad[i] = bkey[i] ^ 0x36363636;
    opad[i] = bkey[i] ^ 0x5C5C5C5C;
  }

  var inner = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz);
  return core_md5(opad.concat(inner), 512 + 128);
}
/**
 * Adds two integers with 32-bit wrap-around by summing the 16-bit halves
 * separately and recombining them.
 */
function safe_add(x, y) {
  var low = (x & 0xFFFF) + (y & 0xFFFF);
  var high = (x >> 16) + (y >> 16) + (low >> 16);
  return (high << 16) | (low & 0xFFFF);
}
/** Rotates a 32-bit integer left by `cnt` bits. */
function bit_rol(num, cnt) {
  return (num << cnt) | (num >>> (32 - cnt));
}
/**
 * Packs a string into an array of little-endian 32-bit words.
 * Each character contributes its low `chrsz` bits; higher bits are dropped.
 */
function str2binl(str) {
  var bin = [];
  var mask = (1 << chrsz) - 1;
  for (var bit = 0; bit < str.length * chrsz; bit += chrsz) {
    bin[bit >> 5] |= (str.charCodeAt(bit / chrsz) & mask) << (bit % 32);
  }
  return bin;
}
/**
 * Unpacks an array of little-endian 32-bit words into a string, producing
 * one character per `chrsz` bits.
 */
function binl2str(bin) {
  var str = "";
  var mask = (1 << chrsz) - 1;
  for (var bit = 0; bit < bin.length * 32; bit += chrsz) {
    str += String.fromCharCode((bin[bit >> 5] >>> (bit % 32)) & mask);
  }
  return str;
}
/**
 * Renders an array of little-endian 32-bit words as hexadecimal text,
 * two digits per byte; `hexcase` selects upper- or lowercase digits.
 */
function binl2hex(binarray) {
  var digits = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var hex = "";
  for (var i = 0; i < binarray.length * 4; i++) {
    var word = binarray[i >> 2];
    var shift = (i % 4) * 8;
    hex += digits.charAt((word >> (shift + 4)) & 15) + digits.charAt((word >> shift) & 15);
  }
  return hex;
}
/**
 * Renders an array of little-endian 32-bit words as base-64 text.
 * Positions past the end of the data emit `b64pad` instead of a digit.
 */
function binl2b64(binarray) {
  var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var out = "";
  for (var i = 0; i < binarray.length * 4; i += 3) {
    // Gather three bytes; bytes beyond the array read as 0.
    var triplet =
      (((binarray[i >> 2] >> 8 * (i % 4)) & 255) << 16) |
      (((binarray[i + 1 >> 2] >> 8 * ((i + 1) % 4)) & 255) << 8) |
      ((binarray[i + 2 >> 2] >> 8 * ((i + 2) % 4)) & 255);
    for (var j = 0; j < 4; j++) {
      if (i * 8 + j * 6 > binarray.length * 32) {
        out += b64pad;
      } else {
        out += alphabet.charAt((triplet >> 6 * (3 - j)) & 63);
      }
    }
  }
  return out;
}
/**
 * Replaces every character whose code is above 255 with a decimal HTML
 * entity (`&#NNNN;`); all other characters are copied unchanged.
 */
function str_to_ent(str) {
  var result = "";
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);
    if (code > 255) {
      // String concatenation yields the same decimal digits as the
      // original digit-by-digit loop.
      result += "&#" + code + ";";
    } else {
      result += str.charAt(i);
    }
  }
  return result;
}
/**
 * Removes leading and trailing space characters (U+0020 only; tabs and
 * other whitespace are kept).
 */
function trim(str) {
  var start = 0;
  var end = str.length;
  // Move the bounds inward and copy once, instead of re-slicing per space.
  while (start < end && str.charAt(start) === " ") {
    start++;
  }
  while (end > start && str.charAt(end - 1) === " ") {
    end--;
  }
  return str.substring(start, end);
}
/**
 * Sandbox entry point: logs in to the SitePoint vBulletin forum, fetches the
 * subscription page with the session cookies and appends one
 * {title, link} map per subscribed thread to the `links` argument.
 *
 * Failures are reported through `env.error` rather than thrown.
 *
 * @param env  sandbox environment; this function uses its newJsoup, newURL,
 *             newHashMap and error methods.
 * @param args task arguments; `args.get('links')` must return a list with add().
 */
function main(env, args) {
  var USER_AGENT = 'Mozilla/5.0 (Windows NT x.y; rv:10.0.1) Gecko/20100101 Firefox/10.0.1';
  var links = args.get('links');
  // Fill in the forum credentials before running the task.
  var username = '';
  var password = '';
  try {
    var login = env.newJsoup().connect('http://www.sitepoint.com/forums/login.php?do=login');
    login.userAgent(USER_AGENT);
    login.data('do', 'login');
    login.data('vb_login_username', username);
    login.data('vb_login_password', password);
    // Same transformation the vBulletin login form applies before hashing.
    login.data('vb_login_md5password', hex_md5(str_to_ent(trim(password))));
    login.data('securitytoken', 'guest');
    login.data('cookieuser', '1');
    var reply = login.post();

    // The confirmation box is the only success signal that is checked.
    var notice = reply.select('.standard_error .restore').first();
    if (notice == null || !notice.text().startsWith('Thank you for logging in')) {
      env.error('Cannot login!');
      return;
    }
    var cookies = login.getCookies();

    var url = env.newURL('http://www.sitepoint.com/forums/subscription.php');
    var page = env.newJsoup().connect(url);
    page.userAgent(USER_AGENT);
    page.cookies(cookies);
    var doc = page.get();

    var rows = doc.select('#threads .threadbit');
    for (var i = 0; i < rows.size(); i++) {
      var title = rows.get(i).select('.threadtitle .title').first();
      if (title == null) {
        // A row without a title link must not abort the remaining rows.
        continue;
      }
      var item = env.newHashMap();
      item.put('title', title.text());
      // Resolve the (possibly relative) href against the page URL.
      item.put('link', env.newURL(url, title.attr('href')) + '');
      links.add(item);
    }
  } catch (e) {
    env.error(e);
  }
}

// Configuration read by the MD5 helpers below.
var hexcase = 0;  // binl2hex output: falsy = lowercase digits, truthy = uppercase
var b64pad = "";  // string binl2b64 appends for each padding position
var chrsz = 8;    // bits taken from each input character by str2binl

/** MD5 digest of a string as hexadecimal text. */
function hex_md5(s) {
  return binl2hex(core_md5(str2binl(s), s.length * chrsz));
}

/** MD5 digest of a string as base-64 text. */
function b64_md5(s) {
  return binl2b64(core_md5(str2binl(s), s.length * chrsz));
}

/** MD5 digest of a string as a raw string. */
function str_md5(s) {
  return binl2str(core_md5(str2binl(s), s.length * chrsz));
}

/** HMAC-MD5 of `data` under `key` as hexadecimal text. */
function hex_hmac_md5(key, data) {
  return binl2hex(core_hmac_md5(key, data));
}

/** HMAC-MD5 of `data` under `key` as base-64 text. */
function b64_hmac_md5(key, data) {
  return binl2b64(core_hmac_md5(key, data));
}

/** HMAC-MD5 of `data` under `key` as a raw string. */
function str_hmac_md5(key, data) {
  return binl2str(core_hmac_md5(key, data));
}

/**
 * Computes the MD5 of an array of little-endian 32-bit words.
 * NOTE: the padding is written into `x` itself, so the caller's array is
 * modified.
 *
 * @param x   message as an array of 32-bit words
 * @param len message length in bits
 * @returns array of four 32-bit words holding the digest
 */
function core_md5(x, len) {
  // Append the 1 bit and store the bit length in the last block.
  x[len >> 5] |= 128 << ((len) % 32);
  x[(((len + 64) >>> 9) << 4) + 14] = len;

  var a = 1732584193;
  var b = -271733879;
  var c = -1732584194;
  var d = 271733878;

  for (var i = 0; i < x.length; i += 16) {
    var olda = a;
    var oldb = b;
    var oldc = c;
    var oldd = d;

    a = md5_ff(a, b, c, d, x[i + 0], 7, -680876936);
    d = md5_ff(d, a, b, c, x[i + 1], 12, -389564586);
    c = md5_ff(c, d, a, b, x[i + 2], 17, 606105819);
    b = md5_ff(b, c, d, a, x[i + 3], 22, -1044525330);
    a = md5_ff(a, b, c, d, x[i + 4], 7, -176418897);
    d = md5_ff(d, a, b, c, x[i + 5], 12, 1200080426);
    c = md5_ff(c, d, a, b, x[i + 6], 17, -1473231341);
    b = md5_ff(b, c, d, a, x[i + 7], 22, -45705983);
    a = md5_ff(a, b, c, d, x[i + 8], 7, 1770035416);
    d = md5_ff(d, a, b, c, x[i + 9], 12, -1958414417);
    c = md5_ff(c, d, a, b, x[i + 10], 17, -42063);
    b = md5_ff(b, c, d, a, x[i + 11], 22, -1990404162);
    a = md5_ff(a, b, c, d, x[i + 12], 7, 1804603682);
    d = md5_ff(d, a, b, c, x[i + 13], 12, -40341101);
    c = md5_ff(c, d, a, b, x[i + 14], 17, -1502002290);
    b = md5_ff(b, c, d, a, x[i + 15], 22, 1236535329);

    a = md5_gg(a, b, c, d, x[i + 1], 5, -165796510);
    d = md5_gg(d, a, b, c, x[i + 6], 9, -1069501632);
    c = md5_gg(c, d, a, b, x[i + 11], 14, 643717713);
    b = md5_gg(b, c, d, a, x[i + 0], 20, -373897302);
    a = md5_gg(a, b, c, d, x[i + 5], 5, -701558691);
    d = md5_gg(d, a, b, c, x[i + 10], 9, 38016083);
    c = md5_gg(c, d, a, b, x[i + 15], 14, -660478335);
    b = md5_gg(b, c, d, a, x[i + 4], 20, -405537848);
    a = md5_gg(a, b, c, d, x[i + 9], 5, 568446438);
    d = md5_gg(d, a, b, c, x[i + 14], 9, -1019803690);
    c = md5_gg(c, d, a, b, x[i + 3], 14, -187363961);
    b = md5_gg(b, c, d, a, x[i + 8], 20, 1163531501);
    a = md5_gg(a, b, c, d, x[i + 13], 5, -1444681467);
    d = md5_gg(d, a, b, c, x[i + 2], 9, -51403784);
    c = md5_gg(c, d, a, b, x[i + 7], 14, 1735328473);
    b = md5_gg(b, c, d, a, x[i + 12], 20, -1926607734);

    a = md5_hh(a, b, c, d, x[i + 5], 4, -378558);
    d = md5_hh(d, a, b, c, x[i + 8], 11, -2022574463);
    c = md5_hh(c, d, a, b, x[i + 11], 16, 1839030562);
    b = md5_hh(b, c, d, a, x[i + 14], 23, -35309556);
    a = md5_hh(a, b, c, d, x[i + 1], 4, -1530992060);
    d = md5_hh(d, a, b, c, x[i + 4], 11, 1272893353);
    c = md5_hh(c, d, a, b, x[i + 7], 16, -155497632);
    b = md5_hh(b, c, d, a, x[i + 10], 23, -1094730640);
    a = md5_hh(a, b, c, d, x[i + 13], 4, 681279174);
    d = md5_hh(d, a, b, c, x[i + 0], 11, -358537222);
    c = md5_hh(c, d, a, b, x[i + 3], 16, -722521979);
    b = md5_hh(b, c, d, a, x[i + 6], 23, 76029189);
    a = md5_hh(a, b, c, d, x[i + 9], 4, -640364487);
    d = md5_hh(d, a, b, c, x[i + 12], 11, -421815835);
    c = md5_hh(c, d, a, b, x[i + 15], 16, 530742520);
    b = md5_hh(b, c, d, a, x[i + 2], 23, -995338651);

    a = md5_ii(a, b, c, d, x[i + 0], 6, -198630844);
    d = md5_ii(d, a, b, c, x[i + 7], 10, 1126891415);
    c = md5_ii(c, d, a, b, x[i + 14], 15, -1416354905);
    b = md5_ii(b, c, d, a, x[i + 5], 21, -57434055);
    a = md5_ii(a, b, c, d, x[i + 12], 6, 1700485571);
    d = md5_ii(d, a, b, c, x[i + 3], 10, -1894986606);
    c = md5_ii(c, d, a, b, x[i + 10], 15, -1051523);
    b = md5_ii(b, c, d, a, x[i + 1], 21, -2054922799);
    a = md5_ii(a, b, c, d, x[i + 8], 6, 1873313359);
    d = md5_ii(d, a, b, c, x[i + 15], 10, -30611744);
    c = md5_ii(c, d, a, b, x[i + 6], 15, -1560198380);
    b = md5_ii(b, c, d, a, x[i + 13], 21, 1309151649);
    a = md5_ii(a, b, c, d, x[i + 4], 6, -145523070);
    d = md5_ii(d, a, b, c, x[i + 11], 10, -1120210379);
    c = md5_ii(c, d, a, b, x[i + 2], 15, 718787259);
    b = md5_ii(b, c, d, a, x[i + 9], 21, -343485551);

    // Fold this block's result into the running state.
    a = safe_add(a, olda);
    b = safe_add(b, oldb);
    c = safe_add(c, oldc);
    d = safe_add(d, oldd);
  }
  return [a, b, c, d];
}

/**
 * Step shared by all four MD5 rounds: adds a, q, x and t with 32-bit
 * wrap-around, rotates the sum left by s bits, then adds b.
 */
function md5_cmn(q, a, b, x, s, t) {
  return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s), b);
}

/** MD5 round 1 step: mixes with (b AND c) OR (NOT b AND d). */
function md5_ff(a, b, c, d, x, s, t) {
  return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
}

/** MD5 round 2 step: mixes with (b AND d) OR (c AND NOT d). */
function md5_gg(a, b, c, d, x, s, t) {
  return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
}

/** MD5 round 3 step: mixes with b XOR c XOR d. */
function md5_hh(a, b, c, d, x, s, t) {
  return md5_cmn(b ^ c ^ d, a, b, x, s, t);
}

/** MD5 round 4 step: mixes with c XOR (b OR NOT d). */
function md5_ii(a, b, c, d, x, s, t) {
  return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
}

/**
 * Computes HMAC-MD5 of `data` under `key`.
 *
 * @param key  key string; hashed first when it packs into more than 16 words
 * @param data message string
 * @returns array of four 32-bit words holding the MAC
 */
function core_hmac_md5(key, data) {
  var bkey = str2binl(key);
  if (bkey.length > 16) {
    bkey = core_md5(bkey, key.length * chrsz);
  }

  var ipad = Array(16);
  var opad = Array(16);
  for (var i = 0; i < 16; i++) {
    // Missing key words are undefined and behave as 0 under XOR.
    ipad[i] = bkey[i] ^ 0x36363636;
    opad[i] = bkey[i] ^ 0x5C5C5C5C;
  }

  var inner = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz);
  return core_md5(opad.concat(inner), 512 + 128);
}

/**
 * Adds two integers with 32-bit wrap-around by summing the 16-bit halves
 * separately and recombining them.
 */
function safe_add(x, y) {
  var low = (x & 0xFFFF) + (y & 0xFFFF);
  var high = (x >> 16) + (y >> 16) + (low >> 16);
  return (high << 16) | (low & 0xFFFF);
}

/** Rotates a 32-bit integer left by `cnt` bits. */
function bit_rol(num, cnt) {
  return (num << cnt) | (num >>> (32 - cnt));
}

/**
 * Packs a string into an array of little-endian 32-bit words.
 * Each character contributes its low `chrsz` bits; higher bits are dropped.
 */
function str2binl(str) {
  var bin = [];
  var mask = (1 << chrsz) - 1;
  for (var bit = 0; bit < str.length * chrsz; bit += chrsz) {
    bin[bit >> 5] |= (str.charCodeAt(bit / chrsz) & mask) << (bit % 32);
  }
  return bin;
}

/**
 * Unpacks an array of little-endian 32-bit words into a string, producing
 * one character per `chrsz` bits.
 */
function binl2str(bin) {
  var str = "";
  var mask = (1 << chrsz) - 1;
  for (var bit = 0; bit < bin.length * 32; bit += chrsz) {
    str += String.fromCharCode((bin[bit >> 5] >>> (bit % 32)) & mask);
  }
  return str;
}

/**
 * Renders an array of little-endian 32-bit words as hexadecimal text,
 * two digits per byte; `hexcase` selects upper- or lowercase digits.
 */
function binl2hex(binarray) {
  var digits = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var hex = "";
  for (var i = 0; i < binarray.length * 4; i++) {
    var word = binarray[i >> 2];
    var shift = (i % 4) * 8;
    hex += digits.charAt((word >> (shift + 4)) & 15) + digits.charAt((word >> shift) & 15);
  }
  return hex;
}

/**
 * Renders an array of little-endian 32-bit words as base-64 text.
 * Positions past the end of the data emit `b64pad` instead of a digit.
 */
function binl2b64(binarray) {
  var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var out = "";
  for (var i = 0; i < binarray.length * 4; i += 3) {
    // Gather three bytes; bytes beyond the array read as 0.
    var triplet =
      (((binarray[i >> 2] >> 8 * (i % 4)) & 255) << 16) |
      (((binarray[i + 1 >> 2] >> 8 * ((i + 1) % 4)) & 255) << 8) |
      ((binarray[i + 2 >> 2] >> 8 * ((i + 2) % 4)) & 255);
    for (var j = 0; j < 4; j++) {
      if (i * 8 + j * 6 > binarray.length * 32) {
        out += b64pad;
      } else {
        out += alphabet.charAt((triplet >> 6 * (3 - j)) & 63);
      }
    }
  }
  return out;
}

/**
 * Replaces every character whose code is above 255 with a decimal HTML
 * entity (`&#NNNN;`); all other characters are copied unchanged.
 */
function str_to_ent(str) {
  var result = "";
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);
    if (code > 255) {
      result += "&#" + code + ";";
    } else {
      result += str.charAt(i);
    }
  }
  return result;
}

/**
 * Removes leading and trailing space characters (U+0020 only; tabs and
 * other whitespace are kept).
 */
function trim(str) {
  var start = 0;
  var end = str.length;
  while (start < end && str.charAt(start) === " ") {
    start++;
  }
  while (end > start && str.charAt(end - 1) === " ") {
    end--;
  }
  return str.substring(start, end);
}
No comments:
Post a Comment