Create a JavaScript sandbox with jsoup support
Create a JavaScript sandbox with jsoup support
- Create the com.paesia.schema.script.Machine class as follows
- Create the com.paesia.schema.script.safe.jsoup.SJsoup class as follows
- Create the com.paesia.schema.script.safe.jsoup.SConnection class as follows
- Create the DataHandler class as follows
- Create the JavaScript as follows
- Call the Machine.run() method as follows
Call Machine.run() method
com.paesia.schema.script.Machine class
com.paesia.schema.script.safe.jsoup.SJsoup class
com.paesia.schema.script.safe.jsoup.SConnection class
DataHandler class
javascript
1 | String js = loadJS(); |
2 | Map args = new HashMap(); |
3 | List links = new ArrayList(); |
4 | args.put("links", links); |
5 | |
6 | Machine env = new Machine(new DataHandler()); |
7 | Machine.run(env, js, args); |
8 | |
9 | for (int i = 0; i < links.size(); i++) { |
10 | Map item = (Map)links.get(i); |
11 | String line = ""; |
12 | for (Object key : item.keySet()) { |
13 | line += "\r\n" + key + " : " + item.get(key); |
14 | } |
15 | logger.info("\r\n--------------------------------\r\n" + line + "\r\n"); |
16 | } |
String js = loadJS(); Map args = new HashMap(); List links = new ArrayList(); args.put("links", links); Machine env = new Machine(new DataHandler()); Machine.run(env, js, args); for (int i = 0; i < links.size(); i++) { Map item = (Map)links.get(i); String line = ""; for (Object key : item.keySet()) { line += "\r\n" + key + " : " + item.get(key); } logger.info("\r\n--------------------------------\r\n" + line + "\r\n"); }
1 | package com.paesia.schema.script; |
2 | |
3 | import java.net.URL; |
4 | import java.net.URLDecoder; |
5 | import java.net.URLEncoder; |
6 | import java.util.ArrayList; |
7 | import java.util.HashMap; |
8 | import java.util.List; |
9 | import java.util.Map; |
10 | |
11 | import org.mozilla.javascript.ClassShutter; |
12 | import org.mozilla.javascript.Context; |
13 | import org.mozilla.javascript.Function; |
14 | import org.mozilla.javascript.Scriptable; |
15 | |
16 | import com.paesia.schema.script.safe.jsoup.SJsoup; |
17 | |
18 | import java.util.UUID; |
19 | import java.util.Random; |
20 | import java.util.Date; |
21 | import java.text.SimpleDateFormat; |
22 | import java.io.ByteArrayInputStream; |
23 | import java.io.InputStream; |
24 | import javax.mail.internet.MimeUtility; |
25 | import java.io.ByteArrayOutputStream; |
26 | import java.io.OutputStream; |
27 | import java.util.Set; |
28 | |
29 | import java.util.Locale; |
30 | import java.util.TimeZone; |
31 | import java.util.SimpleTimeZone; |
32 | import java.util.Calendar; |
33 | |
34 | public class Machine { |
35 | |
36 | private Handler handler; |
37 | |
38 | public static void run(Machine env, String js, Map args) throws Exception { |
39 | try { |
40 | Context cx = Context.enter(); |
41 | cx.setClassShutter(new ClassShutter() { |
42 | public boolean visibleToScripts(String className) { |
43 | if ("com.paesia.schema.script.Machine".equals(className)) return true; |
44 | if ("java.lang.String".equals(className)) return true; |
45 | if ("java.lang.Object".equals(className)) return true; |
46 | if ("java.util.HashMap".equals(className)) return true; |
47 | if ("java.util.ArrayList".equals(className)) return true; |
48 | if ("java.lang.Byte".equals(className)) return true; |
49 | if ("java.lang.Short".equals(className)) return true; |
50 | if ("java.lang.Integer".equals(className)) return true; |
51 | if ("java.lang.Long".equals(className)) return true; |
52 | if ("java.lang.Float".equals(className)) return true; |
53 | if ("java.lang.Double".equals(className)) return true; |
54 | if ("java.lang.Boolean".equals(className)) return true; |
55 | if ("java.lang.Character".equals(className)) return true; |
56 | if ("java.util.Collection".equals(className)) return true; |
57 | if ("java.util.List".equals(className)) return true; |
58 | if ("java.util.Map".equals(className)) return true; |
59 | if ("java.util.LinkedHashMap".equals(className)) return true; |
60 | if ("java.util.Iterator".equals(className)) return true; |
61 | if ("java.util.ListIterator".equals(className)) return true; |
62 | if ("java.lang.Iterable".equals(className)) return true; |
63 | if ("java.net.URL".equals(className)) return true; |
64 | if (className.startsWith("org.jsoup.nodes.")) return true; |
65 | if (className.startsWith("org.jsoup.select.")) return true; |
66 | if (className.startsWith("org.jsoup.safety.")) return true; |
67 | if (className.startsWith("org.jsoup.parser.")) return true; |
68 | if (className.startsWith("com.paesia.schema.script.safe.")) return true; |
69 | if ("org.jsoup.helper.HttpConnection$Response".equals(className)) return true; |
70 | if ("java.util.Date".equals(className)) return true; |
71 | if ("java.text.SimpleDateFormat".equals(className)) return true; |
72 | if (className.startsWith("java.util.Collections")) return true; |
73 | if ("java.util.LinkedHashSet".equals(className)) return true; |
74 | if ("java.util.Locale".equals(className)) return true; |
75 | if ("java.util.TimeZone".equals(className)) return true; |
76 | if ("java.util.SimpleTimeZone".equals(className)) return true; |
77 | if ("java.util.Calendar".equals(className)) return true; |
78 | if ("java.util.GregorianCalendar".equals(className)) return true; |
79 | return false; |
80 | } |
81 | }); |
82 | Scriptable scope = cx.initStandardObjects(); |
83 | Object result = cx.evaluateString(scope, js, "<js>", 1, null); |
84 | Object fObj = scope.get("main", scope); |
85 | if (!(fObj instanceof Function)) { |
86 | throw new Exception("main() is undefined or not a function."); |
87 | } else { |
88 | Object functionArgs[] = { env, args }; |
89 | Function f = (Function)fObj; |
90 | result = f.call(cx, scope, scope, functionArgs); |
91 | } |
92 | } catch (Exception e) { |
93 | throw e; |
94 | } finally { |
95 | Context.exit(); |
96 | } |
97 | } |
98 | |
99 | public String newString(String src) { |
100 | return src; |
101 | } |
102 | |
103 | public String newString(byte[] src, String charset) throws Exception { |
104 | return new String(src, charset); |
105 | } |
106 | |
107 | public HashMap newHashMap() { |
108 | return new HashMap(); |
109 | } |
110 | |
111 | public ArrayList newArrayList() { |
112 | return new ArrayList(); |
113 | } |
114 | |
115 | public Byte newByte(byte src) { |
116 | return (Byte)src; |
117 | } |
118 | |
119 | public Short newShort(short src) { |
120 | return (Short)src; |
121 | } |
122 | |
123 | public Integer newInteger(int src) { |
124 | return (Integer)src; |
125 | } |
126 | |
127 | public Long newLong(long src) { |
128 | return (Long)src; |
129 | } |
130 | |
131 | public Float newFloat(float src) { |
132 | return (Float)src; |
133 | } |
134 | |
135 | public Double newDouble(double src) { |
136 | return (Double)src; |
137 | } |
138 | |
139 | public Boolean newBoolean(boolean src) { |
140 | return (Boolean)src; |
141 | } |
142 | |
143 | public Character newCharacter(char src) { |
144 | return (Character)src; |
145 | } |
146 | |
147 | public List getKeys(Map src) { |
148 | List tag = new ArrayList(); |
149 | for (Object key : src.keySet()) { |
150 | tag.add(key); |
151 | } |
152 | return tag; |
153 | } |
154 | |
155 | public URL newURL(String protocol, String host, int port, String file) throws Exception { |
156 | return new URL(protocol, host, port, file); |
157 | } |
158 | |
159 | public URL newURL(String protocol, String host, String file) throws Exception { |
160 | return new URL(protocol, host, file); |
161 | } |
162 | |
163 | public URL newURL(String spec) throws Exception { |
164 | return new URL(spec); |
165 | } |
166 | |
167 | public URL newURL(URL context, String spec) throws Exception { |
168 | return new URL(context, spec); |
169 | } |
170 | |
171 | public SJsoup newJsoup() { |
172 | return new SJsoup(); |
173 | } |
174 | |
175 | public String encodeURL(String src, String charset) { |
176 | try { |
177 | return URLEncoder.encode(src, charset); |
178 | } catch (Exception e) { |
179 | return ""; |
180 | } |
181 | } |
182 | |
183 | public String decodeURL(String src, String charset) { |
184 | try { |
185 | return URLDecoder.decode(src, charset); |
186 | } catch (Exception e) { |
187 | return ""; |
188 | } |
189 | } |
190 | |
191 | public String uniqid() { |
192 | return UUID.randomUUID().toString().replaceAll("-", ""); |
193 | } |
194 | |
195 | public String suniqid() { |
196 | Random random = new Random(); |
197 | return Long.toString(Math.abs(random.nextLong()), 36); |
198 | } |
199 | |
200 | public Date newDate() { |
201 | return new Date(); |
202 | } |
203 | |
204 | public Date newDate(long time) { |
205 | return new Date(time); |
206 | } |
207 | |
208 | public SimpleDateFormat newDateFormat(String format) { |
209 | return new SimpleDateFormat(format); |
210 | } |
211 | |
212 | public byte[] decodeBase64(byte[] b) throws Exception { |
213 | ByteArrayInputStream bais = new ByteArrayInputStream(b); |
214 | InputStream b64is = MimeUtility.decode(bais, "base64"); |
215 | byte[] tmp = new byte[b.length]; |
216 | int n = b64is.read(tmp); |
217 | byte[] res = new byte[n]; |
218 | System.arraycopy(tmp, 0, res, 0, n); |
219 | return res; |
220 | } |
221 | |
222 | public byte[] encodeBase64(byte[] b) throws Exception { |
223 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
224 | OutputStream b64os = MimeUtility.encode(baos, "base64"); |
225 | b64os.write(b); |
226 | b64os.close(); |
227 | return baos.toByteArray(); |
228 | } |
229 | |
230 | public List<Object> setToList(Set src) { |
231 | List<Object> tag = new ArrayList<Object>(); |
232 | for (Object item : src) { |
233 | tag.add(item); |
234 | } |
235 | return tag; |
236 | } |
237 | |
238 | public Locale newLocale(String language) { |
239 | return new Locale(language); |
240 | } |
241 | |
242 | public Locale newLocale(String language, String country) { |
243 | return new Locale(language, country); |
244 | } |
245 | |
246 | public Locale newLocale(String language, String country, String variant) { |
247 | return new Locale(language, country, variant); |
248 | } |
249 | |
250 | public TimeZone newTimeZone(int rawOffset, String ID) { |
251 | return new SimpleTimeZone(rawOffset, ID); |
252 | } |
253 | |
254 | public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime) { |
255 | return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime); |
256 | } |
257 | |
258 | public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime, int dstSavings) { |
259 | return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime, dstSavings); |
260 | } |
261 | |
262 | public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int startTimeMode, int endMonth, int endDay, int endDayOfWeek, int endTime, int endTimeMode, int dstSavings) { |
263 | return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, startTimeMode, endMonth, endDay, endDayOfWeek, endTime, endTimeMode, dstSavings); |
264 | } |
265 | |
266 | public Calendar newCalendar() { |
267 | return Calendar.getInstance(); |
268 | } |
269 | |
270 | public Calendar newCalendar(Locale aLocale) { |
271 | return Calendar.getInstance(aLocale); |
272 | } |
273 | |
274 | public Calendar newCalendar(TimeZone zone) { |
275 | return Calendar.getInstance(zone); |
276 | } |
277 | |
278 | public Calendar newCalendar(TimeZone zone, Locale aLocale) { |
279 | return Calendar.getInstance(zone, aLocale); |
280 | } |
281 | |
282 | public Machine(Handler handler) { |
283 | this.handler = handler; |
284 | } |
285 | |
286 | public void debug(String message) { |
287 | if (handler != null) { |
288 | handler.debug(message); |
289 | } |
290 | } |
291 | |
292 | public void error(String message) { |
293 | if (handler != null) { |
294 | handler.error(message); |
295 | } |
296 | } |
297 | |
298 | public void fatal(String message) { |
299 | if (handler != null) { |
300 | handler.fatal(message); |
301 | } |
302 | } |
303 | |
304 | public void info(String message) { |
305 | if (handler != null) { |
306 | handler.info(message); |
307 | } |
308 | } |
309 | |
310 | public static class Handler { |
311 | |
312 | public void debug(String message) { } |
313 | public void error(String message) { } |
314 | public void fatal(String message) { } |
315 | public void info(String message) { } |
316 | |
317 | } |
318 | |
319 | } |
package com.paesia.schema.script; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.mozilla.javascript.ClassShutter; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.Scriptable; import com.paesia.schema.script.safe.jsoup.SJsoup; import java.util.UUID; import java.util.Random; import java.util.Date; import java.text.SimpleDateFormat; import java.io.ByteArrayInputStream; import java.io.InputStream; import javax.mail.internet.MimeUtility; import java.io.ByteArrayOutputStream; import java.io.OutputStream; import java.util.Set; import java.util.Locale; import java.util.TimeZone; import java.util.SimpleTimeZone; import java.util.Calendar; public class Machine { private Handler handler; public static void run(Machine env, String js, Map args) throws Exception { try { Context cx = Context.enter(); cx.setClassShutter(new ClassShutter() { public boolean visibleToScripts(String className) { if ("com.paesia.schema.script.Machine".equals(className)) return true; if ("java.lang.String".equals(className)) return true; if ("java.lang.Object".equals(className)) return true; if ("java.util.HashMap".equals(className)) return true; if ("java.util.ArrayList".equals(className)) return true; if ("java.lang.Byte".equals(className)) return true; if ("java.lang.Short".equals(className)) return true; if ("java.lang.Integer".equals(className)) return true; if ("java.lang.Long".equals(className)) return true; if ("java.lang.Float".equals(className)) return true; if ("java.lang.Double".equals(className)) return true; if ("java.lang.Boolean".equals(className)) return true; if ("java.lang.Character".equals(className)) return true; if ("java.util.Collection".equals(className)) return true; if ("java.util.List".equals(className)) return true; if ("java.util.Map".equals(className)) return true; if 
("java.util.LinkedHashMap".equals(className)) return true; if ("java.util.Iterator".equals(className)) return true; if ("java.util.ListIterator".equals(className)) return true; if ("java.lang.Iterable".equals(className)) return true; if ("java.net.URL".equals(className)) return true; if (className.startsWith("org.jsoup.nodes.")) return true; if (className.startsWith("org.jsoup.select.")) return true; if (className.startsWith("org.jsoup.safety.")) return true; if (className.startsWith("org.jsoup.parser.")) return true; if (className.startsWith("com.paesia.schema.script.safe.")) return true; if ("org.jsoup.helper.HttpConnection$Response".equals(className)) return true; if ("java.util.Date".equals(className)) return true; if ("java.text.SimpleDateFormat".equals(className)) return true; if (className.startsWith("java.util.Collections")) return true; if ("java.util.LinkedHashSet".equals(className)) return true; if ("java.util.Locale".equals(className)) return true; if ("java.util.TimeZone".equals(className)) return true; if ("java.util.SimpleTimeZone".equals(className)) return true; if ("java.util.Calendar".equals(className)) return true; if ("java.util.GregorianCalendar".equals(className)) return true; return false; } }); Scriptable scope = cx.initStandardObjects(); Object result = cx.evaluateString(scope, js, "<js>", 1, null); Object fObj = scope.get("main", scope); if (!(fObj instanceof Function)) { throw new Exception("main() is undefined or not a function."); } else { Object functionArgs[] = { env, args }; Function f = (Function)fObj; result = f.call(cx, scope, scope, functionArgs); } } catch (Exception e) { throw e; } finally { Context.exit(); } } public String newString(String src) { return src; } public String newString(byte[] src, String charset) throws Exception { return new String(src, charset); } public HashMap newHashMap() { return new HashMap(); } public ArrayList newArrayList() { return new ArrayList(); } public Byte newByte(byte src) { return (Byte)src; 
} public Short newShort(short src) { return (Short)src; } public Integer newInteger(int src) { return (Integer)src; } public Long newLong(long src) { return (Long)src; } public Float newFloat(float src) { return (Float)src; } public Double newDouble(double src) { return (Double)src; } public Boolean newBoolean(boolean src) { return (Boolean)src; } public Character newCharacter(char src) { return (Character)src; } public List getKeys(Map src) { List tag = new ArrayList(); for (Object key : src.keySet()) { tag.add(key); } return tag; } public URL newURL(String protocol, String host, int port, String file) throws Exception { return new URL(protocol, host, port, file); } public URL newURL(String protocol, String host, String file) throws Exception { return new URL(protocol, host, file); } public URL newURL(String spec) throws Exception { return new URL(spec); } public URL newURL(URL context, String spec) throws Exception { return new URL(context, spec); } public SJsoup newJsoup() { return new SJsoup(); } public String encodeURL(String src, String charset) { try { return URLEncoder.encode(src, charset); } catch (Exception e) { return ""; } } public String decodeURL(String src, String charset) { try { return URLDecoder.decode(src, charset); } catch (Exception e) { return ""; } } public String uniqid() { return UUID.randomUUID().toString().replaceAll("-", ""); } public String suniqid() { Random random = new Random(); return Long.toString(Math.abs(random.nextLong()), 36); } public Date newDate() { return new Date(); } public Date newDate(long time) { return new Date(time); } public SimpleDateFormat newDateFormat(String format) { return new SimpleDateFormat(format); } public byte[] decodeBase64(byte[] b) throws Exception { ByteArrayInputStream bais = new ByteArrayInputStream(b); InputStream b64is = MimeUtility.decode(bais, "base64"); byte[] tmp = new byte[b.length]; int n = b64is.read(tmp); byte[] res = new byte[n]; System.arraycopy(tmp, 0, res, 0, n); return res; } public 
byte[] encodeBase64(byte[] b) throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); OutputStream b64os = MimeUtility.encode(baos, "base64"); b64os.write(b); b64os.close(); return baos.toByteArray(); } public List<Object> setToList(Set src) { List<Object> tag = new ArrayList<Object>(); for (Object item : src) { tag.add(item); } return tag; } public Locale newLocale(String language) { return new Locale(language); } public Locale newLocale(String language, String country) { return new Locale(language, country); } public Locale newLocale(String language, String country, String variant) { return new Locale(language, country, variant); } public TimeZone newTimeZone(int rawOffset, String ID) { return new SimpleTimeZone(rawOffset, ID); } public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime) { return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime); } public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime, int dstSavings) { return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime, dstSavings); } public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int startTimeMode, int endMonth, int endDay, int endDayOfWeek, int endTime, int endTimeMode, int dstSavings) { return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, startTimeMode, endMonth, endDay, endDayOfWeek, endTime, endTimeMode, dstSavings); } public Calendar newCalendar() { return Calendar.getInstance(); } public Calendar newCalendar(Locale aLocale) { return Calendar.getInstance(aLocale); } public Calendar 
newCalendar(TimeZone zone) { return Calendar.getInstance(zone); } public Calendar newCalendar(TimeZone zone, Locale aLocale) { return Calendar.getInstance(zone, aLocale); } public Machine(Handler handler) { this.handler = handler; } public void debug(String message) { if (handler != null) { handler.debug(message); } } public void error(String message) { if (handler != null) { handler.error(message); } } public void fatal(String message) { if (handler != null) { handler.fatal(message); } } public void info(String message) { if (handler != null) { handler.info(message); } } public static class Handler { public void debug(String message) { } public void error(String message) { } public void fatal(String message) { } public void info(String message) { } } }
1 | package com.paesia.schema.script.safe.jsoup; |
2 | |
3 | import java.net.URL; |
4 | |
5 | import org.jsoup.Jsoup; |
6 | import org.jsoup.nodes.Document; |
7 | import org.jsoup.safety.Whitelist; |
8 | |
9 | public class SJsoup { |
10 | |
11 | public static Document parse(String html, String baseUri) { |
12 | return Jsoup.parse(html, baseUri); |
13 | } |
14 | |
15 | public static Document parse(String html) { |
16 | return Jsoup.parse(html); |
17 | } |
18 | |
19 | public static Document parse(URL url, int timeoutMillis) throws Exception { |
20 | if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); |
21 | return Jsoup.parse(url, timeoutMillis); |
22 | } |
23 | |
24 | public static Document parseBodyFragment(String bodyHtml) { |
25 | return Jsoup.parseBodyFragment(bodyHtml); |
26 | } |
27 | |
28 | public static Document parseBodyFragment(String bodyHtml, String baseUri) { |
29 | return Jsoup.parseBodyFragment(bodyHtml, baseUri); |
30 | } |
31 | |
32 | public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) { |
33 | return Jsoup.clean(bodyHtml, baseUri, whitelist); |
34 | } |
35 | |
36 | public static String clean(String bodyHtml, Whitelist whitelist) { |
37 | return Jsoup.clean(bodyHtml, whitelist); |
38 | } |
39 | |
40 | public static boolean isValid(String bodyHtml, Whitelist whitelist) { |
41 | return Jsoup.isValid(bodyHtml, whitelist); |
42 | } |
43 | |
44 | public static SConnection connect(String url) throws Exception { |
45 | return connect(new URL(url)); |
46 | } |
47 | |
48 | public static SConnection connect(URL url) throws Exception { |
49 | if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); |
50 | return new SConnection(Jsoup.connect(url.toString())); |
51 | } |
52 | |
53 | } |
package com.paesia.schema.script.safe.jsoup; import java.net.URL; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.safety.Whitelist; public class SJsoup { public static Document parse(String html, String baseUri) { return Jsoup.parse(html, baseUri); } public static Document parse(String html) { return Jsoup.parse(html); } public static Document parse(URL url, int timeoutMillis) throws Exception { if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); return Jsoup.parse(url, timeoutMillis); } public static Document parseBodyFragment(String bodyHtml) { return Jsoup.parseBodyFragment(bodyHtml); } public static Document parseBodyFragment(String bodyHtml, String baseUri) { return Jsoup.parseBodyFragment(bodyHtml, baseUri); } public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) { return Jsoup.clean(bodyHtml, baseUri, whitelist); } public static String clean(String bodyHtml, Whitelist whitelist) { return Jsoup.clean(bodyHtml, whitelist); } public static boolean isValid(String bodyHtml, Whitelist whitelist) { return Jsoup.isValid(bodyHtml, whitelist); } public static SConnection connect(String url) throws Exception { return connect(new URL(url)); } public static SConnection connect(URL url) throws Exception { if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); return new SConnection(Jsoup.connect(url.toString())); } }
1 | package com.paesia.schema.script.safe.jsoup; |
2 | |
3 | import java.net.URL; |
4 | import java.util.HashMap; |
5 | import java.util.Map; |
6 | |
7 | import org.jsoup.Connection; |
8 | import org.jsoup.Connection.Method; |
9 | import org.jsoup.Connection.Response; |
10 | import org.jsoup.nodes.Document; |
11 | |
12 | public class SConnection { |
13 | |
14 | private Connection data; |
15 | |
16 | public SConnection(Connection data) { |
17 | this.data = data; |
18 | } |
19 | |
20 | public SConnection method(String src) { |
21 | if ("get".equalsIgnoreCase(src)) { |
22 | this.data.method(Method.GET); |
23 | } |
24 | if ("post".equalsIgnoreCase(src)) { |
25 | this.data.method(Method.POST); |
26 | } |
27 | return this; |
28 | } |
29 | |
30 | public Response execute() throws Exception { |
31 | return this.data.execute(); |
32 | } |
33 | |
34 | public SConnection cookie(String name, String value) { |
35 | this.data.cookie(name, value); |
36 | return this; |
37 | } |
38 | |
39 | public SConnection cookies(Map cookies) { |
40 | for (Object key : cookies.keySet()) { |
41 | cookie(key + "", cookies.get(key) + ""); |
42 | } |
43 | return this; |
44 | } |
45 | |
46 | public SConnection data(Map src) { |
47 | Map<String, String> tag = new HashMap<String, String>(); |
48 | for (Object key : src.keySet()) { |
49 | tag.put(key + "", src.get(key) + ""); |
50 | } |
51 | this.data.data(tag); |
52 | return this; |
53 | } |
54 | |
55 | public SConnection data(String... keyvals) { |
56 | this.data.data(keyvals); |
57 | return this; |
58 | } |
59 | |
60 | public Document get() throws Exception { |
61 | return this.data.get(); |
62 | } |
63 | |
64 | public SConnection header(String name, String value) { |
65 | this.data.header(name, value); |
66 | return this; |
67 | } |
68 | |
69 | public Document post() throws Exception { |
70 | return this.data.post(); |
71 | } |
72 | |
73 | public Map getCookies() { |
74 | return this.data.response().cookies(); |
75 | } |
76 | |
77 | public SConnection referrer(String referrer) { |
78 | this.data.referrer(referrer); |
79 | return this; |
80 | } |
81 | |
82 | public SConnection timeout(int millis) { |
83 | this.data.timeout(millis); |
84 | return this; |
85 | } |
86 | |
87 | public SConnection url(URL url) throws Exception { |
88 | if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); |
89 | this.data.url(url); |
90 | return this; |
91 | } |
92 | |
93 | public SConnection url(String url) throws Exception { |
94 | return url(new URL(url)); |
95 | } |
96 | |
97 | public SConnection userAgent(String userAgent) { |
98 | this.data.userAgent(userAgent); |
99 | return this; |
100 | } |
101 | |
102 | } |
package com.paesia.schema.script.safe.jsoup; import java.net.URL; import java.util.HashMap; import java.util.Map; import org.jsoup.Connection; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; public class SConnection { private Connection data; public SConnection(Connection data) { this.data = data; } public SConnection method(String src) { if ("get".equalsIgnoreCase(src)) { this.data.method(Method.GET); } if ("post".equalsIgnoreCase(src)) { this.data.method(Method.POST); } return this; } public Response execute() throws Exception { return this.data.execute(); } public SConnection cookie(String name, String value) { this.data.cookie(name, value); return this; } public SConnection cookies(Map cookies) { for (Object key : cookies.keySet()) { cookie(key + "", cookies.get(key) + ""); } return this; } public SConnection data(Map src) { Map<String, String> tag = new HashMap<String, String>(); for (Object key : src.keySet()) { tag.put(key + "", src.get(key) + ""); } this.data.data(tag); return this; } public SConnection data(String... 
keyvals) { this.data.data(keyvals); return this; } public Document get() throws Exception { return this.data.get(); } public SConnection header(String name, String value) { this.data.header(name, value); return this; } public Document post() throws Exception { return this.data.post(); } public Map getCookies() { return this.data.response().cookies(); } public SConnection referrer(String referrer) { this.data.referrer(referrer); return this; } public SConnection timeout(int millis) { this.data.timeout(millis); return this; } public SConnection url(URL url) throws Exception { if (!"http".equals(url.getProtocol()) && !"https".equals(url.getProtocol())) throw new Exception("Protocol is not supported!"); this.data.url(url); return this; } public SConnection url(String url) throws Exception { return url(new URL(url)); } public SConnection userAgent(String userAgent) { this.data.userAgent(userAgent); return this; } }
1 | public static class DataHandler extends Machine.Handler { |
2 | |
3 | public void debug(String message) { |
4 | logger.debug(message); |
5 | } |
6 | |
7 | public void error(String message) { |
8 | logger.error(message); |
9 | } |
10 | |
11 | public void fatal(String message) { |
12 | logger.fatal(message); |
13 | } |
14 | |
15 | public void info(String message) { |
16 | logger.info(message); |
17 | } |
18 | |
19 | } |
public static class DataHandler extends Machine.Handler { public void debug(String message) { logger.debug(message); } public void error(String message) { logger.error(message); } public void fatal(String message) { logger.fatal(message); } public void info(String message) { logger.info(message); } }
/**
 * Script entry point: collects every anchor on http://yahoo.com into args' 'links' list.
 *
 * Each collected item is a map with 'title' (the anchor text) and 'link' (the href
 * resolved against the page URL, as a string).
 *
 * @param env  host object providing newURL, newJsoup, newHashMap and error
 * @param args map holding the 'links' list that collected items are appended to
 */
function main(env, args) {
    var links = args.get('links');
    var base;
    var anchors;
    try {
        base = env.newURL('http://yahoo.com');
        anchors = env.newJsoup().parse(base, 60000).select('a');
    } catch (e) {
        // Without the page there is nothing to collect.
        env.error(e);
        return;
    }
    for (var i = 0; i < anchors.size(); i++) {
        var anchor = anchors.get(i);
        // Handle failures per link: an href that newURL rejects should only drop
        // that link, not every link after it.
        try {
            var item = env.newHashMap();
            item.put('title', anchor.text());
            var href = env.newURL(base, anchor.attr('href'));
            item.put('link', href + '');
            links.add(item);
        } catch (e) {
            env.error(e);
        }
    }
}
function main(env, args) { var links = args.get('links'); try { var url = env.newURL('http://yahoo.com'); var doc = env.newJsoup().parse(url, 60000); var elements = doc.select('a'); for (var i = 0; i < elements.size(); i++) { var element = elements.get(i); var item = env.newHashMap(); item.put('title', element.text()); var href = env.newURL(url, element.attr('href')); item.put('link', href + ''); links.add(item); } } catch (e) { env.error(e); } }
No comments:
Post a Comment