Friday 13 April 2012

Create javascript sandbox with jsoup support

Create javascript sandbox with jsoup support
This task uses Java and Rhino to create a JavaScript sandbox with jsoup support.
Create javascript sandbox with jsoup support
  1. Create the com.paesia.schema.script.Machine class as follows
  2. Create the com.paesia.schema.script.safe.jsoup.SJsoup class as follows
  3. Create the com.paesia.schema.script.safe.jsoup.SConnection class as follows
  4. Create the DataHandler class as follows
  5. Create the JavaScript as follows
  6. Call the Machine.run() method as follows
Call Machine.run() method
// Load the script and hand it a shared "links" list that main() fills in.
String js = loadJS();
List links = new ArrayList();
Map args = new HashMap();
args.put("links", links);

Machine env = new Machine(new DataHandler());
Machine.run(env, js, args);

// Dump every collected item as "key : value" lines, one log record per item.
for (Object linkObj : links) {
    Map item = (Map) linkObj;
    StringBuilder line = new StringBuilder();
    for (Object entryObj : item.entrySet()) {
        Map.Entry entry = (Map.Entry) entryObj;
        line.append("\r\n").append(entry.getKey()).append(" : ").append(entry.getValue());
    }
    logger.info("\r\n--------------------------------\r\n" + line + "\r\n");
}
    
com.paesia.schema.script.Machine class
package com.paesia.schema.script;

import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.mozilla.javascript.ClassShutter;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.Scriptable;

import com.paesia.schema.script.safe.jsoup.SJsoup;

import java.util.UUID;
import java.util.Random;
import java.util.Date;
import java.text.SimpleDateFormat;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import javax.mail.internet.MimeUtility;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import java.util.Set;

import java.util.Locale;
import java.util.TimeZone;
import java.util.SimpleTimeZone;
import java.util.Calendar;

/**
 * Host object for running a script inside a restricted Rhino context.
 *
 * <p>{@link #run(Machine, String, Map)} evaluates the script with a
 * {@link ClassShutter} that only exposes the classes listed in
 * {@link #VISIBLE_CLASSES} and {@link #VISIBLE_PREFIXES}, then calls the
 * script's {@code main(env, args)} function. Because scripts cannot reach
 * arbitrary Java constructors, the instance methods below act as factories
 * and small utilities the script calls through {@code env}.
 */
public class Machine {

    /** Fully qualified names of the classes scripts are allowed to see. */
    private static final String[] VISIBLE_CLASSES = {
        "com.paesia.schema.script.Machine",
        "java.lang.String",
        "java.lang.Object",
        "java.util.HashMap",
        "java.util.ArrayList",
        "java.lang.Byte",
        "java.lang.Short",
        "java.lang.Integer",
        "java.lang.Long",
        "java.lang.Float",
        "java.lang.Double",
        "java.lang.Boolean",
        "java.lang.Character",
        "java.util.Collection",
        "java.util.List",
        "java.util.Map",
        "java.util.LinkedHashMap",
        "java.util.Iterator",
        "java.util.ListIterator",
        "java.lang.Iterable",
        "java.net.URL",
        "org.jsoup.helper.HttpConnection$Response",
        "java.util.Date",
        "java.text.SimpleDateFormat",
        "java.util.LinkedHashSet",
        "java.util.Locale",
        "java.util.TimeZone",
        "java.util.SimpleTimeZone",
        "java.util.Calendar",
        "java.util.GregorianCalendar",
    };

    /** Class-name prefixes scripts are allowed to see (matched with startsWith). */
    private static final String[] VISIBLE_PREFIXES = {
        "org.jsoup.nodes.",
        "org.jsoup.select.",
        "org.jsoup.safety.",
        "org.jsoup.parser.",
        "com.paesia.schema.script.safe.",
        "java.util.Collections",
    };

    /** Receives log messages emitted by the script; may be null. */
    private final Handler handler;

    /**
     * Creates a machine whose logging methods forward to the given handler.
     *
     * @param handler log sink, or null to discard all messages
     */
    public Machine(Handler handler) {
        this.handler = handler;
    }

    /**
     * Evaluates {@code js} in a fresh scope and invokes its {@code main}
     * function with {@code env} and {@code args}.
     *
     * @param env  host object passed to the script as the first argument
     * @param js   script source; must define a function named {@code main}
     * @param args second argument passed to {@code main}
     * @throws Exception if {@code main} is missing or not a function, or if
     *                   evaluating or calling the script fails
     */
    public static void run(Machine env, String js, Map args) throws Exception {
        // Enter before the try block so that exit() only runs after a successful enter().
        Context cx = Context.enter();
        try {
            cx.setClassShutter(new ClassShutter() {
                public boolean visibleToScripts(String className) {
                    return isVisibleToScripts(className);
                }
            });
            Scriptable scope = cx.initStandardObjects();
            cx.evaluateString(scope, js, "<js>", 1, null);
            Object fObj = scope.get("main", scope);
            if (!(fObj instanceof Function)) {
                throw new Exception("main() is undefined or not a function.");
            }
            Object[] functionArgs = { env, args };
            ((Function) fObj).call(cx, scope, scope, functionArgs);
        } finally {
            Context.exit();
        }
    }

    /** Returns true when the class name is on the exact list or matches a prefix. */
    private static boolean isVisibleToScripts(String className) {
        for (int i = 0; i < VISIBLE_CLASSES.length; i++) {
            if (VISIBLE_CLASSES[i].equals(className)) {
                return true;
            }
        }
        for (int i = 0; i < VISIBLE_PREFIXES.length; i++) {
            if (className.startsWith(VISIBLE_PREFIXES[i])) {
                return true;
            }
        }
        return false;
    }

    /** Returns {@code src} unchanged, typed as a Java String. */
    public String newString(String src) {
        return src;
    }

    /**
     * Decodes bytes into a String.
     *
     * @throws Exception if the charset name is not supported
     */
    public String newString(byte[] src, String charset) throws Exception {
        return new String(src, charset);
    }

    /** Creates an empty HashMap. */
    public HashMap newHashMap() {
        return new HashMap();
    }

    /** Creates an empty ArrayList. */
    public ArrayList newArrayList() {
        return new ArrayList();
    }

    /** Boxes a byte. */
    public Byte newByte(byte src) {
        return Byte.valueOf(src);
    }

    /** Boxes a short. */
    public Short newShort(short src) {
        return Short.valueOf(src);
    }

    /** Boxes an int. */
    public Integer newInteger(int src) {
        return Integer.valueOf(src);
    }

    /** Boxes a long. */
    public Long newLong(long src) {
        return Long.valueOf(src);
    }

    /** Boxes a float. */
    public Float newFloat(float src) {
        return Float.valueOf(src);
    }

    /** Boxes a double. */
    public Double newDouble(double src) {
        return Double.valueOf(src);
    }

    /** Boxes a boolean. */
    public Boolean newBoolean(boolean src) {
        return Boolean.valueOf(src);
    }

    /** Boxes a char. */
    public Character newCharacter(char src) {
        return Character.valueOf(src);
    }

    /** Copies the keys of {@code src} into a new list, in the map's iteration order. */
    public List getKeys(Map src) {
        return new ArrayList(src.keySet());
    }

    /** Builds a URL from protocol, host, port and file. */
    public URL newURL(String protocol, String host, int port, String file) throws Exception {
        return new URL(protocol, host, port, file);
    }

    /** Builds a URL from protocol, host and file. */
    public URL newURL(String protocol, String host, String file) throws Exception {
        return new URL(protocol, host, file);
    }

    /** Parses a URL from its string form. */
    public URL newURL(String spec) throws Exception {
        return new URL(spec);
    }

    /** Resolves {@code spec} against {@code context}. */
    public URL newURL(URL context, String spec) throws Exception {
        return new URL(context, spec);
    }

    /** Creates the jsoup facade exposed to scripts. */
    public SJsoup newJsoup() {
        return new SJsoup();
    }

    /**
     * URL-encodes {@code src} using {@code charset}.
     *
     * @return the encoded text, or an empty string if encoding fails
     */
    public String encodeURL(String src, String charset) {
        try {
            return URLEncoder.encode(src, charset);
        } catch (Exception e) {
            // Best effort by design: scripts get "" instead of an exception.
            return "";
        }
    }

    /**
     * URL-decodes {@code src} using {@code charset}.
     *
     * @return the decoded text, or an empty string if decoding fails
     */
    public String decodeURL(String src, String charset) {
        try {
            return URLDecoder.decode(src, charset);
        } catch (Exception e) {
            // Best effort by design: scripts get "" instead of an exception.
            return "";
        }
    }

    /** Returns a random UUID rendered without dashes. */
    public String uniqid() {
        return UUID.randomUUID().toString().replaceAll("-", "");
    }

    /** Returns a short random id: a non-negative random long in base 36. */
    public String suniqid() {
        Random random = new Random();
        // Mask the sign bit instead of Math.abs(): Math.abs(Long.MIN_VALUE) is
        // still negative and would produce an id starting with '-'.
        return Long.toString(random.nextLong() & Long.MAX_VALUE, 36);
    }

    /** Returns the current date and time. */
    public Date newDate() {
        return new Date();
    }

    /** Returns the date for the given epoch milliseconds. */
    public Date newDate(long time) {
        return new Date(time);
    }

    /** Creates a SimpleDateFormat for the given pattern. */
    public SimpleDateFormat newDateFormat(String format) {
        return new SimpleDateFormat(format);
    }

    /**
     * Decodes base64-encoded bytes.
     *
     * @param b base64 text as bytes
     * @return the decoded bytes
     * @throws Exception if the decoder cannot be created or reading fails
     */
    public byte[] decodeBase64(byte[] b) throws Exception {
        InputStream b64is = MimeUtility.decode(new ByteArrayInputStream(b), "base64");
        ByteArrayOutputStream decoded = new ByteArrayOutputStream(b.length);
        byte[] buffer = new byte[1024];
        int n;
        // Read until EOF: a single read() may return only part of the data,
        // and returns -1 when there is nothing to decode.
        while ((n = b64is.read(buffer)) != -1) {
            decoded.write(buffer, 0, n);
        }
        return decoded.toByteArray();
    }

    /**
     * Encodes bytes as base64.
     *
     * @param b raw bytes
     * @return the base64 text as bytes
     * @throws Exception if the encoder cannot be created or writing fails
     */
    public byte[] encodeBase64(byte[] b) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        OutputStream b64os = MimeUtility.encode(baos, "base64");
        b64os.write(b);
        // Closing flushes any pending output of the encoder into baos.
        b64os.close();
        return baos.toByteArray();
    }

    /** Copies the elements of {@code src} into a new list, in the set's iteration order. */
    public List<Object> setToList(Set src) {
        List<Object> tag = new ArrayList<Object>();
        for (Object item : src) {
            tag.add(item);
        }
        return tag;
    }

    /** Creates a Locale from a language code. */
    public Locale newLocale(String language) {
        return new Locale(language);
    }

    /** Creates a Locale from language and country codes. */
    public Locale newLocale(String language, String country) {
        return new Locale(language, country);
    }

    /** Creates a Locale from language, country and variant. */
    public Locale newLocale(String language, String country, String variant) {
        return new Locale(language, country, variant);
    }

    /** Creates a SimpleTimeZone with a fixed offset and no daylight saving rules. */
    public TimeZone newTimeZone(int rawOffset, String ID) {
        return new SimpleTimeZone(rawOffset, ID);
    }

    /** Creates a SimpleTimeZone with daylight saving start and end rules. */
    public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime) {
        return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime);
    }

    /** Creates a SimpleTimeZone with daylight saving rules and an explicit savings amount. */
    public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int endMonth, int endDay, int endDayOfWeek, int endTime, int dstSavings) {
        return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, endMonth, endDay, endDayOfWeek, endTime, dstSavings);
    }

    /** Creates a SimpleTimeZone with daylight saving rules, time modes and savings amount. */
    public TimeZone newTimeZone(int rawOffset, String ID, int startMonth, int startDay, int startDayOfWeek, int startTime, int startTimeMode, int endMonth, int endDay, int endDayOfWeek, int endTime, int endTimeMode, int dstSavings) {
        return new SimpleTimeZone(rawOffset, ID, startMonth, startDay, startDayOfWeek, startTime, startTimeMode, endMonth, endDay, endDayOfWeek, endTime, endTimeMode, dstSavings);
    }

    /** Returns a calendar for the default time zone and locale. */
    public Calendar newCalendar() {
        return Calendar.getInstance();
    }

    /** Returns a calendar for the default time zone and the given locale. */
    public Calendar newCalendar(Locale aLocale) {
        return Calendar.getInstance(aLocale);
    }

    /** Returns a calendar for the given time zone and the default locale. */
    public Calendar newCalendar(TimeZone zone) {
        return Calendar.getInstance(zone);
    }

    /** Returns a calendar for the given time zone and locale. */
    public Calendar newCalendar(TimeZone zone, Locale aLocale) {
        return Calendar.getInstance(zone, aLocale);
    }

    /** Forwards a debug message to the handler, if one is set. */
    public void debug(String message) {
        if (handler != null) {
            handler.debug(message);
        }
    }

    /** Forwards an error message to the handler, if one is set. */
    public void error(String message) {
        if (handler != null) {
            handler.error(message);
        }
    }

    /** Forwards a fatal message to the handler, if one is set. */
    public void fatal(String message) {
        if (handler != null) {
            handler.fatal(message);
        }
    }

    /** Forwards an info message to the handler, if one is set. */
    public void info(String message) {
        if (handler != null) {
            handler.info(message);
        }
    }

    /**
     * Log sink for messages emitted by scripts. Every method is a no-op;
     * subclasses override the levels they care about.
     */
    public static class Handler {

        public void debug(String message) { }
        public void error(String message) { }
        public void fatal(String message) { }
        public void info(String message) { }

    }

}
    
com.paesia.schema.script.safe.jsoup.SJsoup class
package com.paesia.schema.script.safe.jsoup;

import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;

/**
 * Script-facing facade over {@link Jsoup}. Parsing and cleaning calls are
 * passed straight through; calls that fetch a URL first reject any protocol
 * other than http and https.
 */
public class SJsoup {

    /** Throws unless the URL's protocol is http or https. */
    private static void requireHttpOrHttps(URL url) throws Exception {
        String protocol = url.getProtocol();
        boolean supported = "http".equals(protocol) || "https".equals(protocol);
        if (!supported) {
            throw new Exception("Protocol is not supported!");
        }
    }

    /** Parses HTML text, resolving relative links against {@code baseUri}. */
    public static Document parse(String html, String baseUri) {
        Document document = Jsoup.parse(html, baseUri);
        return document;
    }

    /** Parses HTML text. */
    public static Document parse(String html) {
        Document document = Jsoup.parse(html);
        return document;
    }

    /**
     * Fetches and parses the document at {@code url}.
     *
     * @throws Exception if the protocol is not http/https or the fetch fails
     */
    public static Document parse(URL url, int timeoutMillis) throws Exception {
        requireHttpOrHttps(url);
        return Jsoup.parse(url, timeoutMillis);
    }

    /** Parses an HTML body fragment. */
    public static Document parseBodyFragment(String bodyHtml) {
        Document document = Jsoup.parseBodyFragment(bodyHtml);
        return document;
    }

    /** Parses an HTML body fragment, resolving relative links against {@code baseUri}. */
    public static Document parseBodyFragment(String bodyHtml, String baseUri) {
        Document document = Jsoup.parseBodyFragment(bodyHtml, baseUri);
        return document;
    }

    /** Cleans body HTML against the whitelist, using {@code baseUri} for relative links. */
    public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
        String cleaned = Jsoup.clean(bodyHtml, baseUri, whitelist);
        return cleaned;
    }

    /** Cleans body HTML against the whitelist. */
    public static String clean(String bodyHtml, Whitelist whitelist) {
        String cleaned = Jsoup.clean(bodyHtml, whitelist);
        return cleaned;
    }

    /** Reports whether the body HTML passes the whitelist. */
    public static boolean isValid(String bodyHtml, Whitelist whitelist) {
        boolean valid = Jsoup.isValid(bodyHtml, whitelist);
        return valid;
    }

    /**
     * Opens a connection to the URL given as text.
     *
     * @throws Exception if the text is not a valid URL or the protocol is not http/https
     */
    public static SConnection connect(String url) throws Exception {
        URL parsed = new URL(url);
        return connect(parsed);
    }

    /**
     * Opens a connection to {@code url}.
     *
     * @throws Exception if the protocol is not http/https
     */
    public static SConnection connect(URL url) throws Exception {
        requireHttpOrHttps(url);
        String target = url.toString();
        return new SConnection(Jsoup.connect(target));
    }

}
    
com.paesia.schema.script.safe.jsoup.SConnection class
package com.paesia.schema.script.safe.jsoup;

import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;

/**
 * Script-facing wrapper around a jsoup {@link Connection}. Configuration
 * methods return this wrapper so calls can be chained; URL changes are
 * limited to the http and https protocols.
 */
public class SConnection {

    /** The wrapped jsoup connection. */
    private Connection data;

    /** Wraps an existing jsoup connection. */
    public SConnection(Connection data) {
        this.data = data;
    }

    /**
     * Selects the request method by name, ignoring case. Only "get" and
     * "post" are recognised; any other value leaves the method unchanged.
     */
    public SConnection method(String src) {
        if ("get".equalsIgnoreCase(src)) {
            data.method(Method.GET);
        } else if ("post".equalsIgnoreCase(src)) {
            data.method(Method.POST);
        }
        return this;
    }

    /** Executes the request and returns the raw response. */
    public Response execute() throws Exception {
        Response response = data.execute();
        return response;
    }

    /** Sets a single request cookie. */
    public SConnection cookie(String name, String value) {
        data.cookie(name, value);
        return this;
    }

    /** Sets one request cookie per map entry, converting keys and values to strings. */
    public SConnection cookies(Map cookies) {
        for (Object entryObj : cookies.entrySet()) {
            Map.Entry entry = (Map.Entry) entryObj;
            cookie(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
        }
        return this;
    }

    /** Adds request data from a map, converting keys and values to strings. */
    public SConnection data(Map src) {
        Map<String, String> converted = new HashMap<String, String>();
        for (Object entryObj : src.entrySet()) {
            Map.Entry entry = (Map.Entry) entryObj;
            converted.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
        }
        data.data(converted);
        return this;
    }

    /** Adds request data given as alternating key/value strings. */
    public SConnection data(String... keyvals) {
        data.data(keyvals);
        return this;
    }

    /** Executes the request as a GET and parses the result. */
    public Document get() throws Exception {
        Document document = data.get();
        return document;
    }

    /** Sets a request header. */
    public SConnection header(String name, String value) {
        data.header(name, value);
        return this;
    }

    /** Executes the request as a POST and parses the result. */
    public Document post() throws Exception {
        Document document = data.post();
        return document;
    }

    /** Returns the cookies of the connection's response. */
    public Map getCookies() {
        Response response = data.response();
        return response.cookies();
    }

    /** Sets the referrer header. */
    public SConnection referrer(String referrer) {
        data.referrer(referrer);
        return this;
    }

    /** Sets the request timeout in milliseconds. */
    public SConnection timeout(int millis) {
        data.timeout(millis);
        return this;
    }

    /**
     * Changes the request URL.
     *
     * @throws Exception if the protocol is not http/https
     */
    public SConnection url(URL url) throws Exception {
        String protocol = url.getProtocol();
        boolean supported = "http".equals(protocol) || "https".equals(protocol);
        if (!supported) {
            throw new Exception("Protocol is not supported!");
        }
        data.url(url);
        return this;
    }

    /**
     * Changes the request URL, given as text.
     *
     * @throws Exception if the text is not a valid URL or the protocol is not http/https
     */
    public SConnection url(String url) throws Exception {
        URL parsed = new URL(url);
        return url(parsed);
    }

    /** Sets the user-agent header. */
    public SConnection userAgent(String userAgent) {
        data.userAgent(userAgent);
        return this;
    }

}
    
DataHandler class
public static class DataHandler extends Machine.Handler {
     
    public void debug(String message) { 
        logger.debug(message);
    }
  
    public void error(String message) { 
        logger.error(message);
    }
  
    public void fatal(String message) { 
        logger.fatal(message);
    }
  
    public void info(String message) { 
        logger.info(message);
    }
     
}
    
javascript
/**
 * Sandbox entry point: fetches http://yahoo.com, and for every anchor on the
 * page appends a map with 'title' (link text) and 'link' (absolute URL) to
 * the 'links' list supplied by the caller.
 *
 * @param env  host object providing newURL, newJsoup, newHashMap and error
 * @param args map holding the 'links' list to fill
 */
function main(env, args) {
    var links = args.get('links');
    try {
        var url = env.newURL('http://yahoo.com');
        var doc = env.newJsoup().parse(url, 60000);
        var elements = doc.select('a');
        for (var i = 0; i < elements.size(); i++) {
            var element = elements.get(i);
            // Handle each link on its own: an href that cannot be resolved
            // (for example "javascript:void(0)") must not discard the
            // remaining links on the page.
            try {
                var item = env.newHashMap();
                item.put('title', element.text());
                var href = env.newURL(url, element.attr('href'));
                item.put('link', href + '');
                links.add(item);
            } catch (linkError) {
                env.error('Skipping link: ' + linkError);
            }
        }
    } catch (e) {
        env.error(e);
    }
}
    

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment