Add Lucene support to javascript sandbox
This task adds Lucene support to the JavaScript sandbox.
Add Lucene support to javascript sandbox
- Create javascript sandbox with jsoup support
- Create the com.paesia.schema.script.safe.lucene.SEntity class as follows
- Create the com.paesia.schema.script.LuceneHandler class as follows
- Modify the com.paesia.schema.script.Machine class as follows
- Modify the DataHandler class as follows
- Create the JavaScript as follows
- Call the Machine.run() method as follows
Call Machine.run() method     
// Index/backup locations and quota handed to DataHandler; the empty strings
// are placeholders to be replaced with real directories.
String dirIndex = "";
String dirBackup = "";
double systemQuota = 1024 * 1024;
String js = loadJS();

// "links" is shared with the script: the script fills it, the host reads it back.
Map<String, Object> args = new HashMap<String, Object>();
List<Object> links = new ArrayList<Object>();
args.put("links", links);
Machine env = new Machine(new DataHandler(dirIndex, dirBackup, systemQuota));
Machine.run(env, js, args);

// Dump every map the script appended, one "key : value" pair per line.
for (int i = 0; i < links.size(); i++) {
    Map<?, ?> item = (Map<?, ?>) links.get(i);
    StringBuilder line = new StringBuilder();
    for (Object key : item.keySet()) {
        line.append("\r\n").append(key).append(" : ").append(item.get(key));
    }
    logger.info("\r\n" + (i + 1) + " --------------------------------\r\n" + line + "\r\n");
}
    Modify com.paesia.schema.script.Machine class     ............
import com.paesia.schema.script.safe.lucene.SEntity;
/**
 * Script sandbox entry point (fragment: lines of dots mark code omitted from
 * this listing). The visible additions expose a whitelist of Lucene classes
 * to scripts and let scripts obtain {@link SEntity} instances.
 */
public class Machine {
    // Host-side callback provider; may be null, in which case newEntity()
    // builds entities without a storage back-end.
    private Handler handler;
 
    /**
     * Runs a script inside a freshly entered context.
     * NOTE(review): Context/ClassShutter look like Rhino's
     * org.mozilla.javascript types - imports are not shown here, confirm.
     *
     * @param env  the machine the script runs against
     * @param js   script source
     * @param args values shared with the script
     * @throws Exception whatever the omitted body raises; it is rethrown unchanged
     */
    public static void run(Machine env, String js, Map args) throws Exception {
        try {
            Context cx = Context.enter();
            // Restrict which Java classes scripts may see: only names matched
            // below (plus whatever the omitted lines allow) return true.
            cx.setClassShutter(new ClassShutter() {
                public boolean visibleToScripts(String className) {  
...........
                    // Lucene query, filter, sort and term types that SEntity's
                    // factory methods hand back to scripts.
                    if ("org.apache.lucene.search.Query".equals(className)) return true;
                    if ("org.apache.lucene.search.Filter".equals(className)) return true;
                    if ("org.apache.lucene.search.Sort".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanClause".equals(className)) return true;
                    // Prefix match covers BooleanClause's nested classes (e.g. Occur).
                    if (className.startsWith("org.apache.lucene.search.BooleanClause$")) return true;
                    if ("org.apache.lucene.search.PhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.index.Term".equals(className)) return true;
                    if ("org.apache.lucene.search.MultiPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NGramPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.WildcardQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.MatchAllDocsQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.FieldValueFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.QueryWrapperFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.SortField".equals(className)) return true;
...........
                    // Anything not explicitly whitelisted stays hidden.
                    return false;
                }
            });   
...........
        } catch (Exception e) {
            throw e;
        } finally {
            // Always leave the context, even when the script fails.
            Context.exit();   
        }
    }
...........
    /**
     * Creates an entity wired to this machine's entity handler.
     *
     * @return a new SEntity; its handler is null when no Handler is set or
     *         when the Handler returns null from getEntityHandler()
     */
    public SEntity newEntity() {
        SEntity.Handler seh = null;
        if (handler != null) {
            seh = handler.getEntityHandler();
        }
        return new SEntity(seh);
    }
    /** Host callback hooks; the default implementation supplies no entity handler. */
    public static class Handler {
...........
        /** @return the storage back-end for new entities; null by default */
        public SEntity.Handler getEntityHandler() { return null; }
  
    }
...........
}
    Create com.paesia.schema.script.safe.lucene.SEntity class
package com.paesia.schema.script.safe.lucene;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NGramPhraseQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
/**
 * Schema-aware property bag exposed to sandboxed scripts.
 *
 * <p>An entity keeps its values in {@link #data} and the declared type code of
 * every field in {@link #schema}; values can only be written to fields that
 * were registered first. The schema itself is serialized into the
 * {@link #SCHEMA} property as {@code type|field|type|field...}, which is why
 * neither type codes nor field names may contain {@code '|'}.
 *
 * <p>Persistence and searching are delegated to a {@link Handler}. With a
 * {@code null} handler every storage call is a no-op that returns an empty
 * result. The class also offers factory methods for the Lucene query, filter
 * and sort types so scripts never have to call constructors themselves.
 */
public class SEntity {
    /** Type code: string field. */
    public static final String STRING = "s";
    /** Type code: double field. */
    public static final String DOUBLE = "d";
    /** Type code: float field. */
    public static final String FLOAT = "f";
    /** Type code: integer field. */
    public static final String INTEGER = "i";
    /** Type code: long field. */
    public static final String LONG = "l";
    /** Type code: analyzed text field. */
    public static final String ANALYZED = "a";

    /** Every valid type code, each wrapped in pipes so a code can be looked up as {@code |code|}. */
    public static final String ALL_KINDS = "|s|d|f|i|l|a|";

    /** Reserved field holding the serialized schema. */
    public static final String SCHEMA = "F4f8cc93237f50";
    /** Reserved field holding the entity id. */
    public static final String ID = "F4f8cce61643dd";
    /** Reserved field holding the creation time in epoch milliseconds. */
    public static final String CREATED = "F4f8cd83fcca31";
    /** Reserved field holding the last-update time in epoch milliseconds. */
    public static final String UPDATED = "F4f8cd84e2b74a";
    /** Reserved field holding the entity kind. */
    public static final String KIND = "F4f8cd9c8ee13d";
    /** Reserved field holding the entity mark. */
    public static final String MARK = "F4f8cda27d62fb";

    /** Field values, all stored as strings. */
    protected Properties data = new Properties();
    /** Field name to type code. */
    protected Properties schema = new Properties();
    /** Storage back-end; may be null. */
    protected Handler handler = null;

    /**
     * Creates an entity with only the reserved fields registered.
     *
     * @param handler storage back-end, or null for a detached entity
     */
    public SEntity(Handler handler) {
        this.handler = handler;
        registerDefault();
    }

    /**
     * Declares a field so that values may be stored in it.
     *
     * <p>The call is silently ignored when {@code type} is not exactly one of
     * the codes in {@link #ALL_KINDS}, or when either argument is null or
     * contains {@code '|'}: such values would corrupt the pipe-delimited
     * schema string.
     *
     * @param field field name
     * @param type  one of the single-letter type codes
     */
    public void register(String field, String type) {
        if (field == null || field.indexOf('|') >= 0) return;
        // A bare substring test would accept multi-code strings like "s|d".
        if (type == null || type.indexOf('|') >= 0) return;
        if (ALL_KINDS.indexOf("|" + type + "|") < 0) return;
        schema.put(field, type);
        saveSchema();
    }

    /**
     * Replaces the schema with the one described by {@code src}; the reserved
     * fields are always re-registered afterwards.
     *
     * @param src schema in {@code type|field|type|field...} form
     */
    public void setSchema(String src) {
        applySchema(src);
    }

    /** @return the serialized schema, or an empty string when none is stored */
    public String getSchema() {
        String tag = data.getProperty(SCHEMA);
        if (tag == null) tag = "";
        return tag;
    }

    /**
     * Replaces the entity's content with properties-format text, typically
     * the output of {@link #toString()}. Unparseable (or null) input leaves
     * the entity with just the reserved fields.
     *
     * @param src text in {@link Properties} format
     */
    public void fromString(String src) {
        data.clear();
        schema.clear();
        try {
            // Read characters directly: Properties.load(InputStream) decodes
            // ISO-8859-1, so going through UTF-8 bytes mangled non-ASCII text.
            data.load(new StringReader(src));
        } catch (Exception ignored) {
            // Best effort by design: fall through with whatever was loaded.
        }
        loadSchema();
    }

    /**
     * Serializes all values, the schema included, in {@link Properties} format.
     *
     * @return the serialized entity, or an empty string if serialization fails
     */
    @Override
    public String toString() {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            data.store(baos, "");
            // store(OutputStream) always writes ISO-8859-1; decode with the
            // same charset instead of the platform default.
            return baos.toString("ISO-8859-1");
        } catch (Exception ignored) {
            return "";
        }
    }

    /** @return the field's stored value, or an empty string when absent */
    public String getString(String field) {
        String tag = data.getProperty(field);
        if (tag == null) tag = "";
        return tag;
    }

    /**
     * Stores a value; ignored unless {@code field} is registered.
     * A null value is stored as an empty string.
     */
    public void setString(String field, String value) {
        if (schema.containsKey(field)) {
            if (value == null) value = "";
            data.setProperty(field, value);
        }
    }

    /** @return the field parsed as a double, or 0 when absent or not numeric */
    public double getDouble(String field) {
        try {
            return Double.parseDouble(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Stores a double in its decimal string form. */
    public void setDouble(String field, double value) {
        setString(field, Double.toString(value));
    }

    /** @return the field parsed as a float, or 0 when absent or not numeric */
    public float getFloat(String field) {
        try {
            return Float.parseFloat(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Stores a float in its decimal string form. */
    public void setFloat(String field, float value) {
        setString(field, Float.toString(value));
    }

    /** @return the field parsed as a long, or 0 when absent or not numeric */
    public long getLong(String field) {
        try {
            return Long.parseLong(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Stores a long in its decimal string form. */
    public void setLong(String field, long value) {
        setString(field, Long.toString(value));
    }

    /** @return the field parsed as an int, or 0 when absent or not numeric */
    public int getInteger(String field) {
        try {
            return Integer.parseInt(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Stores an int in its decimal string form. */
    public void setInteger(String field, int value) {
        setString(field, Integer.toString(value));
    }

    /** @return the entity id, or an empty string when unset */
    public String getId() {
        return getString(ID);
    }

    /** Sets the entity id. */
    public void setId(String src) {
        setString(ID, src);
    }

    /** @return the entity kind, or an empty string when unset */
    public String getKind() {
        return getString(KIND);
    }

    /** Sets the entity kind. */
    public void setKind(String src) {
        setString(KIND, src);
    }

    /** @return the entity mark, or an empty string when unset */
    public String getMark() {
        return getString(MARK);
    }

    /** Sets the entity mark. */
    public void setMark(String src) {
        setString(MARK, src);
    }

    /** @return the creation time; the epoch when never saved */
    public Date getCreated() {
        return new Date(getLong(CREATED));
    }

    /** @return the last-update time; the epoch when never saved */
    public Date getUpdated() {
        return new Date(getLong(UPDATED));
    }

    /** @return whether the handler reports an entity with this id; false without a handler */
    public boolean exists() {
        if (handler == null) {
            return false;
        }
        return handler.exists(getId());
    }

    /**
     * Persists the entity through the handler: updates it when the id already
     * exists, creates it otherwise. Timestamps are refreshed first.
     * Does nothing without a handler.
     */
    public void save() {
        if (handler == null) {
            return;
        }
        long now = System.currentTimeMillis();
        if (handler.exists(getId())) {
            setLong(UPDATED, now);
            handler.update(this);
        } else {
            setLong(CREATED, now);
            setLong(UPDATED, now);
            handler.create(this);
        }
    }

    // ------------------------------------------------------------------
    // Counting and searching: thin delegations to the handler. Without a
    // handler, counts are 0 and searches return a new empty list.
    // ------------------------------------------------------------------

    /** Delegates to {@link Handler#count(String, Query, int)}. */
    public int count(String kind, Query query, int max) {
        if (handler != null) {
            return handler.count(kind, query, max);
        }
        return 0;
    }

    /** Delegates to {@link Handler#count(String, Query, Sort, int)}. */
    public int count(String kind, Query query, Sort sort, int max) {
        if (handler != null) {
            return handler.count(kind, query, sort, max);
        }
        return 0;
    }

    /** Delegates to {@link Handler#count(String, Query, Filter, int)}. */
    public int count(String kind, Query query, Filter filter, int max) {
        if (handler != null) {
            return handler.count(kind, query, filter, max);
        }
        return 0;
    }

    /** Delegates to {@link Handler#count(String, Query, Filter, Sort, int)}. */
    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        if (handler != null) {
            return handler.count(kind, query, filter, sort, max);
        }
        return 0;
    }

    /** Delegates to {@link Handler#search(String, Query, int)}. */
    public List<SEntity> search(String kind, Query query, int max) {
        if (handler != null) {
            return handler.search(kind, query, max);
        }
        return new ArrayList<SEntity>();
    }

    /** Delegates to {@link Handler#search(String, Query, Sort, int)}. */
    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        if (handler != null) {
            return handler.search(kind, query, sort, max);
        }
        return new ArrayList<SEntity>();
    }

    /** Delegates to {@link Handler#search(String, Query, Filter, int)}. */
    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        if (handler != null) {
            return handler.search(kind, query, filter, max);
        }
        return new ArrayList<SEntity>();
    }

    /** Delegates to {@link Handler#search(String, Query, Filter, Sort, int)}. */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        if (handler != null) {
            return handler.search(kind, query, filter, sort, max);
        }
        return new ArrayList<SEntity>();
    }

    /** Paged search; delegates to {@link Handler#search(String, Query, int, int)}. */
    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /** Paged search; delegates to {@link Handler#search(String, Query, Sort, int, int)}. */
    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, sort, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /** Paged search; delegates to {@link Handler#search(String, Query, Filter, int, int)}. */
    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, filter, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /**
     * Paged search with filter and sort; delegates to
     * {@link Handler#search(String, Query, Filter, Sort, int, int)}.
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, filter, sort, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /**
     * Paged search with filter and sort, kept for existing callers.
     *
     * @param max accepted for compatibility only; it is not forwarded to the
     *            handler, whose paged search has no such parameter
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max, int pagesize, int pageno) {
        return search(kind, query, filter, sort, pagesize, pageno);
    }

    /** Asks the handler to load the entity with the given id into this instance. */
    public void load(String id) {
        if (handler != null) {
            handler.load(id, this);
        }
    }

    // ------------------------------------------------------------------
    // Factories for Lucene queries, so scripts need no constructors.
    // ------------------------------------------------------------------

    /** @return a new empty BooleanQuery */
    public BooleanQuery newBooleanQuery() {
        return new BooleanQuery();
    }

    /** @return a new clause combining {@code query} with {@code occur} */
    public BooleanClause newBooleanClause(Query query, Occur occur) {
        return new BooleanClause(query, occur);
    }

    /** @return {@link Occur#MUST} */
    public Occur occurMust() {
        return Occur.MUST;
    }

    /** @return {@link Occur#MUST_NOT} */
    public Occur occurMustNot() {
        return Occur.MUST_NOT;
    }

    /** @return {@link Occur#SHOULD} */
    public Occur occurShould() {
        return Occur.SHOULD;
    }

    /** @return a new MatchAllDocsQuery */
    public MatchAllDocsQuery newMatchAllDocsQuery() {
        return new MatchAllDocsQuery();
    }

    /** @return a new empty MultiPhraseQuery */
    public MultiPhraseQuery newMultiPhraseQuery() {
        return new MultiPhraseQuery();
    }

    /** @return a new empty PhraseQuery */
    public PhraseQuery newPhraseQuery() {
        return new PhraseQuery();
    }

    /** @return a new NGramPhraseQuery for n-grams of size {@code n} */
    public NGramPhraseQuery newNGramPhraseQuery(int n) {
        return new NGramPhraseQuery(n);
    }

    /** @return a new Term for {@code field} with text {@code value} */
    public Term newTerm(String field, String value) {
        return new Term(field, value);
    }

    /** @return a double range query using the default precision step */
    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a double range query using the given precision step */
    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a float range query using the default precision step */
    public NumericRangeQuery<Float> newFloatRangeQuery(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a float range query using the given precision step */
    public NumericRangeQuery<Float> newFloatRangeQuery(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return an int range query using the default precision step */
    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return an int range query using the given precision step */
    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a long range query using the default precision step */
    public NumericRangeQuery<Long> newLongRangeQuery(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a long range query using the given precision step */
    public NumericRangeQuery<Long> newLongRangeQuery(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a new PrefixQuery for {@code term} */
    public PrefixQuery newPrefixQuery(Term term) {
        return new PrefixQuery(term);
    }

    /** @return a new TermQuery for {@code term} */
    public TermQuery newTermQuery(Term term) {
        return new TermQuery(term);
    }

    /** @return a new TermRangeQuery over {@code field} */
    public TermRangeQuery newTermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    /** @return a new WildcardQuery for {@code term} */
    public WildcardQuery newWildcardQuery(Term term) {
        return new WildcardQuery(term);
    }

    // ------------------------------------------------------------------
    // Factories for Lucene filters.
    // ------------------------------------------------------------------

    /** @return a new FieldValueFilter for {@code field} */
    public FieldValueFilter newFieldValueFilter(String field, boolean negate) {
        return new FieldValueFilter(field, negate);
    }

    /** @return a double range filter using the default precision step */
    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a double range filter using the given precision step */
    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a float range filter using the default precision step */
    public NumericRangeFilter<Float> newFloatRangeFilter(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a float range filter using the given precision step */
    public NumericRangeFilter<Float> newFloatRangeFilter(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return an int range filter using the default precision step */
    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return an int range filter using the given precision step */
    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a long range filter using the default precision step */
    public NumericRangeFilter<Long> newLongRangeFilter(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    /** @return a long range filter using the given precision step */
    public NumericRangeFilter<Long> newLongRangeFilter(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** @return a new PrefixFilter for {@code term} */
    public PrefixFilter newPrefixFilter(Term term) {
        return new PrefixFilter(term);
    }

    /** @return a filter wrapping {@code query} */
    public QueryWrapperFilter newQueryWrapperFilter(Query query) {
        return new QueryWrapperFilter(query);
    }

    /** @return a new TermRangeFilter over {@code fieldName} */
    public TermRangeFilter newTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    // ------------------------------------------------------------------
    // Factories for sorting.
    // ------------------------------------------------------------------

    /**
     * @param type one of the {@code sortField*()} type constants
     * @return a new SortField
     */
    public SortField newSortField(String field, int type, boolean reverse) {
        return new SortField(field, type, reverse);
    }

    /** @return a Sort built with Lucene's no-argument constructor */
    public Sort newSort() {
        return new Sort();
    }

    /** @return a Sort over several fields, in the given order */
    public Sort newSort(SortField... fields) {
        return new Sort(fields);
    }

    /** @return a Sort over a single field */
    public Sort newSort(SortField field) {
        return new Sort(field);
    }

    // ------------------------------------------------------------------
    // Query parsing and highlighting (StandardAnalyzer, LUCENE_36).
    // ------------------------------------------------------------------

    /** Parses one query string per field; see {@code MultiFieldQueryParser.parse}. */
    public Query parseQuery(String[] queries, String[] fields) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, new StandardAnalyzer(Version.LUCENE_36));
    }

    /** Parses one query string per field with a per-field occurrence flag. */
    public Query parseQuery(String[] queries, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }

    /** Parses a single query string against several fields with per-field occurrence flags. */
    public Query parseQuery(String query, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, query, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }

    /**
     * Highlights the parts of {@code text} that match {@code query}.
     *
     * @param fragmentSize    size passed to the SimpleFragmenter
     * @param maxNumFragments maximum number of fragments to return
     * @param separator       text placed between fragments
     * @return the joined best fragments, or {@code text} unchanged when the
     *         highlighter produced nothing
     */
    public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments, String separator) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(field, new StringReader(text)));
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
        tokenStream.reset();
        String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
        return rv.length() == 0 ? text : rv;
    }

    // ------------------------------------------------------------------
    // Schema bookkeeping.
    // ------------------------------------------------------------------

    /** Registers the reserved fields every entity carries. */
    protected void registerDefault() {
        register(SCHEMA, "s");
        register(ID, "s");
        register(CREATED, "l");
        register(UPDATED, "l");
        register(KIND, "s");
        register(MARK, "s");
    }

    /** Serializes the schema as {@code type|field|...} into the SCHEMA property. */
    protected void saveSchema() {
        StringBuilder tag = new StringBuilder();
        for (Object key : schema.keySet()) {
            if (tag.length() > 0) tag.append("|");
            tag.append(schema.get(key)).append("|").append(key);
        }
        data.put(SCHEMA, tag.toString());
    }

    /** Rebuilds the schema map from the SCHEMA property currently held in the data. */
    protected void loadSchema() {
        String src = data.getProperty(SCHEMA);
        applySchema(src == null ? "" : src);
    }

    /**
     * Shared by setSchema and loadSchema: resets the schema to the
     * {@code type|field} pairs in {@code src} plus the reserved fields, then
     * writes the result back into the data.
     */
    private void applySchema(String src) {
        // Split before clearing so a null src fails without wiping the schema.
        String[] fields = src.split("\\|");
        schema.clear();
        for (int i = 0; i + 1 < fields.length; i += 2) {
            register(fields[i + 1], fields[i]);
        }
        registerDefault();
        saveSchema();
    }

    /** Deletes this entity (by its current id) through the handler. */
    public void delete() {
        delete(getId());
    }

    /** Deletes the entity with the given id through the handler; no-op without one. */
    public void delete(String id) {
        if (handler != null) {
            handler.delete(id);
        }
    }

    /** @return {@link SortField#FIELD_DOC} */
    public SortField sortFieldDoc() {
        return SortField.FIELD_DOC;
    }

    /** @return {@link SortField#FIELD_SCORE} */
    public SortField sortFieldScore() {
        return SortField.FIELD_SCORE;
    }

    /** @return {@link SortField#LONG} */
    public int sortFieldLong() {
        return SortField.LONG;
    }

    /** @return {@link SortField#INT} */
    public int sortFieldInteger() {
        return SortField.INT;
    }

    /** @return {@link SortField#DOUBLE} */
    public int sortFieldDouble() {
        return SortField.DOUBLE;
    }

    /** @return {@link SortField#FLOAT} */
    public int sortFieldFloat() {
        return SortField.FLOAT;
    }

    /** @return {@link SortField#STRING_VAL} */
    public int sortFieldString() {
        return SortField.STRING_VAL;
    }

    /** @return the quota reported by the handler, or 0 without one */
    public double storageQuota() {
        if (handler != null) {
            return handler.storageQuota();
        }
        return 0;
    }

    /** @return the used size reported by the handler, or 0 without one */
    public double storageSize() {
        if (handler != null) {
            return handler.storageSize();
        }
        return 0;
    }

    /**
     * Storage back-end contract. Every method here is a no-op returning an
     * empty or zero result; concrete back-ends override what they support.
     */
    public static class Handler {

        public boolean exists(String id) { return false; }
        public void create(SEntity src) { }
        public void update(SEntity src) { }
        public void load(String id, SEntity src) { }
        public void delete(String id) { }
        public List<SEntity> search(String kind, Query query, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public int count(String kind, Query query, int max) { return 0; }
        public int count(String kind, Query query, Sort sort, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, Sort sort, int max) { return 0; }
        public double storageQuota() { return 0; }
        public double storageSize() { return 0; }

    }

}
    Create com.paesia.schema.script.LuceneHandler class
package com.paesia.schema.script;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.paesia.schema.script.safe.lucene.SEntity;
public class LuceneHandler extends SEntity.Handler {
    public static final String KIND_QUOTA = "C4f91ee1eb414a";
    public static final String QUOTA_SYSTEM = "F4f91ee659b1ec";
 
    protected String dirIndex = "";
    protected String dirBackup = "";
    protected double systemQuota = 0;
    /**
     * Creates a handler bound to an index directory, a backup directory and a quota.
     *
     * @param dirIndex    path opened as the Lucene index directory by the read/write methods
     * @param dirBackup   root directory under which per-entity backup text files are stored
     * @param systemQuota upper bound compared against the tracked "size" value of the quota record
     */
    public LuceneHandler(String dirIndex, String dirBackup, double systemQuota) {
        this.dirIndex = dirIndex;
        this.dirBackup = dirBackup;
        this.systemQuota = systemQuota;
    }
 
    /**
     * Reports whether a document with the given identifier is present in the index.
     *
     * @param id entity identifier, matched exactly against the {@code SEntity.ID} field
     * @return {@code true} when at least one document matches; {@code false} for a
     *         {@code null} or empty id, or when the index cannot be read
     */
    public boolean exists(String id) {
        if (id == null || id.length() == 0) return false;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
            return td.totalHits > 0;
        } catch (Exception e) {
            // Best effort: a missing or unreadable index is reported as "not found".
            return false;
        } finally {
            // Release the index handles even when opening or searching fails.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Stores a new entity and blocks until the write has finished.
     * The work itself runs on a {@link Timer} thread via {@code CreateTask}; this
     * method polls the shared monitor every 10 ms until the task reports completion.
     *
     * @param src entity to store
     */
    public void create(SEntity src) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new CreateTask(timer, src, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Keep waiting so the write completes, but remember the request.
                interrupted = true;
            }
        }
        // Restore the interrupt status that the wait loop consumed.
        if (interrupted) Thread.currentThread().interrupt();
    }
    /**
     * Reserves quota for a new entity.
     * The entity's cost is the length of its {@code toString()} form divided by 1048576.
     * The reservation is saved only when the resulting total stays below the system quota.
     *
     * @param src entity about to be created
     * @return {@code true} if the new total was recorded, {@code false} if it would not fit
     */
    protected boolean quotaCreate(SEntity src) {
        SEntity usage = findSystemQuota();
        if (usage == null) {
            usage = newSystemQuota();
        }
        double current = usage.getDouble("size");
        double added = (double) src.toString().length() / 1048576.0;
        double total = current + added;
        if (total < 0) {
            total = 0;
        }
        if (!(total < systemQuota)) {
            return false;
        }
        usage.setDouble("size", total);
        usage.save();
        return true;
    }
    /**
     * Adjusts the tracked usage for an entity that is being rewritten.
     * The size of the existing backup file is subtracted and the length of the new
     * {@code toString()} form is added (both divided by 1048576). The new total is
     * saved only when it stays below the system quota.
     *
     * @param src entity about to be updated
     * @return {@code true} if the new total was recorded, {@code false} if it would not fit
     */
    protected boolean quotaUpdate(SEntity src) {
        SEntity usage = findSystemQuota();
        if (usage == null) {
            usage = newSystemQuota();
        }
        double current = usage.getDouble("size");
        double released = (double) getFileSize(src.getId(), src.getKind()) / 1048576.0;
        double added = (double) src.toString().length() / 1048576.0;
        double total = current - released + added;
        if (total < 0) {
            total = 0;
        }
        if (!(total < systemQuota)) {
            return false;
        }
        usage.setDouble("size", total);
        usage.save();
        return true;
    }
    /**
     * Releases the quota held by an entity that is being deleted.
     * The size of the entity's backup file (divided by 1048576) is subtracted from the
     * tracked usage, clamped at zero, and the result is saved.
     *
     * Freeing space can never violate the quota, so the reduced usage is always recorded.
     * Previously the update was skipped, and the delete refused, whenever the remaining
     * usage was still at or above the quota, which made an over-quota store impossible
     * to shrink.
     *
     * @param id   identifier of the entity being deleted
     * @param kind kind of the entity being deleted
     * @return always {@code true}; the boolean is kept for callers that check it
     */
    protected boolean quotaDelete(String id, String kind) {
        SEntity quota = findSystemQuota();
        if (quota == null) {
            quota = newSystemQuota();
        }
        double newSize = quota.getDouble("size") - ((double) getFileSize(id, kind) / 1048576.0);
        if (newSize < 0) newSize = 0;
        quota.setDouble("size", newSize);
        quota.save();
        return true;
    }
 
    /**
     * Looks up the on-disk size of an entity's backup file.
     * The file lives at {@code dirBackup/kind/<id split into 2-character folders>/id.txt};
     * a trailing odd character of the id does not produce a folder.
     *
     * @param id   entity identifier
     * @param kind entity kind (first folder level)
     * @return file length in bytes, or 0 when the file does not exist
     */
    protected long getFileSize(String id, String kind) {
        StringBuilder shard = new StringBuilder();
        for (int pos = 0; pos + 1 < id.length(); pos += 2) {
            if (shard.length() > 0) {
                shard.append(File.separator);
            }
            shard.append(id.substring(pos, pos + 2));
        }
        File kindDir = new File(dirBackup, kind);
        File shardDir = new File(kindDir.getAbsolutePath(), shard.toString());
        File backupFile = new File(shardDir.getAbsolutePath(), id + ".txt");
        return backupFile.exists() ? backupFile.length() : 0;
    }
 
    /**
     * Builds an unsaved quota record: schema {@code s|kind|d|size}, kind {@code KIND_QUOTA},
     * a fresh dash-less UUID as id, and its "kind" field set to {@code QUOTA_SYSTEM}.
     *
     * @return the new, not yet persisted quota entity
     */
    protected SEntity newSystemQuota() {
        String freshId = UUID.randomUUID().toString().replace("-", "");
        SEntity record = new SEntity(this);
        record.setSchema("s|kind|d|size");
        record.setKind(KIND_QUOTA);
        record.setId(freshId);
        record.setString("kind", QUOTA_SYSTEM);
        return record;
    }
 
    /**
     * Fetches the stored system quota record, if any.
     *
     * @return the first {@code KIND_QUOTA} entity whose "kind" field equals
     *         {@code QUOTA_SYSTEM}, or {@code null} when none is found
     */
    protected SEntity findSystemQuota() {
        TermQuery bySystemKind = new TermQuery(new Term("kind", QUOTA_SYSTEM));
        List<SEntity> hits = search(KIND_QUOTA, bySystemKind, 1);
        return hits.size() == 0 ? null : hits.get(0);
    }
    /**
     * Performs the actual create: reserves quota, writes the backup file and adds the
     * entity to the index. Entities without id or kind are ignored. Quota records
     * themselves ({@code KIND_QUOTA}) bypass the quota check.
     *
     * Failures are swallowed (best effort), but the index writer is always closed so
     * that a failed write does not leave the index write lock held.
     *
     * @param src entity to store
     */
    protected void createEntity(SEntity src) {
        if (src.getId().length() == 0) return;
        if (src.getKind().length() == 0) return;
        IndexWriter writer = null;
        try {
            if (!src.getKind().equals(KIND_QUOTA)) {
                if (!quotaCreate(src)) return;
            }
            backup(src);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            Document doc = new Document();
            write(src, doc);
            writer.addDocument(doc);
        } catch (Exception e) {
            // Best effort: the caller has no error channel, so the failure is dropped.
        } finally {
            if (writer != null) {
                try { writer.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Rewrites an existing entity and blocks until the write has finished.
     * The work itself runs on a {@link Timer} thread via {@code UpdateTask}; this
     * method polls the shared monitor every 10 ms until the task reports completion.
     *
     * @param src entity to store
     */
    public void update(SEntity src) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new UpdateTask(timer, src, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Keep waiting so the write completes, but remember the request.
                interrupted = true;
            }
        }
        // Restore the interrupt status that the wait loop consumed.
        if (interrupted) Thread.currentThread().interrupt();
    }
    /**
     * Performs the actual update: adjusts quota, rewrites the backup file and replaces
     * the indexed document whose {@code SEntity.ID} term equals the entity id. Entities
     * without id or kind are ignored. Quota records ({@code KIND_QUOTA}) bypass the
     * quota check.
     *
     * Failures are swallowed (best effort), but the index writer is always closed so
     * that a failed write does not leave the index write lock held.
     *
     * @param src entity to store
     */
    protected void updateEntity(SEntity src) {
        if (src.getId().length() == 0) return;
        if (src.getKind().length() == 0) return;
        IndexWriter writer = null;
        try {
            if (!src.getKind().equals(KIND_QUOTA)) {
                if (!quotaUpdate(src)) return;
            }
            backup(src);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            Document doc = new Document();
            write(src, doc);
            writer.updateDocument(new Term(SEntity.ID, src.getId()), doc);
        } catch (Exception e) {
            // Best effort: the caller has no error channel, so the failure is dropped.
        } finally {
            if (writer != null) {
                try { writer.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Populates {@code src} from the indexed document with the given id.
     * Nothing happens when no document matches, when {@link #allowLoad} vetoes the
     * load, or when the index cannot be read.
     *
     * @param id  identifier matched exactly against the {@code SEntity.ID} field
     * @param src entity that receives the stored schema and field values
     */
    public void load(String id, SEntity src) {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
            if (td.totalHits > 0) {
                Document doc = searcher.doc(td.scoreDocs[0].doc);
                if (allowLoad(id, doc.get(SEntity.KIND))) {
                    src.setSchema(doc.get(SEntity.SCHEMA));
                    read(src, doc);
                }
            }
        } catch (Exception e) {
            // Best effort: on failure the entity is left as it was.
        } finally {
            // Release the index handles even when the lookup fails part-way.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Hook consulted by {@code load} before an entity is populated.
     * This implementation permits every load.
     *
     * @param id   identifier of the entity being loaded
     * @param kind stored kind of that entity
     * @return always {@code true}
     */
    protected boolean allowLoad(String id, String kind) {
        return true;
    }
 
    /**
     * Counts the documents of one kind that match an optional query.
     * The kind is always required (MUST clause on {@code SEntity.KIND}); {@code query},
     * {@code filter} and {@code sort} are applied only when non-null.
     *
     * @param kind   entity kind to restrict the count to
     * @param query  additional required query, or {@code null}
     * @param filter Lucene filter, or {@code null}
     * @param sort   Lucene sort, or {@code null}
     * @param max    number of top hits requested from the searcher
     * @return the searcher's {@code totalHits}, or 0 when the index cannot be searched
     */
    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        int tag = 0;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            BooleanQuery boolQuery = new BooleanQuery();
            boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
            if (query != null) {
                boolQuery.add(new BooleanClause(query, Occur.MUST));
            }
            TopDocs td = null;
            if (filter != null && sort != null) {
                td = searcher.search(boolQuery, filter, max, sort);
            } else if (filter != null) {
                td = searcher.search(boolQuery, filter, max);
            } else if (sort != null) {
                td = searcher.search(boolQuery, max, sort);
            } else {
                td = searcher.search(boolQuery, max);
            }
            tag = td.totalHits;
        } catch (Exception e) {
            // Best effort: a failed search is reported as zero matches.
        } finally {
            // Release the index handles even when the search throws.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
        return tag;
    }
    /** Counts matches for {@code query} within {@code kind}; no filter, no sort. */
    public int count(String kind, Query query, int max) {
        return count(kind, query, null, null, max);
    }
    /** Counts matches for {@code query} within {@code kind} using {@code sort}; no filter. */
    public int count(String kind, Query query, Sort sort, int max) {
        return count(kind, query, null, sort, max);
    }
 
    /** Counts matches for {@code query} within {@code kind} using {@code filter}; no sort. */
    public int count(String kind, Query query, Filter filter, int max) {
        return count(kind, query, filter, null, max);
    }
 
    /** Searches {@code kind} for up to {@code max} hits; no filter, no sort. */
    public List<SEntity> search(String kind, Query query, int max) {
        return search(kind, query, null, null, max);
    }
    /** Searches {@code kind} for up to {@code max} hits using {@code sort}; no filter. */
    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        return search(kind, query, null, sort, max);
    }
 
    /** Searches {@code kind} for up to {@code max} hits using {@code filter}; no sort. */
    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        return search(kind, query, filter, null, max);
    }
 
    /** Paged search of {@code kind}; no filter, no sort. */
    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { 
        return search(kind, query, null, null, pagesize, pageno);
    }
 
    /** Paged search of {@code kind} using {@code sort}; no filter. */
    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { 
        return search(kind, query, null, sort, pagesize, pageno);
    }
 
    /** Paged search of {@code kind} using {@code filter}; no sort. */
    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        return search(kind, query, filter, null, pagesize, pageno);
    }
 
    /**
     * Returns up to {@code max} entities of one kind that match an optional query.
     * The kind is always required (MUST clause on {@code SEntity.KIND}); {@code query},
     * {@code filter} and {@code sort} are applied only when non-null.
     *
     * The result loop is bounded by the number of returned score docs rather than by
     * {@code totalHits}: the searcher hands back at most {@code max} docs, so iterating
     * to {@code totalHits} ran past the array whenever more documents matched, and the
     * swallowed exception skipped closing the reader.
     *
     * @param kind   entity kind to restrict the search to
     * @param query  additional required query, or {@code null}
     * @param filter Lucene filter, or {@code null}
     * @param sort   Lucene sort, or {@code null}
     * @param max    maximum number of hits to return
     * @return matching entities in searcher order; the entities read before any failure,
     *         or an empty list when the index cannot be searched at all
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        List<SEntity> tag = new ArrayList<SEntity>();
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            BooleanQuery boolQuery = new BooleanQuery();
            boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
            if (query != null) {
                boolQuery.add(new BooleanClause(query, Occur.MUST));
            }
            TopDocs td = null;
            if (filter != null && sort != null) {
                td = searcher.search(boolQuery, filter, max, sort);
            } else if (filter != null) {
                td = searcher.search(boolQuery, filter, max);
            } else if (sort != null) {
                td = searcher.search(boolQuery, max, sort);
            } else {
                td = searcher.search(boolQuery, max);
            }
            for (int i = 0; i < td.scoreDocs.length; i++) {
                SEntity item = new SEntity(this);
                Document doc = searcher.doc(td.scoreDocs[i].doc);
                item.setSchema(doc.get(SEntity.SCHEMA));
                read(item, doc);
                tag.add(item);
            }
        } catch (Exception e) {
            // Best effort: whatever was collected before the failure is returned.
        } finally {
            // Release the index handles even when the search throws.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
        return tag;
    }
    /**
     * Returns one page of entities of one kind that match an optional query.
     * The searcher is asked for the first {@code pageno * pagesize} hits and the last
     * {@code pagesize} of them form the page. Non-positive {@code pagesize} defaults
     * to 10 and non-positive {@code pageno} to 1.
     *
     * @param kind     entity kind to restrict the search to
     * @param query    additional required query, or {@code null}
     * @param filter   Lucene filter, or {@code null}
     * @param sort     Lucene sort, or {@code null}
     * @param pagesize entities per page
     * @param pageno   1-based page number
     * @return the entities on the requested page; the entities read before any failure,
     *         or an empty list when the page is past the end or the index cannot be searched
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        List<SEntity> tag = new ArrayList<SEntity>();
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            BooleanQuery boolQuery = new BooleanQuery();
            boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
            if (query != null) {
                boolQuery.add(new BooleanClause(query, Occur.MUST));
            }
            if (pagesize <= 0) pagesize = 10;
            if (pageno <= 0) pageno = 1;
            int max = pageno * pagesize;
            TopDocs td = null;
            if (filter != null && sort != null) {
                td = searcher.search(boolQuery, filter, max, sort);
            } else if (filter != null) {
                td = searcher.search(boolQuery, filter, max);
            } else if (sort != null) {
                td = searcher.search(boolQuery, max, sort);
            } else {
                td = searcher.search(boolQuery, max);
            }
            // Bound by the docs actually returned so the array is never overrun.
            for (int i = (pageno - 1) * pagesize; i < td.scoreDocs.length && i < max; i++) {
                SEntity item = new SEntity(this);
                Document doc = searcher.doc(td.scoreDocs[i].doc);
                item.setSchema(doc.get(SEntity.SCHEMA));
                read(item, doc);
                tag.add(item);
            }
        } catch (Exception e) {
            // Best effort: whatever was collected before the failure is returned.
        } finally {
            // Release the index handles even when the search throws.
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
        return tag;
    }
 
    /**
     * Writes the entity's {@code toString()} form to
     * {@code dirBackup/kind/<id split into 2-character folders>/id.txt}, creating the
     * folders as needed. Entities without id or kind are skipped, and failures are
     * swallowed (best effort).
     *
     * Ids and kinds can be set by scripts (via {@code setId}/{@code setKind}), so values
     * containing {@code ..} or a path separator are refused to keep every backup file
     * inside {@code dirBackup}.
     *
     * NOTE(review): the file is written with the platform default charset — confirm
     * whether a fixed encoding is expected by whatever reads these backups.
     *
     * @param src entity to back up
     */
    protected void backup(SEntity src) {
        String id = src.getId();
        if (id.length() == 0) return;
        String kind = src.getKind();
        if (kind.length() == 0) return;
        if (id.indexOf("..") >= 0 || id.indexOf('/') >= 0 || id.indexOf('\\') >= 0) return;
        if (kind.indexOf("..") >= 0 || kind.indexOf('/') >= 0 || kind.indexOf('\\') >= 0) return;
        String fid = "";
        for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
            if (fid.length() > 0) fid += File.separator;
            fid += id.substring(i, i + 2);
        }
        BufferedWriter writer = null;
        try {
            File file = new File(dirBackup, kind);
            file = new File(file.getAbsolutePath(), fid);
            file.mkdirs();
            String folder = file.getAbsolutePath();
            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(folder, id + ".txt"))));
            writer.write(src.toString());
        } catch (Exception e) {
            // Best effort: a failed backup does not abort the surrounding write.
        } finally {
            // Close the file even when the write fails so the handle is not leaked.
            if (writer != null) {
                try { writer.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Copies stored field values from a Lucene document into an entity.
     * The document's {@code SEntity.SCHEMA} value is a {@code |}-separated list of
     * (kind, field name) pairs. Every field whose kind appears in
     * {@code SEntity.ALL_KINDS} is set on the entity as a string, with a missing
     * stored value becoming the empty string.
     *
     * @param entity entity to populate
     * @param doc    Lucene document to read from
     */
    protected void read(SEntity entity, Document doc) {
        String schema = doc.get(SEntity.SCHEMA);
        String[] parts = (schema == null ? "" : schema).split("\\|");
        // Walk (kind, name) pairs; an unpaired trailing token is ignored.
        for (int at = 1; at < parts.length; at += 2) {
            String type = parts[at - 1];
            String name = parts[at];
            String stored = doc.get(name);
            if (stored == null) {
                stored = "";
            }
            if (SEntity.ALL_KINDS.indexOf("|" + type + "|") >= 0) {
                entity.setString(name, stored);
            }
        }
    }
 
    /**
     * Adds one Lucene field per schema entry of the entity to the document.
     * The schema is a {@code |}-separated list of (kind, field name) pairs. String
     * kinds become stored, non-analyzed fields; analyzed kinds become stored, analyzed
     * fields; double/float/integer/long kinds become stored, indexed numeric fields.
     * Unrecognised kinds are skipped.
     *
     * @param entity source of the schema and values
     * @param doc    Lucene document to add fields to
     */
    protected void write(SEntity entity, Document doc) {
        String schema = entity.getSchema();
        String[] parts = (schema == null ? "" : schema).split("\\|");
        // Walk (kind, name) pairs; an unpaired trailing token is ignored.
        for (int at = 1; at < parts.length; at += 2) {
            String type = parts[at - 1];
            String name = parts[at];
            if (SEntity.STRING.equalsIgnoreCase(type)) {
                doc.add(new Field(name, entity.getString(name), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                continue;
            }
            if (SEntity.DOUBLE.equalsIgnoreCase(type)) {
                NumericField number = new NumericField(name, Store.YES, true);
                number.setDoubleValue(entity.getDouble(name));
                doc.add(number);
                continue;
            }
            if (SEntity.FLOAT.equalsIgnoreCase(type)) {
                NumericField number = new NumericField(name, Store.YES, true);
                number.setFloatValue(entity.getFloat(name));
                doc.add(number);
                continue;
            }
            if (SEntity.INTEGER.equalsIgnoreCase(type)) {
                NumericField number = new NumericField(name, Store.YES, true);
                number.setIntValue(entity.getInteger(name));
                doc.add(number);
                continue;
            }
            if (SEntity.LONG.equalsIgnoreCase(type)) {
                NumericField number = new NumericField(name, Store.YES, true);
                number.setLongValue(entity.getLong(name));
                doc.add(number);
                continue;
            }
            if (SEntity.ANALYZED.equalsIgnoreCase(type)) {
                doc.add(new Field(name, entity.getString(name), Store.YES, Index.ANALYZED));
            }
        }
    }
 
    /**
     * Deletes the entity with the given id and blocks until the delete has finished.
     * The work itself runs on a {@link Timer} thread via {@code DeleteTask}; this
     * method polls the shared monitor every 10 ms until the task reports completion.
     *
     * @param id identifier of the entity to delete
     */
    public void delete(String id) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new DeleteTask(timer, id, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Keep waiting so the delete completes, but remember the request.
                interrupted = true;
            }
        }
        // Restore the interrupt status that the wait loop consumed.
        if (interrupted) Thread.currentThread().interrupt();
    }
 
    /**
     * Performs the actual delete. The stored kind is looked up first; the delete
     * proceeds only when a kind is found and {@link #allowDelete} agrees. Quota is then
     * released (except for {@code KIND_QUOTA} records), the backup file removed and the
     * document deleted from the index.
     *
     * Failures are swallowed (best effort). Reader, searcher and writer are always
     * closed, and a document without a stored kind is treated as "not found" instead
     * of raising a {@code NullPointerException} on the worker thread.
     *
     * @param id identifier of the entity to delete; empty ids are ignored
     */
    protected void deleteEntity(String id) {
        if (id.length() == 0) return;
        String kind = "";

        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            searcher = new IndexSearcher(reader);
            TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
            if (td.totalHits > 0) {
                Document doc = searcher.doc(td.scoreDocs[0].doc);
                kind = doc.get(SEntity.KIND);
            }
        } catch (Exception e) {
            // Best effort: an unreadable index leaves kind empty and ends the delete below.
        } finally {
            if (searcher != null) {
                try { searcher.close(); } catch (Exception ignored) { }
            }
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
        if (kind == null || kind.length() == 0) return;
        if (!allowDelete(id, kind)) return;

        IndexWriter writer = null;
        try {
            if (!kind.equals(KIND_QUOTA)) {
                if (!quotaDelete(id, kind)) return;
            }
            removeBackup(id, kind);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            writer.deleteDocuments(new Term(SEntity.ID, id));
        } catch (Exception e) {
            // Best effort: the caller has no error channel, so the failure is dropped.
        } finally {
            // Always close the writer so a failed delete does not keep the write lock.
            if (writer != null) {
                try { writer.close(); } catch (Exception ignored) { }
            }
        }
    }
 
    /**
     * Hook consulted by {@code deleteEntity} before an entity is removed.
     * This implementation permits every delete.
     *
     * @param id   identifier of the entity being deleted
     * @param kind stored kind of that entity
     * @return always {@code true}
     */
    protected boolean allowDelete(String id, String kind) {
        return true;
    }
 
    /**
     * Deletes the backup file
     * {@code dirBackup/kind/<id split into 2-character folders>/id.txt} if it exists.
     * Empty ids or kinds are ignored, and failures are swallowed (best effort).
     *
     * Ids can be supplied by scripts, so ids or kinds containing {@code ..} or a path
     * separator are refused to keep the deletion inside {@code dirBackup}.
     *
     * @param id   identifier of the entity whose backup is removed
     * @param kind kind of that entity (first folder level)
     */
    protected void removeBackup(String id, String kind) {
        if (id.length() == 0) return;
        if (kind.length() == 0) return;
        if (id.indexOf("..") >= 0 || id.indexOf('/') >= 0 || id.indexOf('\\') >= 0) return;
        if (kind.indexOf("..") >= 0 || kind.indexOf('/') >= 0 || kind.indexOf('\\') >= 0) return;
        String fid = "";
        for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
            if (fid.length() > 0) fid += File.separator;
            fid += id.substring(i, i + 2);
        }
        try {
            File file = new File(dirBackup, kind);
            file = new File(file.getAbsolutePath(), fid);
            String folder = file.getAbsolutePath();
            file = new File(folder, id + ".txt");
            file.delete();
        } catch (Exception e) {
            // Best effort: a backup that cannot be removed does not abort the delete.
        }
    }
    /**
     * @return the quota value this handler was constructed with
     */
    public double storageQuota() {
        return systemQuota;
    }
 
    /**
     * @return the "size" value of the stored system quota record, or 0 when no
     *         such record exists
     */
    public double storageSize() {
        SEntity usage = findSystemQuota();
        return usage == null ? 0 : usage.getDouble("size");
    }
    /**
     * Timer task that runs {@code deleteEntity} for one id, then flags the monitor and
     * shuts its timer down.
     */
    private class DeleteTask extends TimerTask {
        private String id;
        private Timer timer;
        private Monitor monitor;
  
        public DeleteTask(Timer timer, String id, Monitor monitor) {
            this.timer = timer;
            this.id = id;
            this.monitor = monitor;
        }
  
        @Override
        public void run() {
            try {
                deleteEntity(id);
            } finally {
                // Always signal completion and stop the timer: if the delete throws,
                // the caller polling monitor.finished would otherwise wait forever.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }
  
    }
    /**
     * Timer task that runs {@code createEntity} for one entity, then flags the monitor
     * and shuts its timer down.
     */
    private class CreateTask extends TimerTask {
        private SEntity entity;
        private Timer timer;
        private Monitor monitor;
  
        public CreateTask(Timer timer, SEntity entity, Monitor monitor) {
            this.timer = timer;
            this.entity = entity;
            this.monitor = monitor;
        }
  
        @Override
        public void run() {
            try {
                createEntity(entity);
            } finally {
                // Always signal completion and stop the timer: if the create throws,
                // the caller polling monitor.finished would otherwise wait forever.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }
  
    }
    /**
     * Timer task that runs {@code updateEntity} for one entity, then flags the monitor
     * and shuts its timer down.
     */
    private class UpdateTask extends TimerTask {
        private SEntity entity;
        private Timer timer;
        private Monitor monitor;
  
        public UpdateTask(Timer timer, SEntity entity, Monitor monitor) {
            this.timer = timer;
            this.entity = entity;
            this.monitor = monitor;
        }
  
        @Override
        public void run() {
            try {
                updateEntity(entity);
            } finally {
                // Always signal completion and stop the timer: if the update throws,
                // the caller polling monitor.finished would otherwise wait forever.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }
  
    }
 
    /**
     * Completion flag shared between a caller and the timer task doing its work.
     * The flag is set on the timer thread and polled on the calling thread, so it is
     * {@code volatile} to guarantee that the poller observes the update.
     */
    private class Monitor {
        public volatile boolean finished = false;
    }
 
}
    Modify DataHandler class
public static class DataHandler extends Machine.Handler {
    private String dirIndex;
    private String dirBackup;
    private double systemQuota;
     
    /**
     * Stores the settings later handed to each {@code LuceneHandler} created by
     * {@code getEntityHandler()}.
     *
     * @param dirIndex    index directory path
     * @param dirBackup   backup directory path
     * @param systemQuota quota value
     */
    public DataHandler(String dirIndex, String dirBackup, double systemQuota) {
        this.dirIndex = dirIndex;
        this.dirBackup = dirBackup;
        this.systemQuota = systemQuota;
    }
     
    /**
     * @return a new {@code LuceneHandler} configured with this handler's index
     *         directory, backup directory and quota
     */
    public SEntity.Handler getEntityHandler() { 
        return new LuceneHandler(dirIndex, dirBackup, systemQuota);
    }
..............    
}
    javascript
/**
 * Script entry point: runs exactly one of the demo scenarios.
 * Edit `no` to choose which one.
 */
function main(env, args) {
  var no = 1;
  switch (no) {
    case 1:
      test01(env, args); // Grab products
      break;
    case 2:
      test02(env, args); // List all products
      break;
    case 3:
      test03(env, args); // Search products
      break;
    case 4:
      test04(env, args); // Delete products
      break;
  }
}
/**
 * Demo 4: deletes the first page (page size 3, page 1) of 'Link' entities.
 */
function test04(env, args) {
  var entity = env.newEntity();
  var everything = entity.newMatchAllDocsQuery();
  var firstPage = entity.search('Link', everything, 3, 1);
  var at = 0;
  while (at < firstPage.size()) {
    firstPage.get(at).delete();
    at++;
  }
}
/**
 * Demo 3: searches 'Link' entities whose description or title matches a term,
 * ordered by score, and prints each hit with highlighted title and description.
 * A failed highlight is reported through env.error and the plain text is printed.
 */
function test03(env, args) {
  var term = 'Sleeping';
  var entity = env.newEntity();
  var shouldMatch = [entity.occurShould(), entity.occurShould()];
  var query = entity.parseQuery([term, term], ['desc', 'title'], shouldMatch);
  var size = entity.count('Link', query, 999999);
  var byScore = entity.newSort(org.apache.lucene.search.SortField.FIELD_SCORE);
  var products = entity.search('Link', query, byScore, 999999);
  for (var idx = 0; idx < products.size(); idx++) {
    var product = products.get(idx);
    // Round-trip through UTF-8 bytes to obtain a fresh string for highlighting.
    var titleText = env.newString(product.getString('title').getBytes('UTF-8'), 'UTF-8');
    try {
      titleText = entity.highlight(query, titleText, 'title', 50, 3, ' (...) ');
    } catch (e) {
      env.error(e);
    }
    var descText = env.newString(product.getString('desc').getBytes('UTF-8'), 'UTF-8');
    try {
      descText = entity.highlight(query, descText, 'desc', 50, 3, ' (...) ');
    } catch (e) {
      env.error(e);
    }
    printProduct(product, env, descText, titleText);
  }
}
/**
 * Demo 2: logs how many 'Link' entities exist and prints each of them.
 */
function test02(env, args) {
  var entity = env.newEntity();
  var everything = entity.newMatchAllDocsQuery();
  var total = entity.count('Link', everything, 999999);
  var products = entity.search('Link', everything, 999999);
  env.info('Size: ' + total);
  var at = 0;
  while (at < products.size()) {
    printProduct(products.get(at), env);
    at++;
  }
}
/**
 * Demo 1: grabs products from store 'paesia', node '100' (first 2 pages),
 * saves each one, then logs how many were grabbed.
 */
function test01(env, args) {
  var grabbed = grabProduct('paesia', '100', 2, env);
  for (var idx = 0; idx < grabbed.size(); idx++) {
    saveProduct(grabbed.get(idx), env);
  }
  env.info('Saved: ' + grabbed.size());
}
/**
 * Logs one product through env.info: id, title, url and description, followed by
 * the highlighted title and/or description when they are supplied.
 *
 * @param pro    entity exposing getId() and getString(name)
 * @param env    environment exposing info(text)
 * @param descH  optional highlighted description
 * @param titleH optional highlighted title
 */
function printProduct(pro, env, descH, titleH) {
  var text = '\nId: ' + pro.getId()
    + '\nTitle: ' + pro.getString('title')
    + '\nUrl: ' + pro.getString('url')
    + '\nDescription: \n' + pro.getString('desc');
  if (titleH != null) {
    text = text + '\nTitle Highlight: \n' + titleH;
  }
  if (descH != null) {
    text = text + '\nDescription Highlight: \n' + descH;
  }
  env.info('\n' + text + '\n');
}
/**
 * Saves one grabbed product as a 'Link' entity (url, title, plain-text description),
 * unless it has no title/url or a product with the same url is already stored.
 *
 * Values read from the map are normalised to JavaScript strings first. Elsewhere in
 * this script such values are treated as Java strings (`.length()` is called on them),
 * whereas the '' fallback is a JavaScript string whose `length` is a number. Calling
 * `.length()` on that fallback threw for every product without a description, and
 * comparing `.length` of a Java string with 0 never detected an empty title or url.
 *
 * @param pro map-like product exposing get(key)
 * @param env script environment
 */
function saveProduct(pro, env) {
  var title = pro.get('title');
  var url = pro.get('url');
  if (title == null || url == null) return;
  title = String(title);
  url = String(url);
  if (title.length == 0 || url.length == 0) return;
  if (findProductByUrl(url, env)) return;
  var desc = pro.get('description');
  desc = (desc == null) ? '' : String(desc);
  if (desc.length > 0) {
    // Strip markup: keep only the text of the parsed description body.
    var doc = env.newJsoup().parse(desc);
    desc = doc.select('body').first().text();
  }
  var schema = 's|url|a|title|a|desc';
  var entity = env.newEntity();
  entity.setSchema(schema);
  entity.setKind('Link');
  entity.setId(env.uniqid());
  entity.setString('url', url);
  entity.setString('title', title);
  entity.setString('desc', desc);
  entity.save();
}
/**
 * Tells whether a 'Link' entity with exactly this url is already stored.
 *
 * @param url url to look for
 * @param env script environment
 * @return true when the count of matching entities is above zero
 */
function findProductByUrl(url, env) {
  var entity = env.newEntity();
  var byUrl = entity.newTermQuery(entity.newTerm('url', url));
  return entity.count('Link', byUrl, 1) > 0;
}
/**
 * Scrapes product data from an Amazon aStore category, one listing page at a time.
 *
 * For every page from 1 to maxpage the listing is fetched and parsed, then:
 *   1. text-row links give each product's code, title and absolute url;
 *   2. image-row links add the small image and, when non-empty, the image's alt
 *      text as title;
 *   3. each product's detail page is fetched to add 'large-image', 'description',
 *      'details', 'editorial-reviews', 'list-price', 'offer-price' and 'buy-url'
 *      where the corresponding element exists.
 * Errors on a page or on a single product are passed to env.error and scraping
 * continues with the next one.
 *
 * @param astore  store name inserted into the listing url
 * @param node    category node inserted into the listing url
 * @param maxpage last listing page to fetch (pages start at 1)
 * @param env     script environment providing URL, Jsoup, list and map factories
 * @return the list from env.newArrayList() holding one map per product
 */
function grabProduct(astore, node, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var alink = env.newURL('http://astore.amazon.com/' + astore + '-20?node=' + node + '&page=' + no);
      var doc = env.newJsoup().parse(alink, 60000);
      // Pass 1: text links -> one map per product code.
      var elements = doc.select('#featuredProducts .textrow a');
      var map = env.newHashMap();
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var title = element.text();
        var url = element.attr('href');
        var pos = url.lastIndexOf('/detail/');
        if (pos < 0) continue;
        // The product code is whatever follows '/detail/' (8 characters long).
        var code = url.substring(pos + 8);
        // Resolve against the listing url; + '' turns the result into a string.
        var url = env.newURL(alink, url) + '';
        var item = env.newHashMap();
        item.put('code', code);
        item.put('title', title);
        item.put('url', url);
        map.put(code, item);
      }
      // Pass 2: image links -> small image and alt-text title for known codes.
      elements = doc.select('#featuredProducts .imagerow a');
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var url = element.attr('href');
        var pos = url.lastIndexOf('/detail/');
        if (pos < 0) continue;
        var code = url.substring(pos + 8);
        var item = map.get(code);
        if (item == null) continue;
        var child = element.select('img').first();
        if (child == null) continue;
        var title = child.attr('alt');
        var smimg = child.attr('src');
        if (title.length() > 0) {
          item.put('title', title);
        }
        item.put('small-image', smimg);
      }
      // Pass 3: visit each product's detail page.
      var keys = env.getKeys(map);
      for (var i = 0; i < keys.size(); i++) {
        try {
          var item = map.get(keys.get(i));
          alink = env.newURL(item.get('url'));
          doc = env.newJsoup().parse(alink, 60000);
          var element = doc.select('#detailImage img').first();
          if (element != null) {
            item.put('large-image', element.attr('src'));
          }
          element = doc.select('#productDescription').first();
          if (element != null) {
            var desc = element.html();
            // Drop everything up to and including the section heading.
            var pattern = '<h2>Product Description</h2>';
            var pos = desc.indexOf(pattern);
            if (pos >= 0) {
              desc = desc.substring(pos + pattern.length);
            }
            // Re-parse the fragment so its links and images can be made absolute.
            var bdoc = env.newJsoup().parse(desc, item.get('url'));
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            // Fragments that still contain an <html tag are not stored.
            if (desc.indexOf('<html') < 0) {
              item.put('description', desc);
            }
          }
          element = doc.select('#productDetails').first();
          if (element != null) {
            var desc = element.html();
            // Drop everything up to and including the section heading.
            var pattern = '<h2>Product Details</h2>';
            var pos = desc.indexOf(pattern);
            if (pos >= 0) {
              desc = desc.substring(pos + pattern.length);
            }
            var bdoc = env.newJsoup().parse(desc, item.get('url'));
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            if (desc.indexOf('<html') < 0) {
              item.put('details', desc);
            }
          }
          element = doc.select('#editorialReviews').first();
          if (element != null) {
            var desc = element.html();
            var bdoc = env.newJsoup().parse(desc, item.get('url') + '');
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            if (desc.indexOf('<html') < 0) {
              item.put('editorial-reviews', desc);
            }
          }
          element = doc.select('#detailListPrice').first();
          if (element != null) {
            item.put('list-price', element.text());
          }
          element = doc.select('#detailOfferPrice').first();
          if (element != null) {
            item.put('offer-price', element.text());
          }
          element = doc.select('#addToCartForm a').first();
          if (element != null) {
            item.put('buy-url', element.attr('href'));
          }
        } catch (e) {
          // A failing detail page is reported; the product keeps its listing data.
          env.error(e);
        }
      }
      // Append this page's products to the overall result.
      for (var i = 0; i < keys.size(); i++) {
        tag.add(map.get(keys.get(i)));
      }
    } catch (e) {
      // A failing listing page is reported and skipped.
      env.error(e);
    }
  }
  return tag;
}
/**
 * Rewrites every link target and image source in a parsed document to the string
 * form of env.newURL(base, value), i.e. resolved against the given base url.
 *
 * @param doc     parsed document exposing select(selector)
 * @param baseUrl url the attribute values are resolved against
 * @param env     script environment providing newURL
 */
function buildURL(doc, baseUrl, env) {
  var base = env.newURL(baseUrl);
  // (selector, attribute) pairs, processed in this order.
  var targets = [['a', 'href'], ['img', 'src']];
  for (var t = 0; t < targets.length; t++) {
    var attrName = targets[t][1];
    var nodes = doc.select(targets[t][0]);
    for (var k = 0; k < nodes.size(); k++) {
      var node = nodes.get(k);
      var resolved = env.newURL(base, node.attr(attrName));
      node.attr(attrName, resolved + '');
    }
  }
}
    
 
 

 
No comments:
Post a Comment