Add Lucene support to javascript sandbox
This task adds Lucene support to the JavaScript sandbox.
Add Lucene support to javascript sandbox
- Create javascript sandbox with jsoup support
- Create the com.paesia.schema.script.safe.lucene.SEntity class as follows
- Create the com.paesia.schema.script.LuceneHandler class as follows
- Modify the com.paesia.schema.script.Machine class as follows
- Modify the DataHandler class as follows
- Create the JavaScript as follows
- Call the Machine.run() method as follows
Call Machine.run() method
String dirIndex = ""; String dirBackup = ""; double systemQuota = 1024 * 1024; String js = loadJS(); Map args = new HashMap(); List links = new ArrayList(); args.put("links", links); Machine env = new Machine(new DataHandler(dirIndex, dirBackup, systemQuota)); Machine.run(env, js, args); for (int i = 0; i < links.size(); i++) { Map item = (Map)links.get(i); String line = ""; for (Object key : item.keySet()) { line += "\r\n" + key + " : " + item.get(key); } logger.info("\r\n" + (i + 1) + " --------------------------------\r\n" + line + "\r\n"); }Modify com.paesia.schema.script.Machine class
............
import com.paesia.schema.script.safe.lucene.SEntity;

/**
 * Script sandbox entry point (excerpt: dotted lines mark parts of the class
 * that are elided in this listing).
 *
 * The visible changes expose a fixed set of Lucene classes to scripts and add
 * a factory method that hands scripts an {@link SEntity}.
 */
public class Machine {

    // Optional host callbacks; when null, newEntity() creates an SEntity without a handler.
    private Handler handler;

    /**
     * Runs a script inside a scripting Context.
     *
     * The Context is entered, a ClassShutter restricting which Java classes
     * scripts may see is installed, and Context.exit() is always called in
     * the finally block. How {@code env}, {@code js} and {@code args} are
     * used is in the elided part of the body.
     *
     * @throws Exception any exception raised in the try block, rethrown unchanged
     */
    public static void run(Machine env, String js, Map args) throws Exception {
        try {
            Context cx = Context.enter();
            cx.setClassShutter(new ClassShutter() {
                // Returns true only for class names that are explicitly allowed;
                // everything that matches no check falls through to "return false".
                public boolean visibleToScripts(String className) {
                    ...........
                    // Lucene types that scripts may receive from, or pass to, SEntity.
                    if ("org.apache.lucene.search.Query".equals(className)) return true;
                    if ("org.apache.lucene.search.Filter".equals(className)) return true;
                    if ("org.apache.lucene.search.Sort".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanClause".equals(className)) return true;
                    // Prefix match: also admits classes nested in BooleanClause (binary names contain '$').
                    if (className.startsWith("org.apache.lucene.search.BooleanClause$")) return true;
                    if ("org.apache.lucene.search.PhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.index.Term".equals(className)) return true;
                    if ("org.apache.lucene.search.MultiPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NGramPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.WildcardQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.MatchAllDocsQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.FieldValueFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.QueryWrapperFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.SortField".equals(className)) return true;
                    ...........
                    return false;
                }
            });
            ...........
        } catch (Exception e) {
            // Rethrown as-is; the try/finally exists so that Context.exit() always runs.
            throw e;
        } finally {
            // NOTE(review): this also runs if Context.enter() itself threw - confirm that is acceptable.
            Context.exit();
        }
    }

    ...........

    /**
     * Creates a new entity for use by scripts.
     *
     * @return an SEntity bound to handler.getEntityHandler(), or to a null
     *         entity handler when this machine has no handler
     */
    public SEntity newEntity() {
        SEntity.Handler seh = null;
        if (handler != null) {
            seh = handler.getEntityHandler();
        }
        return new SEntity(seh);
    }

    /** Host-side callbacks for the sandbox; the visible default supplies no entity handler. */
    public static class Handler {
        ...........
        /** @return the SEntity persistence handler; this default implementation returns null */
        public SEntity.Handler getEntityHandler() {
            return null;
        }
    }

    ...........
}
com.paesia.schema.script.safe.lucene.SEntity class
package com.paesia.schema.script.safe.lucene;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NGramPhraseQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

/**
 * Script-facing entity: a typed bag of string-encoded field values plus
 * factory helpers for the Lucene query, filter and sort objects.
 *
 * Values live in {@link #data}; {@link #schema} maps each registered field
 * name to its type code (one of {@link #ALL_KINDS}). Only registered fields
 * can be written. The schema is serialised as {@code type|field|type|field...}
 * and stored in {@code data} under the {@link #SCHEMA} key.
 *
 * All persistence and searching is delegated to a {@link Handler}; with a
 * null handler those operations are no-ops returning empty/zero results.
 */
public class SEntity {

    /** Type code: string field. */
    public static final String STRING = "s";
    /** Type code: double field. */
    public static final String DOUBLE = "d";
    /** Type code: float field. */
    public static final String FLOAT = "f";
    /** Type code: integer field. */
    public static final String INTEGER = "i";
    /** Type code: long field. */
    public static final String LONG = "l";
    /** Type code: analyzed text field. */
    public static final String ANALYZED = "a";
    /** All valid type codes, each delimited by '|' so membership is a substring test. */
    public static final String ALL_KINDS = "|s|d|f|i|l|a|";

    /** Built-in field holding the serialised schema. */
    public static final String SCHEMA = "F4f8cc93237f50";
    /** Built-in field holding the entity id. */
    public static final String ID = "F4f8cce61643dd";
    /** Built-in field holding the creation time (epoch milliseconds). */
    public static final String CREATED = "F4f8cd83fcca31";
    /** Built-in field holding the last-update time (epoch milliseconds). */
    public static final String UPDATED = "F4f8cd84e2b74a";
    /** Built-in field holding the entity kind. */
    public static final String KIND = "F4f8cd9c8ee13d";
    /** Built-in field holding the entity mark. */
    public static final String MARK = "F4f8cda27d62fb";

    /** Field name to string-encoded value. */
    protected Properties data = new Properties();
    /** Field name to type code. */
    protected Properties schema = new Properties();
    /** Persistence back end; may be null. */
    protected Handler handler = null;

    /**
     * Creates an entity containing only the built-in fields.
     *
     * @param handler persistence back end, or null for a detached entity
     */
    public SEntity(Handler handler) {
        this.handler = handler;
        registerDefault();
    }

    /**
     * Registers (or re-types) a field. Unknown type codes are ignored.
     *
     * @param field field name
     * @param type  one of the type-code constants
     */
    public void register(String field, String type) {
        if (ALL_KINDS.indexOf("|" + type + "|") < 0) {
            return;
        }
        schema.put(field, type);
        saveSchema();
    }

    /**
     * Replaces the schema with the one described by {@code src}; the built-in
     * fields are always re-registered afterwards.
     *
     * @param src schema as {@code type|field|type|field...}; null is treated
     *            as an empty schema (previously a NullPointerException)
     */
    public void setSchema(String src) {
        applySchema(src == null ? "" : src);
    }

    /** @return the serialised schema, or "" when none is stored */
    public String getSchema() {
        String tag = data.getProperty(SCHEMA);
        return tag == null ? "" : tag;
    }

    /**
     * Replaces the whole entity state with the content of a string in
     * {@link Properties} format, then rebuilds the schema from it.
     * Unparseable or null input leaves the entity with only what was read
     * before the failure plus the built-in fields.
     *
     * @param src text previously produced by {@link #toString()}
     */
    public void fromString(String src) {
        data.clear();
        schema.clear();
        try {
            ByteArrayInputStream bais = new ByteArrayInputStream(src.getBytes("UTF-8"));
            data.load(bais);
            bais.close();
        } catch (Exception ignored) {
            // Best effort by design: bad input yields a (partially) empty entity.
        }
        loadSchema();
    }

    /**
     * Serialises all field values in {@link Properties} format.
     *
     * @return the serialised entity, or "" if serialisation failed
     */
    @Override
    public String toString() {
        String tag = "";
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            data.store(baos, "");
            // Properties.store(OutputStream) writes ISO-8859-1; decode with the
            // same charset instead of the platform default.
            tag = baos.toString("ISO-8859-1");
            baos.close();
        } catch (Exception ignored) {
            // Best effort by design: callers receive "" on failure.
        }
        return tag;
    }

    /** @return the raw value of {@code field}, or "" when unset */
    public String getString(String field) {
        String tag = data.getProperty(field);
        return tag == null ? "" : tag;
    }

    /**
     * Stores a value. Silently ignored unless {@code field} is registered;
     * a null value is stored as "".
     */
    public void setString(String field, String value) {
        if (schema.containsKey(field)) {
            data.setProperty(field, value == null ? "" : value);
        }
    }

    /** @return the value parsed as double, or 0 when unset or unparseable */
    public double getDouble(String field) {
        try {
            return Double.parseDouble(getString(field));
        } catch (Exception ignored) {
            return 0;
        }
    }

    /** Stores a double via {@link #setString(String, String)}. */
    public void setDouble(String field, double value) {
        setString(field, Double.toString(value));
    }

    /** @return the value parsed as float, or 0 when unset or unparseable */
    public float getFloat(String field) {
        try {
            return Float.parseFloat(getString(field));
        } catch (Exception ignored) {
            return 0;
        }
    }

    /** Stores a float via {@link #setString(String, String)}. */
    public void setFloat(String field, float value) {
        setString(field, Float.toString(value));
    }

    /** @return the value parsed as long, or 0 when unset or unparseable */
    public long getLong(String field) {
        try {
            return Long.parseLong(getString(field));
        } catch (Exception ignored) {
            return 0;
        }
    }

    /** Stores a long via {@link #setString(String, String)}. */
    public void setLong(String field, long value) {
        setString(field, Long.toString(value));
    }

    /** @return the value parsed as int, or 0 when unset or unparseable */
    public int getInteger(String field) {
        try {
            return Integer.parseInt(getString(field));
        } catch (Exception ignored) {
            return 0;
        }
    }

    /** Stores an int via {@link #setString(String, String)}. */
    public void setInteger(String field, int value) {
        setString(field, Integer.toString(value));
    }

    public String getId() {
        return getString(ID);
    }

    public void setId(String src) {
        setString(ID, src);
    }

    public String getKind() {
        return getString(KIND);
    }

    public void setKind(String src) {
        setString(KIND, src);
    }

    public String getMark() {
        return getString(MARK);
    }

    public void setMark(String src) {
        setString(MARK, src);
    }

    /** @return creation time; the epoch (0 ms) when the entity was never saved */
    public Date getCreated() {
        return new Date(getLong(CREATED));
    }

    /** @return last-update time; the epoch (0 ms) when the entity was never saved */
    public Date getUpdated() {
        return new Date(getLong(UPDATED));
    }

    /** @return whether the handler reports this entity's id as stored; false without a handler */
    public boolean exists() {
        if (handler == null) {
            return false;
        }
        return handler.exists(getId());
    }

    /**
     * Creates or updates this entity through the handler, stamping
     * {@link #CREATED}/{@link #UPDATED} with the current time. No-op without a handler.
     */
    public void save() {
        if (handler == null) {
            return;
        }
        long now = new Date().getTime();
        if (handler.exists(getId())) {
            setLong(UPDATED, now);
            handler.update(this);
        } else {
            setLong(CREATED, now);
            setLong(UPDATED, now);
            handler.create(this);
        }
    }

    // ---- counting: each overload delegates to the handler, 0 without one ----

    public int count(String kind, Query query, int max) {
        return handler == null ? 0 : handler.count(kind, query, max);
    }

    public int count(String kind, Query query, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, sort, max);
    }

    public int count(String kind, Query query, Filter filter, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, max);
    }

    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, sort, max);
    }

    // ---- searching: each overload delegates to the handler, empty list without one ----

    public List<SEntity> search(String kind, Query query, int max) {
        if (handler != null) {
            return handler.search(kind, query, max);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        if (handler != null) {
            return handler.search(kind, query, sort, max);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        if (handler != null) {
            return handler.search(kind, query, filter, max);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        if (handler != null) {
            return handler.search(kind, query, filter, sort, max);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, sort, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, filter, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /**
     * Paginated search with both a filter and a sort. This is the overload
     * that mirrors {@link Handler#search(String, Query, Filter, Sort, int, int)}.
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        if (handler != null) {
            return handler.search(kind, query, filter, sort, pagesize, pageno);
        }
        return new ArrayList<SEntity>();
    }

    /**
     * Legacy form of the paginated filter+sort search.
     *
     * @param max ignored; it was never forwarded to the handler
     * @deprecated use {@link #search(String, Query, Filter, Sort, int, int)};
     *             kept so existing callers keep working
     */
    @Deprecated
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max, int pagesize, int pageno) {
        return search(kind, query, filter, sort, pagesize, pageno);
    }

    /** Asks the handler to load the entity with the given id into this object. No-op without a handler. */
    public void load(String id) {
        if (handler != null) {
            handler.load(id, this);
        }
    }

    // ---- query factories: thin wrappers so scripts never call Lucene constructors directly ----

    public BooleanQuery newBooleanQuery() {
        return new BooleanQuery();
    }

    public BooleanClause newBooleanClause(Query query, Occur occur) {
        return new BooleanClause(query, occur);
    }

    public Occur occurMust() {
        return Occur.MUST;
    }

    public Occur occurMustNot() {
        return Occur.MUST_NOT;
    }

    public Occur occurShould() {
        return Occur.SHOULD;
    }

    public MatchAllDocsQuery newMatchAllDocsQuery() {
        return new MatchAllDocsQuery();
    }

    public MultiPhraseQuery newMultiPhraseQuery() {
        return new MultiPhraseQuery();
    }

    public PhraseQuery newPhraseQuery() {
        return new PhraseQuery();
    }

    public NGramPhraseQuery newNGramPhraseQuery(int n) {
        return new NGramPhraseQuery(n);
    }

    public Term newTerm(String field, String value) {
        return new Term(field, value);
    }

    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, Double min, Double max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, int precisionStep, Double min, Double max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Float> newFloatRangeQuery(String field, Float min, Float max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Float> newFloatRangeQuery(String field, int precisionStep, Float min, Float max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, Integer min, Integer max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, int precisionStep, Integer min, Integer max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Long> newLongRangeQuery(String field, Long min, Long max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Long> newLongRangeQuery(String field, int precisionStep, Long min, Long max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public PrefixQuery newPrefixQuery(Term term) {
        return new PrefixQuery(term);
    }

    public TermQuery newTermQuery(Term term) {
        return new TermQuery(term);
    }

    public TermRangeQuery newTermRangeQuery(String field, String lowerTerm, String upperTerm,
            boolean includeLower, boolean includeUpper) {
        return new TermRangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    public WildcardQuery newWildcardQuery(Term term) {
        return new WildcardQuery(term);
    }

    // ---- filter factories ----

    public FieldValueFilter newFieldValueFilter(String field, boolean negate) {
        return new FieldValueFilter(field, negate);
    }

    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, Double min, Double max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, int precisionStep, Double min, Double max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Float> newFloatRangeFilter(String field, Float min, Float max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Float> newFloatRangeFilter(String field, int precisionStep, Float min, Float max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, Integer min, Integer max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, int precisionStep, Integer min, Integer max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Long> newLongRangeFilter(String field, Long min, Long max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Long> newLongRangeFilter(String field, int precisionStep, Long min, Long max,
            boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public PrefixFilter newPrefixFilter(Term term) {
        return new PrefixFilter(term);
    }

    public QueryWrapperFilter newQueryWrapperFilter(Query query) {
        return new QueryWrapperFilter(query);
    }

    public TermRangeFilter newTermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
            boolean includeLower, boolean includeUpper) {
        return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    // ---- sort factories ----

    public SortField newSortField(String field, int type, boolean reverse) {
        return new SortField(field, type, reverse);
    }

    public Sort newSort() {
        return new Sort();
    }

    public Sort newSort(SortField... fields) {
        return new Sort(fields);
    }

    public Sort newSort(SortField field) {
        return new Sort(field);
    }

    // ---- query parsing and highlighting (StandardAnalyzer, Lucene 3.6 semantics) ----

    /** Parses {@code queries[i]} against {@code fields[i]} and combines the results. */
    public Query parseQuery(String[] queries, String[] fields) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields,
                new StandardAnalyzer(Version.LUCENE_36));
    }

    /** Parses {@code queries[i]} against {@code fields[i]}, combining with {@code flags[i]}. */
    public Query parseQuery(String[] queries, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, flags,
                new StandardAnalyzer(Version.LUCENE_36));
    }

    /** Parses one query string against every field, combining with {@code flags[i]}. */
    public Query parseQuery(String query, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, query, fields, flags,
                new StandardAnalyzer(Version.LUCENE_36));
    }

    /**
     * Returns the best-matching fragments of {@code text} for {@code query},
     * marked up by {@link SimpleHTMLFormatter}.
     *
     * @param fragmentSize    target fragment size passed to {@link SimpleFragmenter}
     * @param maxNumFragments maximum number of fragments to join
     * @param separator       text placed between fragments
     * @return the highlighted fragments, or the unmodified {@code text} when nothing matched
     */
    public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
            String separator) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        CachingTokenFilter tokenStream =
                new CachingTokenFilter(analyzer.tokenStream(field, new StringReader(text)));
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
        tokenStream.reset();
        String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
        return rv.length() == 0 ? text : rv;
    }

    // ---- schema bookkeeping ----

    /** Registers the built-in fields every entity carries. */
    protected void registerDefault() {
        register(SCHEMA, STRING);
        register(ID, STRING);
        register(CREATED, LONG);
        register(UPDATED, LONG);
        register(KIND, STRING);
        register(MARK, STRING);
    }

    /** Serialises {@link #schema} as {@code type|field|...} into the {@link #SCHEMA} value. */
    protected void saveSchema() {
        StringBuilder tag = new StringBuilder();
        for (Object key : schema.keySet()) {
            if (tag.length() > 0) {
                tag.append("|");
            }
            tag.append(schema.get(key)).append("|").append(key);
        }
        data.put(SCHEMA, tag.toString());
    }

    /** Rebuilds {@link #schema} from the {@link #SCHEMA} value currently stored in {@link #data}. */
    protected void loadSchema() {
        String src = data.getProperty(SCHEMA);
        applySchema(src == null ? "" : src);
    }

    /** Shared by setSchema/loadSchema: parse type|field pairs, then re-add the built-in fields. */
    private void applySchema(String src) {
        String[] fields = src.split("\\|");
        schema.clear();
        // Pairs are (type, field); a dangling last token is ignored.
        for (int i = 0; i + 1 < fields.length; i += 2) {
            register(fields[i + 1], fields[i]);
        }
        registerDefault();
        saveSchema();
    }

    /** Deletes this entity through the handler. */
    public void delete() {
        delete(getId());
    }

    /** Deletes the entity with the given id through the handler. No-op without a handler. */
    public void delete(String id) {
        if (handler != null) {
            handler.delete(id);
        }
    }

    // ---- SortField constants exposed as methods for scripts ----

    public SortField sortFieldDoc() {
        return SortField.FIELD_DOC;
    }

    public SortField sortFieldScore() {
        return SortField.FIELD_SCORE;
    }

    public int sortFieldLong() {
        return SortField.LONG;
    }

    public int sortFieldInteger() {
        return SortField.INT;
    }

    public int sortFieldDouble() {
        return SortField.DOUBLE;
    }

    public int sortFieldFloat() {
        return SortField.FLOAT;
    }

    public int sortFieldString() {
        return SortField.STRING_VAL;
    }

    /** @return the handler's storage quota, or 0 without a handler */
    public double storageQuota() {
        return handler == null ? 0 : handler.storageQuota();
    }

    /** @return the handler's current storage size, or 0 without a handler */
    public double storageSize() {
        return handler == null ? 0 : handler.storageSize();
    }

    /**
     * Persistence back end for {@link SEntity}. This base class stores
     * nothing: every operation is a no-op returning false, 0 or an empty
     * list. Subclasses override the operations they support.
     */
    public static class Handler {

        public boolean exists(String id) {
            return false;
        }

        public void create(SEntity src) {
        }

        public void update(SEntity src) {
        }

        public void load(String id, SEntity src) {
        }

        public void delete(String id) {
        }

        public List<SEntity> search(String kind, Query query, int max) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Sort sort, int max) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Filter filter, int max) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
            return new ArrayList<SEntity>();
        }

        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
            return new ArrayList<SEntity>();
        }

        public int count(String kind, Query query, int max) {
            return 0;
        }

        public int count(String kind, Query query, Sort sort, int max) {
            return 0;
        }

        public int count(String kind, Query query, Filter filter, int max) {
            return 0;
        }

        public int count(String kind, Query query, Filter filter, Sort sort, int max) {
            return 0;
        }

        public double storageQuota() {
            return 0;
        }

        public double storageSize() {
            return 0;
        }
    }
}
com.paesia.schema.script.LuceneHandler class
package com.paesia.schema.script; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.List; import java.util.Timer; import java.util.TimerTask; import java.util.UUID; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import com.paesia.schema.script.safe.lucene.SEntity; public class LuceneHandler extends SEntity.Handler { public static final String KIND_QUOTA = "C4f91ee1eb414a"; public static final String QUOTA_SYSTEM = "F4f91ee659b1ec"; protected String dirIndex = ""; protected String dirBackup = ""; protected double systemQuota = 0; public LuceneHandler(String dirIndex, String dirBackup, double systemQuota) { this.dirIndex = dirIndex; this.dirBackup = dirBackup; this.systemQuota = systemQuota; } public boolean exists(String id) { boolean tag = false; if (id.length() == 0) return tag; try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher 
searcher = new IndexSearcher(reader); TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1); if (td.totalHits > 0) { tag = true; } searcher.close(); reader.close(); } catch (Exception e) { } return tag; } public void create(SEntity src) { Monitor monitor = new Monitor(); Timer timer = new Timer(); timer.schedule(new CreateTask(timer, src, monitor), 1); while (!monitor.finished) { try { Thread.sleep(10); } catch (Exception e) { } } timer = null; } protected boolean quotaCreate(SEntity src) { boolean tag = false; SEntity quota = findSystemQuota(); if (quota == null) { quota = newSystemQuota(); } double newSize = quota.getDouble("size") + ((double)src.toString().length() / 1048576.0); if (newSize < 0) newSize = 0; if (newSize < systemQuota) { tag = true; quota.setDouble("size", newSize); quota.save(); } return tag; } protected boolean quotaUpdate(SEntity src) { boolean tag = false; SEntity quota = findSystemQuota(); if (quota == null) { quota = newSystemQuota(); } double newSize = quota.getDouble("size") - ((double)getFileSize(src.getId(), src.getKind()) / 1048576.0) + ((double)src.toString().length() / 1048576.0); if (newSize < 0) newSize = 0; if (newSize < systemQuota) { tag = true; quota.setDouble("size", newSize); quota.save(); } return tag; } protected boolean quotaDelete(String id, String kind) { boolean tag = false; SEntity quota = findSystemQuota(); if (quota == null) { quota = newSystemQuota(); } double newSize = quota.getDouble("size") - ((double)getFileSize(id, kind) / 1048576.0); if (newSize < 0) newSize = 0; if (newSize < systemQuota) { tag = true; quota.setDouble("size", newSize); quota.save(); } return tag; } protected long getFileSize(String id, String kind) { long tag = 0; String fid = ""; for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) { if (fid.length() > 0) fid += File.separator; fid += id.substring(i, i + 2); } File file = new File(dirBackup, kind); file = new File(file.getAbsolutePath(), fid); String folder = 
file.getAbsolutePath(); file = new File(folder, id + ".txt"); if (file.exists()) { tag = file.length(); } return tag; } protected SEntity newSystemQuota() { SEntity tag = new SEntity(this); tag.setSchema("s|kind|d|size"); tag.setKind(KIND_QUOTA); tag.setId(UUID.randomUUID().toString().replaceAll("-", "")); tag.setString("kind", QUOTA_SYSTEM); return tag; } protected SEntity findSystemQuota() { List<SEntity> results = search(KIND_QUOTA, new TermQuery(new Term("kind", QUOTA_SYSTEM)), 1); if (results.size() == 0) return null; return results.get(0); } protected void createEntity(SEntity src) { if (src.getId().length() == 0) return; if (src.getKind().length() == 0) return; try { if (!src.getKind().equals(KIND_QUOTA)) { if (!quotaCreate(src)) return; } backup(src); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc); Document doc = new Document(); write(src, doc); writer.addDocument(doc); writer.close(); } catch (Exception e) { } } public void update(SEntity src) { Monitor monitor = new Monitor(); Timer timer = new Timer(); timer.schedule(new UpdateTask(timer, src, monitor), 1); while (!monitor.finished) { try { Thread.sleep(10); } catch (Exception e) { } } timer = null; } protected void updateEntity(SEntity src) { if (src.getId().length() == 0) return; if (src.getKind().length() == 0) return; try { if (!src.getKind().equals(KIND_QUOTA)) { if (!quotaUpdate(src)) return; } backup(src); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc); Document doc = new Document(); write(src, doc); writer.updateDocument(new Term(SEntity.ID, src.getId()), doc); writer.close(); 
} catch (Exception e) { } } public void load(String id, SEntity src) { try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1); if (td.totalHits > 0) { Document doc = searcher.doc(td.scoreDocs[0].doc); if (allowLoad(id, doc.get(SEntity.KIND))) { src.setSchema(doc.get(SEntity.SCHEMA)); read(src, doc); } } searcher.close(); reader.close(); } catch (Exception e) { } } protected boolean allowLoad(String id, String kind) { return true; } public int count(String kind, Query query, Filter filter, Sort sort, int max) { int tag = 0; try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST)); if (query != null) { boolQuery.add(new BooleanClause(query, Occur.MUST)); } TopDocs td = null; if (filter != null && sort != null) { td = searcher.search(boolQuery, filter, max, sort); } else if (filter != null) { td = searcher.search(boolQuery, filter, max); } else if (sort != null) { td = searcher.search(boolQuery, max, sort); } else { td = searcher.search(boolQuery, max); } tag = td.totalHits; searcher.close(); reader.close(); } catch (Exception e) { } return tag; } public int count(String kind, Query query, int max) { return count(kind, query, null, null, max); } public int count(String kind, Query query, Sort sort, int max) { return count(kind, query, null, sort, max); } public int count(String kind, Query query, Filter filter, int max) { return count(kind, query, filter, null, max); } public List<SEntity> search(String kind, Query query, int max) { return search(kind, query, null, null, max); } public List<SEntity> search(String kind, Query query, Sort sort, int max) { return search(kind, query, null, sort, max); } public 
List<SEntity> search(String kind, Query query, Filter filter, int max) { return search(kind, query, filter, null, max); } public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { return search(kind, query, null, null, pagesize, pageno); } public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { return search(kind, query, null, sort, pagesize, pageno); } public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) { return search(kind, query, filter, null, pagesize, pageno); } public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { List<SEntity> tag = new ArrayList<SEntity>(); try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST)); if (query != null) { boolQuery.add(new BooleanClause(query, Occur.MUST)); } TopDocs td = null; if (filter != null && sort != null) { td = searcher.search(boolQuery, filter, max, sort); } else if (filter != null) { td = searcher.search(boolQuery, filter, max); } else if (sort != null) { td = searcher.search(boolQuery, max, sort); } else { td = searcher.search(boolQuery, max); } for (int i = 0; i < td.totalHits; i++) { SEntity item = new SEntity(this); Document doc = searcher.doc(td.scoreDocs[i].doc); item.setSchema(doc.get(SEntity.SCHEMA)); read(item, doc); tag.add(item); } searcher.close(); reader.close(); } catch (Exception e) { } return tag; } public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) { List<SEntity> tag = new ArrayList<SEntity>(); try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new 
BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST)); if (query != null) { boolQuery.add(new BooleanClause(query, Occur.MUST)); } if (pagesize <= 0) pagesize = 10; if (pageno <= 0) pageno = 1; int max = pageno * pagesize; TopDocs td = null; if (filter != null && sort != null) { td = searcher.search(boolQuery, filter, max, sort); } else if (filter != null) { td = searcher.search(boolQuery, filter, max); } else if (sort != null) { td = searcher.search(boolQuery, max, sort); } else { td = searcher.search(boolQuery, max); } for (int i = (pageno - 1) * pagesize; i < td.totalHits && i < max; i++) { SEntity item = new SEntity(this); Document doc = searcher.doc(td.scoreDocs[i].doc); item.setSchema(doc.get(SEntity.SCHEMA)); read(item, doc); tag.add(item); } searcher.close(); reader.close(); } catch (Exception e) { } return tag; } protected void backup(SEntity src) { String id = src.getId(); if (id.length() == 0) return; String kind = src.getKind(); if (kind.length() == 0) return; String fid = ""; for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) { if (fid.length() > 0) fid += File.separator; fid += id.substring(i, i + 2); } try { File file = new File(dirBackup, kind); file = new File(file.getAbsolutePath(), fid); file.mkdirs(); String folder = file.getAbsolutePath(); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(folder, id + ".txt")))); writer.write(src.toString()); writer.close(); } catch (Exception e) { } } protected void read(SEntity entity, Document doc) { String schema = doc.get(SEntity.SCHEMA); if (schema == null) schema = ""; String[] fields = schema.split("\\|"); for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) { String kind = fields[i]; String fname = fields[i + 1]; String val = doc.get(fname); if (val == null) val = ""; if (SEntity.ALL_KINDS.indexOf("|" + kind + "|") < 0) continue; entity.setString(fname, val); } } protected void write(SEntity entity, Document doc) 
{ String schema = entity.getSchema(); if (schema == null) schema = ""; String[] fields = schema.split("\\|"); for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) { String kind = fields[i]; String fname = fields[i + 1]; if (SEntity.STRING.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS); doc.add(field); } else if (SEntity.DOUBLE.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setDoubleValue(entity.getDouble(fname)); doc.add(field); } else if (SEntity.FLOAT.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setFloatValue(entity.getFloat(fname)); doc.add(field); } else if (SEntity.INTEGER.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setIntValue(entity.getInteger(fname)); doc.add(field); } else if (SEntity.LONG.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setLongValue(entity.getLong(fname)); doc.add(field); } else if (SEntity.ANALYZED.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED); doc.add(field); } } } public void delete(String id) { Monitor monitor = new Monitor(); Timer timer = new Timer(); timer.schedule(new DeleteTask(timer, id, monitor), 1); while (!monitor.finished) { try { Thread.sleep(10); } catch (Exception e) { } } timer = null; } protected void deleteEntity(String id) { if (id.length() == 0) return; String kind = ""; try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1); if (td.totalHits > 0) { Document doc = searcher.doc(td.scoreDocs[0].doc); kind = doc.get(SEntity.KIND); } searcher.close(); reader.close(); } catch (Exception e) { } if (kind.length() == 0) return; if 
(!allowDelete(id, kind)) return; try { if (!kind.equals(KIND_QUOTA)) { if (!quotaDelete(id, kind)) return; } removeBackup(id, kind); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc); writer.deleteDocuments(new Term(SEntity.ID, id)); writer.close(); } catch (Exception e) { } } protected boolean allowDelete(String id, String kind) { return true; } protected void removeBackup(String id, String kind) { if (id.length() == 0) return; if (kind.length() == 0) return; String fid = ""; for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) { if (fid.length() > 0) fid += File.separator; fid += id.substring(i, i + 2); } try { File file = new File(dirBackup, kind); file = new File(file.getAbsolutePath(), fid); String folder = file.getAbsolutePath(); file = new File(folder, id + ".txt"); file.delete(); } catch (Exception e) { } } public double storageQuota() { return systemQuota; } public double storageSize() { SEntity tag = findSystemQuota(); if (tag == null) return 0; return tag.getDouble("size"); } private class DeleteTask extends TimerTask { private String id; private Timer timer; private Monitor monitor; public DeleteTask(Timer timer, String id, Monitor monitor) { this.timer = timer; this.id = id; this.monitor = monitor; } @Override public void run() { deleteEntity(id); monitor.finished = true; timer.cancel(); timer.purge(); timer = null; } } private class CreateTask extends TimerTask { private SEntity entity; private Timer timer; private Monitor monitor; public CreateTask(Timer timer, SEntity entity, Monitor monitor) { this.timer = timer; this.entity = entity; this.monitor = monitor; } @Override public void run() { createEntity(entity); monitor.finished = true; timer.cancel(); timer.purge(); timer = null; } } private class UpdateTask extends TimerTask { 
private SEntity entity; private Timer timer; private Monitor monitor; public UpdateTask(Timer timer, SEntity entity, Monitor monitor) { this.timer = timer; this.entity = entity; this.monitor = monitor; } @Override public void run() { updateEntity(entity); monitor.finished = true; timer.cancel(); timer.purge(); timer = null; } } private class Monitor { public boolean finished = false; } }Modify DataHandler class
/**
 * Machine handler that keeps the index directory, backup directory and system
 * quota it was constructed with, and hands them to each {@link LuceneHandler}
 * it creates.
 */
public static class DataHandler extends Machine.Handler {
    /** Index directory passed on to the LuceneHandler. */
    private String dirIndex;
    /** Backup directory passed on to the LuceneHandler. */
    private String dirBackup;
    /** System quota passed on to the LuceneHandler. */
    private double systemQuota;

    /**
     * Stores the three settings for later use by {@link #getEntityHandler()}.
     *
     * @param dirIndex    index directory
     * @param dirBackup   backup directory
     * @param systemQuota system quota
     */
    public DataHandler(String dirIndex, String dirBackup, double systemQuota) {
        this.dirIndex = dirIndex;
        this.dirBackup = dirBackup;
        this.systemQuota = systemQuota;
    }

    /**
     * Creates a new LuceneHandler from the stored settings on every call.
     *
     * @return a fresh entity handler
     */
    public SEntity.Handler getEntityHandler() {
        return new LuceneHandler(dirIndex, dirBackup, systemQuota);
    }

    // The original listing elides the remaining members at this point.
    ..............
}
javascript
/*
 * Sandbox demo script: grabs products from an aStore listing, stores them as
 * 'Link' entities, and lists, searches or deletes what was stored.
 * All host functionality is reached through the `env` object supplied by the
 * caller; values returned by the host may be Java objects rather than script
 * primitives, so strings are normalised with jsString() before their length
 * is inspected.
 */

/**
 * Script entry point. Runs the scenario selected by the local `no`.
 *
 * @param env  host environment object
 * @param args arguments supplied by the host (only passed through)
 */
function main(env, args) {
    var no = 1;
    if (no == 1) {
        test01(env, args); // Grab products
    }
    if (no == 2) {
        test02(env, args); // List all products
    }
    if (no == 3) {
        test03(env, args); // Search products
    }
    if (no == 4) {
        test04(env, args); // Delete products
    }
}

/**
 * Converts a host (possibly Java) value to a primitive script string.
 * null and undefined become the empty string.
 */
function jsString(value) {
    return value == null ? '' : String(value);
}

/** Deletes the 'Link' entities returned by a match-all search (arguments 3, 1). */
function test04(env, args) {
    var entity = env.newEntity();
    var query = entity.newMatchAllDocsQuery();
    var products = entity.search('Link', query, 3, 1);
    for (var i = 0; i < products.size(); i++) {
        // Bracket access: `delete` is a reserved word and older script engines
        // reject it as a dotted property name.
        products.get(i)['delete']();
    }
}

/**
 * Returns the given field of a product with query matches highlighted.
 * When highlighting fails the error is reported and the plain text is returned.
 */
function highlightField(entity, query, product, field, env) {
    var text = env.newString(product.getString(field).getBytes('UTF-8'), 'UTF-8');
    try {
        text = entity.highlight(query, text, field, 50, 3, ' (...) ');
    } catch (e) {
        env.error(e);
    }
    return text;
}

/** Searches 'Link' entities for a term in 'desc' or 'title' and prints them with highlights. */
function test03(env, args) {
    var term = 'Sleeping';
    var entity = env.newEntity();
    var query = entity.parseQuery([term, term], ['desc', 'title'], [entity.occurShould(), entity.occurShould()]);
    var size = entity.count('Link', query, 999999);
    var products = entity.search('Link', query, entity.newSort(org.apache.lucene.search.SortField.FIELD_SCORE), 999999);
    env.info('Size: ' + size);
    for (var i = 0; i < products.size(); i++) {
        var product = products.get(i);
        var title = highlightField(entity, query, product, 'title', env);
        var desc = highlightField(entity, query, product, 'desc', env);
        printProduct(product, env, desc, title);
    }
}

/** Prints the number of stored 'Link' entities followed by each entity. */
function test02(env, args) {
    var entity = env.newEntity();
    var query = entity.newMatchAllDocsQuery();
    var size = entity.count('Link', query, 999999);
    var products = entity.search('Link', query, 999999);
    env.info('Size: ' + size);
    for (var i = 0; i < products.size(); i++) {
        printProduct(products.get(i), env);
    }
}

/** Grabs products from the listing pages and passes each one to saveProduct. */
function test01(env, args) {
    var astore = 'paesia';
    var node = '100';
    var maxpage = 2;
    var products = grabProduct(astore, node, maxpage, env);
    for (var i = 0; i < products.size(); i++) {
        var pro = products.get(i);
        saveProduct(pro, env);
    }
    env.info('Saved: ' + products.size());
}

/**
 * Logs one product through env.info.
 *
 * @param pro    entity to print
 * @param env    host environment object
 * @param descH  optional highlighted description
 * @param titleH optional highlighted title
 */
function printProduct(pro, env, descH, titleH) {
    var line = '';
    line += '\nId: ' + pro.getId();
    line += '\nTitle: ' + pro.getString('title');
    line += '\nUrl: ' + pro.getString('url');
    line += '\nDescription: \n' + pro.getString('desc');
    if (titleH != null) {
        line += '\nTitle Highlight: \n' + titleH;
    }
    if (descH != null) {
        line += '\nDescription Highlight: \n' + descH;
    }
    env.info('\n' + line + '\n');
}

/**
 * Stores a grabbed product as a 'Link' entity.
 * Products without a title or url, and products whose url is already stored,
 * are skipped. The description is reduced to its plain text before saving.
 *
 * @param pro map-like product with 'title', 'url' and optional 'description'
 * @param env host environment object
 */
function saveProduct(pro, env) {
    var title = jsString(pro.get('title'));
    var url = jsString(pro.get('url'));
    if (title.length == 0 || url.length == 0) return;
    if (findProductByUrl(url, env)) return;
    var desc = jsString(pro.get('description'));
    if (desc.length > 0) {
        var doc = env.newJsoup().parse(desc);
        desc = jsString(doc.select('body').first().text());
    }
    var schema = 's|url|a|title|a|desc';
    var entity = env.newEntity();
    entity.setSchema(schema);
    entity.setKind('Link');
    entity.setId(env.uniqid());
    entity.setString('url', url);
    entity.setString('title', title);
    entity.setString('desc', desc);
    entity.save();
}

/** Returns true when at least one 'Link' entity has exactly this url. */
function findProductByUrl(url, env) {
    var entity = env.newEntity();
    var query = entity.newTermQuery(entity.newTerm('url', url));
    var size = entity.count('Link', query, 1);
    return (size > 0);
}

/**
 * Returns the part of a link that follows the last '/detail/', or null when
 * the link contains no such segment.
 */
function detailCode(href) {
    var pos = href.lastIndexOf('/detail/');
    if (pos < 0) return null;
    return href.substring(pos + 8);
}

/**
 * Builds a map (product code -> product map) from one listing page.
 * Text-row links supply code, title and absolute url; image-row links supply
 * the small image and, when the image has alt text, replace the title.
 */
function collectListing(doc, pageUrl, env) {
    var map = env.newHashMap();
    var elements = doc.select('#featuredProducts .textrow a');
    for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var href = jsString(element.attr('href'));
        var code = detailCode(href);
        if (code == null) continue;
        var item = env.newHashMap();
        item.put('code', code);
        item.put('title', element.text());
        item.put('url', env.newURL(pageUrl, href) + '');
        map.put(code, item);
    }
    elements = doc.select('#featuredProducts .imagerow a');
    for (var k = 0; k < elements.size(); k++) {
        var link = elements.get(k);
        var imageCode = detailCode(jsString(link.attr('href')));
        if (imageCode == null) continue;
        var entry = map.get(imageCode);
        if (entry == null) continue;
        var img = link.select('img').first();
        if (img == null) continue;
        var alt = jsString(img.attr('alt'));
        if (alt.length > 0) {
            entry.put('title', alt);
        }
        entry.put('small-image', img.attr('src'));
    }
    return map;
}

/**
 * Copies the HTML of one page section into the product map.
 * Everything up to and including `heading` (when given and present) is cut
 * off, links and images are made absolute, and the result is stored under
 * `key` unless it still contains an '<html' tag.
 */
function storeSection(doc, selector, heading, key, item, env) {
    var element = doc.select(selector).first();
    if (element == null) return;
    var html = jsString(element.html());
    if (heading != null) {
        var pos = html.indexOf(heading);
        if (pos >= 0) {
            html = html.substring(pos + heading.length);
        }
    }
    var baseUrl = jsString(item.get('url'));
    var fragment = env.newJsoup().parse(html, baseUrl);
    buildURL(fragment, baseUrl, env);
    html = jsString(fragment.select('body').first().html());
    if (html.indexOf('<html') < 0) {
        item.put(key, html);
    }
}

/** Loads a product's detail page and adds images, sections, prices and buy link to its map. */
function loadDetails(item, env) {
    var detailUrl = env.newURL(item.get('url'));
    var doc = env.newJsoup().parse(detailUrl, 60000);
    var element = doc.select('#detailImage img').first();
    if (element != null) {
        item.put('large-image', element.attr('src'));
    }
    storeSection(doc, '#productDescription', '<h2>Product Description</h2>', 'description', item, env);
    storeSection(doc, '#productDetails', '<h2>Product Details</h2>', 'details', item, env);
    storeSection(doc, '#editorialReviews', null, 'editorial-reviews', item, env);
    element = doc.select('#detailListPrice').first();
    if (element != null) {
        item.put('list-price', element.text());
    }
    element = doc.select('#detailOfferPrice').first();
    if (element != null) {
        item.put('offer-price', element.text());
    }
    element = doc.select('#addToCartForm a').first();
    if (element != null) {
        item.put('buy-url', element.attr('href'));
    }
}

/**
 * Grabs the products of listing pages 1..maxpage.
 * A failure on a listing page skips that page; a failure on a detail page
 * leaves that product with whatever was collected before the failure.
 *
 * @return host list of product maps
 */
function grabProduct(astore, node, maxpage, env) {
    var tag = env.newArrayList();
    for (var no = 1; no <= maxpage; no++) {
        try {
            var alink = env.newURL('http://astore.amazon.com/' + astore + '-20?node=' + node + '&page=' + no);
            var doc = env.newJsoup().parse(alink, 60000);
            var map = collectListing(doc, alink, env);
            var keys = env.getKeys(map);
            for (var i = 0; i < keys.size(); i++) {
                try {
                    loadDetails(map.get(keys.get(i)), env);
                } catch (e) {
                    env.error(e);
                }
            }
            for (var j = 0; j < keys.size(); j++) {
                tag.add(map.get(keys.get(j)));
            }
        } catch (e) {
            env.error(e);
        }
    }
    return tag;
}

/**
 * Rewrites one attribute of every element to an absolute URL.
 * An element whose value cannot be resolved is reported and left unchanged so
 * that a single bad link does not discard the surrounding content.
 */
function absolutizeAttr(elements, attrName, baseUrl, env) {
    for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        try {
            var url = env.newURL(baseUrl, element.attr(attrName));
            element.attr(attrName, url + '');
        } catch (e) {
            env.error(e);
        }
    }
}

/** Makes every link href and image src in the document absolute against baseUrl. */
function buildURL(doc, baseUrl, env) {
    baseUrl = env.newURL(baseUrl);
    absolutizeAttr(doc.select('a'), 'href', baseUrl, env);
    absolutizeAttr(doc.select('img'), 'src', baseUrl, env);
}
No comments:
Post a Comment