Call the Machine.run() method
// Example driver: runs a script inside a Machine and logs every entry the script
// placed in the shared "links" list.
String dirIndex = "";
String dirBackup = "";
// NOTE(review): presumably expressed in megabytes (LuceneHandler compares its quota
// against sizes divided by 1048576) - confirm against DataHandler.
double systemQuota = 1024 * 1024;
String js = loadJS();
Map<String, Object> args = new HashMap<String, Object>();
// The list is handed to the script through args; whatever the script adds is read back below.
List<Object> links = new ArrayList<Object>();
args.put("links", links);
Machine env = new Machine(new DataHandler(dirIndex, dirBackup, systemQuota));
Machine.run(env, js, args);
int position = 0;
for (Object link : links) {
    position++;
    Map<?, ?> item = (Map<?, ?>) link;
    // StringBuilder avoids re-copying the accumulated text on every key.
    StringBuilder line = new StringBuilder();
    for (Map.Entry<?, ?> entry : item.entrySet()) {
        line.append("\r\n").append(entry.getKey()).append(" : ").append(entry.getValue());
    }
    logger.info("\r\n" + position + " --------------------------------\r\n" + line + "\r\n");
}
Modify the com.paesia.schema.script.Machine class
............
import com.paesia.schema.script.safe.lucene.SEntity;
/**
 * Script execution environment (excerpt: the "..........." lines mark code elided from
 * this listing). The visible part whitelists Lucene query/filter/sort classes for
 * scripts and exposes a factory for SEntity instances.
 */
public class Machine {
// Source of the SEntity.Handler used by newEntity(); may be null. Where it is assigned
// is not visible in this excerpt.
private Handler handler;
/**
 * Runs a script inside a scripting Context (presumably Mozilla Rhino - the imports are
 * elided). Any exception is rethrown unchanged; Context.exit() always runs.
 *
 * @param env  the Machine the script runs against
 * @param js   script source text
 * @param args values passed to the script; how they are exposed is in the elided part
 * @throws Exception whatever the script setup or execution throws
 */
public static void run(Machine env, String js, Map args) throws Exception {
try {
Context cx = Context.enter();
// The class shutter decides which Java classes scripts are allowed to see.
cx.setClassShutter(new ClassShutter() {
public boolean visibleToScripts(String className) {
...........
// Lucene types that SEntity's factory methods hand back to scripts.
if ("org.apache.lucene.search.Query".equals(className)) return true;
if ("org.apache.lucene.search.Filter".equals(className)) return true;
if ("org.apache.lucene.search.Sort".equals(className)) return true;
if ("org.apache.lucene.search.BooleanQuery".equals(className)) return true;
if ("org.apache.lucene.search.BooleanClause".equals(className)) return true;
// Nested types of BooleanClause (e.g. the Occur enum and its constant classes).
if (className.startsWith("org.apache.lucene.search.BooleanClause$")) return true;
if ("org.apache.lucene.search.PhraseQuery".equals(className)) return true;
if ("org.apache.lucene.index.Term".equals(className)) return true;
if ("org.apache.lucene.search.MultiPhraseQuery".equals(className)) return true;
if ("org.apache.lucene.search.NGramPhraseQuery".equals(className)) return true;
if ("org.apache.lucene.search.NumericRangeQuery".equals(className)) return true;
if ("org.apache.lucene.search.PrefixQuery".equals(className)) return true;
if ("org.apache.lucene.search.TermQuery".equals(className)) return true;
if ("org.apache.lucene.search.TermRangeQuery".equals(className)) return true;
if ("org.apache.lucene.search.WildcardQuery".equals(className)) return true;
if ("org.apache.lucene.search.MatchAllDocsQuery".equals(className)) return true;
if ("org.apache.lucene.search.FieldValueFilter".equals(className)) return true;
if ("org.apache.lucene.search.NumericRangeFilter".equals(className)) return true;
if ("org.apache.lucene.search.PrefixFilter".equals(className)) return true;
if ("org.apache.lucene.search.QueryWrapperFilter".equals(className)) return true;
if ("org.apache.lucene.search.TermRangeFilter".equals(className)) return true;
if ("org.apache.lucene.search.SortField".equals(className)) return true;
...........
// Anything not explicitly allowed above stays hidden from scripts.
return false;
}
});
...........
// NOTE(review): this catch only rethrows, so it is redundant with the finally below.
} catch (Exception e) {
throw e;
} finally {
// NOTE(review): Context.enter() sits inside the try, so exit() also runs if enter() itself failed.
Context.exit();
}
}
...........
/**
 * Creates a new SEntity bound to the entity handler supplied by this machine's
 * Handler, or to no handler (null) when this machine has none.
 */
public SEntity newEntity() {
SEntity.Handler seh = null;
if (handler != null) {
seh = handler.getEntityHandler();
}
return new SEntity(seh);
}
/** Extension point for the host; the default supplies no entity handler. */
public static class Handler {
...........
// Default: no persistence backend, so entities created by newEntity() get a null handler.
public SEntity.Handler getEntityHandler() { return null; }
}
...........
}
com.paesia.schema.script.safe.lucene.SEntity class
package com.paesia.schema.script.safe.lucene;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NGramPhraseQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
/**
 * A schema-aware bag of string properties that scripts use as their persistent record
 * type. Values are stored as strings in {@link #data}; only fields registered in
 * {@link #schema} can be written. Persistence and search are delegated to an optional
 * {@link Handler}; with a null handler those operations are no-ops returning
 * empty/zero results. The class also offers factory methods for Lucene
 * queries, filters and sorts.
 */
public class SEntity {
    /** Field kind codes accepted by {@link #register(String, String)}. */
    public static final String STRING = "s";
    public static final String DOUBLE = "d";
    public static final String FLOAT = "f";
    public static final String INTEGER = "i";
    public static final String LONG = "l";
    public static final String ANALYZED = "a";
    /** All valid kind codes, each delimited by '|' so a code can be looked up as "|code|". */
    public static final String ALL_KINDS = "|s|d|f|i|l|a|";

    /** Names of the built-in fields every entity registers (see {@link #registerDefault()}). */
    public static final String SCHEMA = "F4f8cc93237f50";
    public static final String ID = "F4f8cce61643dd";
    public static final String CREATED = "F4f8cd83fcca31";
    public static final String UPDATED = "F4f8cd84e2b74a";
    public static final String KIND = "F4f8cd9c8ee13d";
    public static final String MARK = "F4f8cda27d62fb";

    /** Field name -> value (always strings). Also holds the serialized schema under SCHEMA. */
    protected Properties data = new Properties();
    /** Field name -> kind code. */
    protected Properties schema = new Properties();
    /** Persistence backend; may be null. */
    protected Handler handler = null;

    /**
     * @param handler persistence backend, or null for a purely in-memory entity
     */
    public SEntity(Handler handler) {
        this.handler = handler;
        registerDefault();
    }

    /**
     * Registers a field with the given kind and refreshes the serialized schema.
     * Invalid registrations are ignored silently: a null field or type, a type that is
     * not one of the kind codes, or a name/type containing '|' (the schema separator,
     * which would corrupt the serialized form; a type such as "s|d" would otherwise
     * pass the substring check against ALL_KINDS).
     */
    public void register(String field, String type) {
        if (field == null || type == null) return;
        if (field.indexOf('|') >= 0 || type.indexOf('|') >= 0) return;
        if (ALL_KINDS.indexOf("|" + type + "|") < 0) return;
        schema.put(field, type);
        saveSchema();
    }

    /**
     * Replaces the schema with the one described by {@code src}, formatted as
     * {@code kind|name|kind|name...}. The built-in fields are always re-registered.
     * A null {@code src} is treated as an empty schema.
     */
    public void setSchema(String src) {
        applySchema(src);
    }

    /** Returns the serialized schema ({@code kind|name|...}), never null. */
    public String getSchema() {
        String tag = data.getProperty(SCHEMA);
        return tag == null ? "" : tag;
    }

    /**
     * Replaces all data with the properties parsed from {@code src} (java.util.Properties
     * text format) and rebuilds the schema from the SCHEMA property. Parsing is best
     * effort: on a malformed input whatever was read before the error is kept.
     */
    public void fromString(String src) {
        data.clear();
        schema.clear();
        if (src != null) {
            try {
                // Load from characters directly; going through UTF-8 bytes and
                // load(InputStream), which decodes ISO-8859-1, garbles non-ASCII text.
                data.load(new StringReader(src));
            } catch (Exception ignored) {
                // Deliberately best effort, matching the silent behaviour callers rely on.
            }
        }
        loadSchema();
    }

    /**
     * Serializes the data in java.util.Properties text format (non-ASCII characters are
     * escaped as \\uXXXX by Properties.store). Returns "" if serialization fails.
     */
    public String toString() {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            data.store(baos, "");
            // Properties.store writes ISO-8859-1; decode with the same charset rather
            // than the platform default.
            return baos.toString("ISO-8859-1");
        } catch (Exception ignored) {
            return "";
        }
    }

    /** Returns the stored value of {@code field}, or "" when absent. */
    public String getString(String field) {
        String tag = data.getProperty(field);
        return tag == null ? "" : tag;
    }

    /** Stores {@code value} (null becomes "") if {@code field} is registered; otherwise does nothing. */
    public void setString(String field, String value) {
        if (!schema.containsKey(field)) return;
        data.setProperty(field, value == null ? "" : value);
    }

    /** Returns the field parsed as a double, or 0 when missing or unparsable. */
    public double getDouble(String field) {
        try {
            return Double.parseDouble(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    public void setDouble(String field, double value) {
        setString(field, Double.toString(value));
    }

    /** Returns the field parsed as a float, or 0 when missing or unparsable. */
    public float getFloat(String field) {
        try {
            return Float.parseFloat(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    public void setFloat(String field, float value) {
        setString(field, Float.toString(value));
    }

    /** Returns the field parsed as a long, or 0 when missing or unparsable. */
    public long getLong(String field) {
        try {
            return Long.parseLong(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    public void setLong(String field, long value) {
        setString(field, Long.toString(value));
    }

    /** Returns the field parsed as an int, or 0 when missing or unparsable. */
    public int getInteger(String field) {
        try {
            return Integer.parseInt(getString(field));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    public void setInteger(String field, int value) {
        setString(field, Integer.toString(value));
    }

    public String getId() {
        return getString(ID);
    }

    public void setId(String src) {
        setString(ID, src);
    }

    public String getKind() {
        return getString(KIND);
    }

    public void setKind(String src) {
        setString(KIND, src);
    }

    public String getMark() {
        return getString(MARK);
    }

    public void setMark(String src) {
        setString(MARK, src);
    }

    /** Creation time as set by {@link #save()}; the epoch when never saved. */
    public Date getCreated() {
        return new Date(getLong(CREATED));
    }

    /** Last update time as set by {@link #save()}; the epoch when never saved. */
    public Date getUpdated() {
        return new Date(getLong(UPDATED));
    }

    /** Returns whether the handler reports an entity with this id; false without a handler. */
    public boolean exists() {
        return handler != null && handler.exists(getId());
    }

    /**
     * Persists this entity through the handler: updates it when the handler reports the
     * id as existing, otherwise creates it. CREATED/UPDATED timestamps are stamped
     * accordingly. Does nothing without a handler.
     */
    public void save() {
        if (handler == null) return;
        long now = new Date().getTime();
        if (handler.exists(getId())) {
            setLong(UPDATED, now);
            handler.update(this);
        } else {
            setLong(CREATED, now);
            setLong(UPDATED, now);
            handler.create(this);
        }
    }

    // ---- counting: delegate to the handler, 0 without one ----

    public int count(String kind, Query query, int max) {
        return handler == null ? 0 : handler.count(kind, query, max);
    }

    public int count(String kind, Query query, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, sort, max);
    }

    public int count(String kind, Query query, Filter filter, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, max);
    }

    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, sort, max);
    }

    // ---- searching: delegate to the handler, empty list without one ----

    public List<SEntity> search(String kind, Query query, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, max);
    }

    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, sort, max);
    }

    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, max);
    }

    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, sort, max);
    }

    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, pagesize, pageno);
    }

    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, sort, pagesize, pageno);
    }

    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, pagesize, pageno);
    }

    /**
     * Paged search with filter and sort. This overload mirrors
     * {@link Handler#search(String, Query, Filter, Sort, int, int)}.
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, sort, pagesize, pageno);
    }

    /**
     * Paged search with filter and sort, kept for existing callers.
     *
     * @param max ignored; only {@code pagesize} and {@code pageno} bound the result
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max, int pagesize, int pageno) {
        return search(kind, query, filter, sort, pagesize, pageno);
    }

    /** Asks the handler to load the entity with the given id into this instance. */
    public void load(String id) {
        if (handler != null) {
            handler.load(id, this);
        }
    }

    // ---- Lucene factories, so scripts can build queries without constructing classes themselves ----

    public BooleanQuery newBooleanQuery() {
        return new BooleanQuery();
    }

    public BooleanClause newBooleanClause(Query query, Occur occur) {
        return new BooleanClause(query, occur);
    }

    public Occur occurMust() {
        return Occur.MUST;
    }

    public Occur occurMustNot() {
        return Occur.MUST_NOT;
    }

    public Occur occurShould() {
        return Occur.SHOULD;
    }

    public MatchAllDocsQuery newMatchAllDocsQuery() {
        return new MatchAllDocsQuery();
    }

    public MultiPhraseQuery newMultiPhraseQuery() {
        return new MultiPhraseQuery();
    }

    public PhraseQuery newPhraseQuery() {
        return new PhraseQuery();
    }

    public NGramPhraseQuery newNGramPhraseQuery(int n) {
        return new NGramPhraseQuery(n);
    }

    public Term newTerm(String field, String value) {
        return new Term(field, value);
    }

    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Float> newFloatRangeQuery(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Float> newFloatRangeQuery(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Long> newLongRangeQuery(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeQuery<Long> newLongRangeQuery(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public PrefixQuery newPrefixQuery(Term term) {
        return new PrefixQuery(term);
    }

    public TermQuery newTermQuery(Term term) {
        return new TermQuery(term);
    }

    public TermRangeQuery newTermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    public WildcardQuery newWildcardQuery(Term term) {
        return new WildcardQuery(term);
    }

    public FieldValueFilter newFieldValueFilter(String field, boolean negate) {
        return new FieldValueFilter(field, negate);
    }

    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Float> newFloatRangeFilter(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Float> newFloatRangeFilter(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Long> newLongRangeFilter(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    public NumericRangeFilter<Long> newLongRangeFilter(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    public PrefixFilter newPrefixFilter(Term term) {
        return new PrefixFilter(term);
    }

    public QueryWrapperFilter newQueryWrapperFilter(Query query) {
        return new QueryWrapperFilter(query);
    }

    public TermRangeFilter newTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
    }

    /** @param type one of the SortField type constants, e.g. {@link #sortFieldLong()} */
    public SortField newSortField(String field, int type, boolean reverse) {
        return new SortField(field, type, reverse);
    }

    public Sort newSort() {
        return new Sort();
    }

    public Sort newSort(SortField... fields) {
        return new Sort(fields);
    }

    public Sort newSort(SortField field) {
        return new Sort(field);
    }

    // ---- query parsing and highlighting (StandardAnalyzer, Lucene 3.6 compatibility) ----

    public Query parseQuery(String[] queries, String[] fields) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, new StandardAnalyzer(Version.LUCENE_36));
    }

    public Query parseQuery(String[] queries, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }

    public Query parseQuery(String query, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, query, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }

    /**
     * Returns the best-matching fragments of {@code text} for {@code query}, formatted
     * by SimpleHTMLFormatter and joined with {@code separator}. When no fragment
     * matches, the original text is returned unchanged.
     */
    public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments, String separator) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(field, new StringReader(text)));
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
        tokenStream.reset();
        String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
        return rv.length() == 0 ? text : rv;
    }

    /** Registers the built-in fields (schema, id, timestamps, kind, mark). */
    protected void registerDefault() {
        register(SCHEMA, "s");
        register(ID, "s");
        register(CREATED, "l");
        register(UPDATED, "l");
        register(KIND, "s");
        register(MARK, "s");
    }

    /** Serializes the schema as {@code kind|name|kind|name...} into the SCHEMA property. */
    protected void saveSchema() {
        StringBuilder tag = new StringBuilder();
        for (Object key : schema.keySet()) {
            if (tag.length() > 0) tag.append("|");
            tag.append(schema.get(key)).append("|").append(key);
        }
        data.put(SCHEMA, tag.toString());
    }

    /** Rebuilds the schema from the SCHEMA property currently held in the data. */
    protected void loadSchema() {
        applySchema(data.getProperty(SCHEMA));
    }

    /** Shared implementation of setSchema/loadSchema: parse pairs, add defaults, re-serialize. */
    private void applySchema(String src) {
        String[] fields = (src == null ? "" : src).split("\\|");
        schema.clear();
        // Entries come in (kind, name) pairs; a trailing unpaired token is ignored.
        for (int i = 0; i + 1 < fields.length; i += 2) {
            register(fields[i + 1], fields[i]);
        }
        registerDefault();
        saveSchema();
    }

    /** Deletes the entity with this instance's id through the handler. */
    public void delete() {
        delete(getId());
    }

    /** Deletes the entity with the given id through the handler; no-op without one. */
    public void delete(String id) {
        if (handler != null) {
            handler.delete(id);
        }
    }

    // ---- SortField constants exposed to scripts ----

    public SortField sortFieldDoc() {
        return SortField.FIELD_DOC;
    }

    public SortField sortFieldScore() {
        return SortField.FIELD_SCORE;
    }

    public int sortFieldLong() {
        return SortField.LONG;
    }

    public int sortFieldInteger() {
        return SortField.INT;
    }

    public int sortFieldDouble() {
        return SortField.DOUBLE;
    }

    public int sortFieldFloat() {
        return SortField.FLOAT;
    }

    public int sortFieldString() {
        return SortField.STRING_VAL;
    }

    /** Storage quota reported by the handler; 0 without one. */
    public double storageQuota() {
        return handler == null ? 0 : handler.storageQuota();
    }

    /** Storage currently used as reported by the handler; 0 without one. */
    public double storageSize() {
        return handler == null ? 0 : handler.storageSize();
    }

    /**
     * Persistence backend contract. Every method has a do-nothing default so
     * implementations override only what they support.
     */
    public static class Handler {
        public boolean exists(String id) { return false; }
        public void create(SEntity src) { }
        public void update(SEntity src) { }
        public void load(String id, SEntity src) { }
        public void delete(String id) { }
        public List<SEntity> search(String kind, Query query, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public int count(String kind, Query query, int max) { return 0; }
        public int count(String kind, Query query, Sort sort, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, Sort sort, int max) { return 0; }
        public double storageQuota() { return 0; }
        public double storageSize() { return 0; }
    }
}
com.paesia.schema.script.LuceneHandler class
package com.paesia.schema.script;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.paesia.schema.script.safe.lucene.SEntity;
public class LuceneHandler extends SEntity.Handler {
// Entity kind under which the quota bookkeeping record is stored (see newSystemQuota()).
public static final String KIND_QUOTA = "C4f91ee1eb414a";
// Value of the quota record's "kind" field that identifies the system-wide quota.
public static final String QUOTA_SYSTEM = "F4f91ee659b1ec";
// Directory of the Lucene index.
protected String dirIndex = "";
// Root directory of the per-entity text backups written by backup().
protected String dirBackup = "";
// Upper bound for the tracked "size" value; the quota methods compare it against byte
// counts divided by 1048576, i.e. megabytes.
protected double systemQuota = 0;
/**
 * Creates a handler bound to an index directory, a backup directory and a quota.
 *
 * @param dirIndex    path of the Lucene index directory
 * @param dirBackup   root path for entity backup files
 * @param systemQuota limit the quota checks compare the tracked size against
 */
public LuceneHandler(String dirIndex, String dirBackup, double systemQuota) {
    this.systemQuota = systemQuota;
    this.dirBackup = dirBackup;
    this.dirIndex = dirIndex;
}
/**
 * Reports whether the index holds a document whose ID field equals {@code id}.
 * Returns false for a null or empty id and when the index cannot be read
 * (for example before the first document has been written).
 */
public boolean exists(String id) {
    if (id == null || id.length() == 0) return false;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
        return td.totalHits > 0;
    } catch (Exception ignored) {
        // Best effort: an unreadable index is treated as "not found".
        return false;
    } finally {
        // Always release the searcher and reader, even when the search failed.
        if (searcher != null) {
            try { searcher.close(); } catch (Exception ignored) { }
        }
        if (reader != null) {
            try { reader.close(); } catch (Exception ignored) { }
        }
    }
}
/**
 * Schedules a CreateTask for {@code src} on a fresh Timer and blocks, polling every
 * 10 ms, until the monitor is flagged finished.
 * NOTE(review): Monitor and CreateTask are defined outside this view; presumably the
 * task sets monitor.finished and cancels the timer it is given - confirm.
 */
public void create(SEntity src) {
    Monitor monitor = new Monitor();
    Timer timer = new Timer();
    timer.schedule(new CreateTask(timer, src, monitor), 1);
    boolean interrupted = false;
    while (!monitor.finished) {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // Keep waiting for the task, but remember the interrupt for the caller.
            interrupted = true;
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}
/**
 * Reserves quota for a new entity: adds the serialized length of {@code src}
 * (divided by 1048576) to the tracked "size" and persists the quota record when the
 * result stays below {@code systemQuota}.
 *
 * @return true when the new size was accepted and saved, false when it would reach the quota
 */
protected boolean quotaCreate(SEntity src) {
    SEntity quota = findSystemQuota();
    if (quota == null) {
        // No quota record yet: start from a fresh one (size reads as 0).
        quota = newSystemQuota();
    }
    double current = quota.getDouble("size");
    double added = (double) src.toString().length() / 1048576.0;
    double newSize = current + added;
    if (newSize < 0) {
        newSize = 0;
    }
    if (!(newSize < systemQuota)) {
        return false;
    }
    quota.setDouble("size", newSize);
    quota.save();
    return true;
}
/**
 * Adjusts quota for a changed entity: subtracts the size of its existing backup file
 * and adds the serialized length of {@code src} (both divided by 1048576), persisting
 * the quota record when the result stays below {@code systemQuota}.
 *
 * @return true when the new size was accepted and saved, false when it would reach the quota
 */
protected boolean quotaUpdate(SEntity src) {
    SEntity quota = findSystemQuota();
    if (quota == null) {
        quota = newSystemQuota();
    }
    double current = quota.getDouble("size");
    double previous = (double) getFileSize(src.getId(), src.getKind()) / 1048576.0;
    double replacement = (double) src.toString().length() / 1048576.0;
    double newSize = current - previous + replacement;
    if (newSize < 0) {
        newSize = 0;
    }
    if (!(newSize < systemQuota)) {
        return false;
    }
    quota.setDouble("size", newSize);
    quota.save();
    return true;
}
/**
 * Releases quota for a deleted entity: subtracts the size of its backup file (divided
 * by 1048576) from the tracked "size" and persists the quota record when the result
 * is below {@code systemQuota}.
 * NOTE(review): this returns false (and saves nothing) while the remaining size is
 * still at or above the quota - confirm that refusing in that case is intended.
 *
 * @return true when the reduced size was saved
 */
protected boolean quotaDelete(String id, String kind) {
    SEntity quota = findSystemQuota();
    if (quota == null) {
        quota = newSystemQuota();
    }
    double current = quota.getDouble("size");
    double released = (double) getFileSize(id, kind) / 1048576.0;
    double newSize = current - released;
    if (newSize < 0) {
        newSize = 0;
    }
    if (!(newSize < systemQuota)) {
        return false;
    }
    quota.setDouble("size", newSize);
    quota.save();
    return true;
}
/**
 * Returns the length in bytes of the backup file for the given entity, or 0 when no
 * such file exists. The file lives at
 * {@code dirBackup/kind/<id split into two-character directories>/id.txt}; a trailing
 * odd character of the id does not contribute a directory.
 */
protected long getFileSize(String id, String kind) {
    StringBuilder shard = new StringBuilder();
    for (int start = 0; start + 1 < id.length(); start += 2) {
        if (shard.length() > 0) {
            shard.append(File.separator);
        }
        shard.append(id.substring(start, start + 2));
    }
    File kindDir = new File(dirBackup, kind);
    File shardDir = new File(kindDir.getAbsolutePath(), shard.toString());
    File target = new File(shardDir.getAbsolutePath(), id + ".txt");
    return target.exists() ? target.length() : 0;
}
/**
 * Builds (without saving) a fresh system quota record: kind KIND_QUOTA, a random
 * dash-less UUID as id, a string field "kind" set to QUOTA_SYSTEM and a double field
 * "size" left unset.
 */
protected SEntity newSystemQuota() {
    SEntity quota = new SEntity(this);
    quota.setSchema("s|kind|d|size");
    quota.setKind(KIND_QUOTA);
    String freshId = UUID.randomUUID().toString().replaceAll("-", "");
    quota.setId(freshId);
    quota.setString("kind", QUOTA_SYSTEM);
    return quota;
}
/**
 * Looks up the stored system quota record (kind KIND_QUOTA, field "kind" equal to
 * QUOTA_SYSTEM).
 *
 * @return the first match, or null when none is found
 */
protected SEntity findSystemQuota() {
    Query bySystemKind = new TermQuery(new Term("kind", QUOTA_SYSTEM));
    List<SEntity> matches = search(KIND_QUOTA, bySystemKind, 1);
    return matches.size() == 0 ? null : matches.get(0);
}
/**
 * Writes a new entity: checks quota (skipped for the quota record itself), writes the
 * text backup, then adds the entity as a document to the index. Entities without id
 * or kind, and entities rejected by the quota check, are ignored. Failures are
 * swallowed (best effort), but the index writer is always closed so the index write
 * lock is not left behind.
 */
protected void createEntity(SEntity src) {
    if (src.getId().length() == 0) return;
    if (src.getKind().length() == 0) return;
    IndexWriter writer = null;
    try {
        if (!src.getKind().equals(KIND_QUOTA)) {
            if (!quotaCreate(src)) return;
        }
        backup(src);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
        Document doc = new Document();
        write(src, doc);
        writer.addDocument(doc);
    } catch (Exception ignored) {
        // Deliberately best effort; callers have no error channel.
        // NOTE(review): quota and backup have already been updated at this point.
    } finally {
        if (writer != null) {
            // Closing releases the write lock even when indexing failed part-way.
            try { writer.close(); } catch (Exception ignored) { }
        }
    }
}
/**
 * Schedules an UpdateTask for {@code src} on a fresh Timer and blocks, polling every
 * 10 ms, until the monitor is flagged finished.
 * NOTE(review): Monitor and UpdateTask are defined outside this view; presumably the
 * task sets monitor.finished and cancels the timer it is given - confirm.
 */
public void update(SEntity src) {
    Monitor monitor = new Monitor();
    Timer timer = new Timer();
    timer.schedule(new UpdateTask(timer, src, monitor), 1);
    boolean interrupted = false;
    while (!monitor.finished) {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // Keep waiting for the task, but remember the interrupt for the caller.
            interrupted = true;
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}
/**
 * Rewrites an existing entity: checks quota (skipped for the quota record itself),
 * rewrites the text backup, then replaces the index document whose ID term matches.
 * Entities without id or kind, and entities rejected by the quota check, are ignored.
 * Failures are swallowed (best effort), but the index writer is always closed so the
 * index write lock is not left behind.
 */
protected void updateEntity(SEntity src) {
    if (src.getId().length() == 0) return;
    if (src.getKind().length() == 0) return;
    IndexWriter writer = null;
    try {
        if (!src.getKind().equals(KIND_QUOTA)) {
            if (!quotaUpdate(src)) return;
        }
        backup(src);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
        Document doc = new Document();
        write(src, doc);
        writer.updateDocument(new Term(SEntity.ID, src.getId()), doc);
    } catch (Exception ignored) {
        // Deliberately best effort; callers have no error channel.
        // NOTE(review): quota and backup have already been updated at this point.
    } finally {
        if (writer != null) {
            // Closing releases the write lock even when indexing failed part-way.
            try { writer.close(); } catch (Exception ignored) { }
        }
    }
}
/**
 * Loads the entity with the given id from the index into {@code src}, provided
 * {@link #allowLoad(String, String)} permits it. {@code src} is left untouched when
 * the id is not found or the index cannot be read.
 */
public void load(String id, SEntity src) {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
        if (td.totalHits > 0) {
            Document doc = searcher.doc(td.scoreDocs[0].doc);
            if (allowLoad(id, doc.get(SEntity.KIND))) {
                // The schema must be installed first: setString ignores unregistered fields.
                src.setSchema(doc.get(SEntity.SCHEMA));
                read(src, doc);
            }
        }
    } catch (Exception ignored) {
        // Best effort: a failed load leaves src as it was at the point of failure.
    } finally {
        // Always release the searcher and reader, even when the lookup failed.
        if (searcher != null) {
            try { searcher.close(); } catch (Exception ignored) { }
        }
        if (reader != null) {
            try { reader.close(); } catch (Exception ignored) { }
        }
    }
}
/**
 * Hook consulted by load() before an entity's data is copied out of the index.
 * This implementation permits every load; subclasses may override it to restrict
 * access by id or kind.
 *
 * @param id   id of the entity about to be loaded
 * @param kind value of the entity's KIND field as stored in the index
 * @return always true here
 */
protected boolean allowLoad(String id, String kind) {
    final boolean permitted = true;
    return permitted;
}
/**
 * Returns the total number of index documents of the given kind that match
 * {@code query} (and {@code filter}, when given). The value is Lucene's totalHits, so
 * it is not capped by {@code max}. Returns 0 when the search fails.
 *
 * @param query  additional required query, or null for "all of this kind"
 * @param filter optional filter, may be null
 * @param sort   optional sort, may be null
 * @param max    number of top hits to collect; passed straight to the searcher
 */
public int count(String kind, Query query, Filter filter, Sort sort, int max) {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        BooleanQuery boolQuery = new BooleanQuery();
        // Every search is scoped to a single entity kind.
        boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
        if (query != null) {
            boolQuery.add(new BooleanClause(query, Occur.MUST));
        }
        TopDocs td;
        if (filter != null && sort != null) {
            td = searcher.search(boolQuery, filter, max, sort);
        } else if (filter != null) {
            td = searcher.search(boolQuery, filter, max);
        } else if (sort != null) {
            td = searcher.search(boolQuery, max, sort);
        } else {
            td = searcher.search(boolQuery, max);
        }
        return td.totalHits;
    } catch (Exception ignored) {
        // Best effort: an unreadable index or invalid arguments count as zero hits.
        return 0;
    } finally {
        // Always release the searcher and reader, even when the search failed.
        if (searcher != null) {
            try { searcher.close(); } catch (Exception ignored) { }
        }
        if (reader != null) {
            try { reader.close(); } catch (Exception ignored) { }
        }
    }
}
/** Counts matches of {@code query} within {@code kind}; no filter, no sort. */
public int count(String kind, Query query, int max) {
    final Filter noFilter = null;
    final Sort noSort = null;
    return count(kind, query, noFilter, noSort, max);
}

/** Counts matches of {@code query} within {@code kind} using {@code sort}; no filter. */
public int count(String kind, Query query, Sort sort, int max) {
    final Filter noFilter = null;
    return count(kind, query, noFilter, sort, max);
}

/** Counts matches of {@code query} within {@code kind} restricted by {@code filter}; no sort. */
public int count(String kind, Query query, Filter filter, int max) {
    final Sort noSort = null;
    return count(kind, query, filter, noSort, max);
}
/** Top-{@code max} search; no filter, no sort. */
public List<SEntity> search(String kind, Query query, int max) {
    final Filter noFilter = null;
    final Sort noSort = null;
    return search(kind, query, noFilter, noSort, max);
}

/** Top-{@code max} search ordered by {@code sort}; no filter. */
public List<SEntity> search(String kind, Query query, Sort sort, int max) {
    final Filter noFilter = null;
    return search(kind, query, noFilter, sort, max);
}

/** Top-{@code max} search restricted by {@code filter}; no sort. */
public List<SEntity> search(String kind, Query query, Filter filter, int max) {
    final Sort noSort = null;
    return search(kind, query, filter, noSort, max);
}

/** Paged search; no filter, no sort. */
public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
    final Filter noFilter = null;
    final Sort noSort = null;
    return search(kind, query, noFilter, noSort, pagesize, pageno);
}

/** Paged search ordered by {@code sort}; no filter. */
public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
    final Filter noFilter = null;
    return search(kind, query, noFilter, sort, pagesize, pageno);
}

/** Paged search restricted by {@code filter}; no sort. */
public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
    final Sort noSort = null;
    return search(kind, query, filter, noSort, pagesize, pageno);
}
/**
 * Returns up to {@code max} entities of the given kind matching {@code query} (and
 * {@code filter}, when given), in relevance order or in {@code sort} order. On
 * failure the entities read so far are returned (possibly none).
 *
 * @param query  additional required query, or null for "all of this kind"
 * @param filter optional filter, may be null
 * @param sort   optional sort, may be null
 * @param max    maximum number of hits to return
 */
public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
    List<SEntity> tag = new ArrayList<SEntity>();
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        BooleanQuery boolQuery = new BooleanQuery();
        // Every search is scoped to a single entity kind.
        boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
        if (query != null) {
            boolQuery.add(new BooleanClause(query, Occur.MUST));
        }
        TopDocs td;
        if (filter != null && sort != null) {
            td = searcher.search(boolQuery, filter, max, sort);
        } else if (filter != null) {
            td = searcher.search(boolQuery, filter, max);
        } else if (sort != null) {
            td = searcher.search(boolQuery, max, sort);
        } else {
            td = searcher.search(boolQuery, max);
        }
        // Iterate over the hits actually returned: scoreDocs holds at most max entries,
        // while totalHits counts every match and can be larger.
        for (int i = 0; i < td.scoreDocs.length; i++) {
            SEntity item = new SEntity(this);
            Document doc = searcher.doc(td.scoreDocs[i].doc);
            item.setSchema(doc.get(SEntity.SCHEMA));
            read(item, doc);
            tag.add(item);
        }
    } catch (Exception ignored) {
        // Best effort: return what was collected before the failure.
    } finally {
        // Always release the searcher and reader, even when the search failed.
        if (searcher != null) {
            try { searcher.close(); } catch (Exception ignored) { }
        }
        if (reader != null) {
            try { reader.close(); } catch (Exception ignored) { }
        }
    }
    return tag;
}
/**
 * Returns one page of entities of the given kind matching {@code query} (and
 * {@code filter}, when given). Pages are 1-based; a non-positive {@code pagesize}
 * defaults to 10 and a non-positive {@code pageno} to 1. The searcher collects the
 * top {@code pageno * pagesize} hits and the last page-worth of them is returned.
 * On failure the entities read so far are returned (possibly none).
 */
public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
    List<SEntity> tag = new ArrayList<SEntity>();
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        BooleanQuery boolQuery = new BooleanQuery();
        // Every search is scoped to a single entity kind.
        boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
        if (query != null) {
            boolQuery.add(new BooleanClause(query, Occur.MUST));
        }
        if (pagesize <= 0) pagesize = 10;
        if (pageno <= 0) pageno = 1;
        int max = pageno * pagesize;
        TopDocs td;
        if (filter != null && sort != null) {
            td = searcher.search(boolQuery, filter, max, sort);
        } else if (filter != null) {
            td = searcher.search(boolQuery, filter, max);
        } else if (sort != null) {
            td = searcher.search(boolQuery, max, sort);
        } else {
            td = searcher.search(boolQuery, max);
        }
        // scoreDocs holds at most max hits, so its length bounds the requested page.
        for (int i = (pageno - 1) * pagesize; i < td.scoreDocs.length; i++) {
            SEntity item = new SEntity(this);
            Document doc = searcher.doc(td.scoreDocs[i].doc);
            item.setSchema(doc.get(SEntity.SCHEMA));
            read(item, doc);
            tag.add(item);
        }
    } catch (Exception ignored) {
        // Best effort: return what was collected before the failure.
    } finally {
        // Always release the searcher and reader, even when the search failed.
        if (searcher != null) {
            try { searcher.close(); } catch (Exception ignored) { }
        }
        if (reader != null) {
            try { reader.close(); } catch (Exception ignored) { }
        }
    }
    return tag;
}
/**
 * Writes a text copy of the entity to the backup directory.
 *
 * The file is stored as {@code <dirBackup>/<kind>/<id split into 2-char folders>/<id>.txt}
 * and contains {@code src.toString()}. Entities with an empty id or kind are
 * skipped. Failures are swallowed on purpose: the backup is best effort.
 *
 * @param src entity to back up
 */
protected void backup(SEntity src) {
    String id = src.getId();
    if (id.length() == 0) return;
    String kind = src.getKind();
    if (kind.length() == 0) return;
    // Shard the id into two-character path segments; a trailing odd character is dropped.
    StringBuilder fid = new StringBuilder();
    for (int i = 0; i + 1 < id.length(); i += 2) {
        if (fid.length() > 0) fid.append(File.separator);
        fid.append(id, i, i + 2);
    }
    BufferedWriter writer = null;
    try {
        File file = new File(dirBackup, kind);
        file = new File(file.getAbsolutePath(), fid.toString());
        file.mkdirs();
        String folder = file.getAbsolutePath();
        // NOTE(review): the writer uses the platform default charset, as before; confirm
        // against whatever reads these files before pinning an explicit encoding.
        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(folder, id + ".txt"))));
        writer.write(src.toString());
    } catch (Exception e) {
        // Best effort: a failed backup must not break the caller.
    } finally {
        // Close in finally so a failed write does not leak the file handle.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
        }
    }
}
/**
 * Copies the stored fields listed in the document's schema into the entity.
 *
 * The schema is a {@code |}-separated list of {@code kind|fieldName} pairs; a
 * trailing unpaired token is ignored. Fields whose kind is not listed in
 * {@code SEntity.ALL_KINDS} are skipped, and missing values are stored as "".
 *
 * @param entity entity to populate
 * @param doc    Lucene document to read from
 */
protected void read(SEntity entity, Document doc) {
    String schema = doc.get(SEntity.SCHEMA);
    String[] parts = (schema == null ? "" : schema).split("\\|");
    // Walk the pairs by pointing at the field name (odd positions).
    for (int pos = 1; pos < parts.length; pos += 2) {
        String type = parts[pos - 1];
        String name = parts[pos];
        String stored = doc.get(name);
        if (SEntity.ALL_KINDS.indexOf("|" + type + "|") < 0) {
            continue;
        }
        entity.setString(name, stored == null ? "" : stored);
    }
}
/**
 * Adds one stored Lucene field per {@code kind|fieldName} pair of the entity's schema.
 *
 * String kinds become un-analyzed fields, the analyzed kind becomes an analyzed
 * field, and the numeric kinds become indexed {@code NumericField}s. Pairs with
 * any other kind are ignored, as is a trailing unpaired token.
 *
 * @param entity source of the schema and the field values
 * @param doc    Lucene document that receives the fields
 */
protected void write(SEntity entity, Document doc) {
    String schema = entity.getSchema();
    String[] parts = (schema == null ? "" : schema).split("\\|");
    for (int pos = 1; pos < parts.length; pos += 2) {
        String type = parts[pos - 1];
        String name = parts[pos];
        if (SEntity.STRING.equalsIgnoreCase(type)) {
            doc.add(new Field(name, entity.getString(name), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            continue;
        }
        if (SEntity.DOUBLE.equalsIgnoreCase(type)) {
            NumericField number = new NumericField(name, Store.YES, true);
            number.setDoubleValue(entity.getDouble(name));
            doc.add(number);
            continue;
        }
        if (SEntity.FLOAT.equalsIgnoreCase(type)) {
            NumericField number = new NumericField(name, Store.YES, true);
            number.setFloatValue(entity.getFloat(name));
            doc.add(number);
            continue;
        }
        if (SEntity.INTEGER.equalsIgnoreCase(type)) {
            NumericField number = new NumericField(name, Store.YES, true);
            number.setIntValue(entity.getInteger(name));
            doc.add(number);
            continue;
        }
        if (SEntity.LONG.equalsIgnoreCase(type)) {
            NumericField number = new NumericField(name, Store.YES, true);
            number.setLongValue(entity.getLong(name));
            doc.add(number);
            continue;
        }
        if (SEntity.ANALYZED.equalsIgnoreCase(type)) {
            doc.add(new Field(name, entity.getString(name), Store.YES, Index.ANALYZED));
        }
    }
}
/**
 * Deletes the entity with the given id and blocks until the deletion has run.
 *
 * The work is scheduled as a {@code DeleteTask} on a fresh {@link Timer}; this
 * method polls {@code monitor.finished} every 10 ms until the task reports
 * completion.
 *
 * @param id id of the entity to delete
 */
public void delete(String id) {
    Monitor monitor = new Monitor();
    Timer timer = new Timer();
    timer.schedule(new DeleteTask(timer, id, monitor), 1);
    boolean interrupted = false;
    while (!monitor.finished) {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // Keep waiting for the task, but remember the interrupt for the caller.
            interrupted = true;
        }
    }
    if (interrupted) {
        // Restore the interrupt status that the sleep() call cleared.
        Thread.currentThread().interrupt();
    }
}
/**
 * Removes the entity with the given id from the index and deletes its backup file.
 *
 * The entity's kind is looked up in the index first. Nothing happens when the id
 * is empty, no document with that id exists, {@code allowDelete} refuses, or
 * (for kinds other than {@code KIND_QUOTA}) {@code quotaDelete} refuses.
 * I/O failures are swallowed on purpose: deletion is best effort.
 *
 * @param id id of the entity to delete
 */
protected void deleteEntity(String id) {
    if (id.length() == 0) return;
    String kind = "";
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
        searcher = new IndexSearcher(reader);
        TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
        if (td.totalHits > 0) {
            Document doc = searcher.doc(td.scoreDocs[0].doc);
            kind = doc.get(SEntity.KIND);
        }
    } catch (Exception e) {
        // Best effort: an unreadable index is treated as "entity not found".
    } finally {
        // Release the read handles even when the lookup failed.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
        }
    }
    // A document without a stored kind yields null; treat it like "not found".
    if (kind == null || kind.length() == 0) return;
    if (!allowDelete(id, kind)) return;
    IndexWriter writer = null;
    try {
        if (!kind.equals(KIND_QUOTA)) {
            if (!quotaDelete(id, kind)) return;
        }
        removeBackup(id, kind);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
        writer.deleteDocuments(new Term(SEntity.ID, id));
    } catch (Exception e) {
        // Best effort: a failed delete leaves the index as it was.
    } finally {
        // Close in finally so a failed delete does not leave the writer (and its lock) open.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                // nothing useful can be done if closing fails
            }
        }
    }
}
/**
 * Decides whether the entity identified by {@code id} and {@code kind} may be deleted.
 * This implementation always answers {@code true}; being protected, it is
 * presumably meant as an override point for subclasses.
 *
 * @param id   id of the entity about to be deleted
 * @param kind kind of the entity about to be deleted
 * @return {@code true} to allow the deletion
 */
protected boolean allowDelete(String id, String kind) {
return true;
}
/**
 * Deletes the backup file of an entity, if there is one.
 *
 * The file is looked up at {@code <dirBackup>/<kind>/<id split into 2-char folders>/<id>.txt}.
 * Nothing happens when id or kind is empty; failures are ignored.
 *
 * @param id   entity id
 * @param kind entity kind
 */
protected void removeBackup(String id, String kind) {
    if (id.length() == 0 || kind.length() == 0) {
        return;
    }
    // Rebuild the sharded folder path: two id characters per level, odd tail dropped.
    StringBuilder shard = new StringBuilder();
    for (int i = 0; i + 1 < id.length(); i += 2) {
        if (shard.length() > 0) {
            shard.append(File.separator);
        }
        shard.append(id.substring(i, i + 2));
    }
    try {
        File kindDir = new File(dirBackup, kind);
        File shardDir = new File(kindDir.getAbsolutePath(), shard.toString());
        new File(shardDir.getAbsolutePath(), id + ".txt").delete();
    } catch (Exception e) {
        // deliberately ignored: removing the backup is best effort
    }
}
/**
 * Returns the configured storage quota.
 *
 * @return the value of {@code systemQuota} (unit not visible here; TODO confirm)
 */
public double storageQuota() {
return systemQuota;
}
/**
 * Returns the storage size recorded on the system quota entity.
 *
 * @return the entity's {@code "size"} value, or 0 when no quota entity is found
 */
public double storageSize() {
    SEntity quota = findSystemQuota();
    return (quota == null) ? 0 : quota.getDouble("size");
}
/**
 * Timer task that runs {@code deleteEntity(id)} once, then flags the monitor
 * as finished and shuts its own timer down.
 */
private class DeleteTask extends TimerTask {
    private final String id;
    private final Timer timer;
    private final Monitor monitor;

    /**
     * @param timer   timer this task is scheduled on; cancelled when the task ends
     * @param id      id of the entity to delete
     * @param monitor flag object set to finished when the task ends
     */
    public DeleteTask(Timer timer, String id, Monitor monitor) {
        this.timer = timer;
        this.id = id;
        this.monitor = monitor;
    }

    @Override
    public void run() {
        try {
            deleteEntity(id);
        } finally {
            // Always signal completion: otherwise a runtime exception in deleteEntity
            // would leave the waiting caller polling forever and the timer alive.
            monitor.finished = true;
            timer.cancel();
            timer.purge();
        }
    }
}
/**
 * Timer task that runs {@code createEntity(entity)} once, then flags the
 * monitor as finished and shuts its own timer down.
 */
private class CreateTask extends TimerTask {
    private final SEntity entity;
    private final Timer timer;
    private final Monitor monitor;

    /**
     * @param timer   timer this task is scheduled on; cancelled when the task ends
     * @param entity  entity to create
     * @param monitor flag object set to finished when the task ends
     */
    public CreateTask(Timer timer, SEntity entity, Monitor monitor) {
        this.timer = timer;
        this.entity = entity;
        this.monitor = monitor;
    }

    @Override
    public void run() {
        try {
            createEntity(entity);
        } finally {
            // Always signal completion: otherwise a runtime exception in createEntity
            // would leave the waiting caller polling forever and the timer alive.
            monitor.finished = true;
            timer.cancel();
            timer.purge();
        }
    }
}
/**
 * Timer task that runs {@code updateEntity(entity)} once, then flags the
 * monitor as finished and shuts its own timer down.
 */
private class UpdateTask extends TimerTask {
    private final SEntity entity;
    private final Timer timer;
    private final Monitor monitor;

    /**
     * @param timer   timer this task is scheduled on; cancelled when the task ends
     * @param entity  entity to update
     * @param monitor flag object set to finished when the task ends
     */
    public UpdateTask(Timer timer, SEntity entity, Monitor monitor) {
        this.timer = timer;
        this.entity = entity;
        this.monitor = monitor;
    }

    @Override
    public void run() {
        try {
            updateEntity(entity);
        } finally {
            // Always signal completion: otherwise a runtime exception in updateEntity
            // would leave the waiting caller polling forever and the timer alive.
            monitor.finished = true;
            timer.cancel();
            timer.purge();
        }
    }
}
/**
 * Completion flag shared between a timer task and the thread waiting for it.
 */
private class Monitor {
    // volatile: written on the timer thread and polled on the calling thread, so the
    // update must be visible across threads without further synchronization.
    public volatile boolean finished = false;
}
}
Modify DataHandler class
/**
 * Machine handler that carries the storage settings (index directory, backup
 * directory and quota) and hands them to a new {@code LuceneHandler} whenever
 * an entity handler is requested.
 */
public static class DataHandler extends Machine.Handler {
// Settings captured at construction time and forwarded unchanged to LuceneHandler.
private String dirIndex;
private String dirBackup;
private double systemQuota;
/**
 * @param dirIndex    index directory passed on to LuceneHandler
 * @param dirBackup   backup directory passed on to LuceneHandler
 * @param systemQuota quota value passed on to LuceneHandler
 */
public DataHandler(String dirIndex, String dirBackup, double systemQuota) {
this.dirIndex = dirIndex;
this.dirBackup = dirBackup;
this.systemQuota = systemQuota;
}
/**
 * Creates a fresh LuceneHandler configured with this handler's settings.
 *
 * @return a new entity handler on every call
 */
public SEntity.Handler getEntityHandler() {
return new LuceneHandler(dirIndex, dirBackup, systemQuota);
}
..............
}
javascript
/**
 * Script entry point: runs exactly one of the demo scenarios.
 * Edit the value of `no` to choose which one.
 *
 * @param env  scripting environment supplied by the host
 * @param args argument map supplied by the host
 */
function main(env, args) {
    var no = 1;
    switch (no) {
        case 1:
            test01(env, args); // Grab products
            break;
        case 2:
            test02(env, args); // List all products
            break;
        case 3:
            test03(env, args); // Search products
            break;
        case 4:
            test04(env, args); // Delete products
            break;
    }
}
/**
 * Scenario 4: deletes the 'Link' entities returned by a match-all search
 * called with the arguments (3, 1).
 *
 * @param env  scripting environment supplied by the host
 * @param args argument map (unused)
 */
function test04(env, args) {
    var finder = env.newEntity();
    var products = finder.search('Link', finder.newMatchAllDocsQuery(), 3, 1);
    for (var idx = 0; idx < products.size(); idx++) {
        var product = products.get(idx);
        product.delete();
    }
}
/**
 * Scenario 3: searches 'Link' entities for a term in 'desc' and 'title',
 * sorted by score, and prints each hit with highlighted title and description.
 * When highlighting fails the error is reported and the plain text is printed.
 *
 * @param env  scripting environment supplied by the host
 * @param args argument map (unused)
 */
function test03(env, args) {
    var term = 'Sleeping';
    var entity = env.newEntity();
    var query = entity.parseQuery([term, term], ['desc', 'title'], [entity.occurShould(), entity.occurShould()]);
    // The count is computed but not used further in this scenario.
    var total = entity.count('Link', query, 999999);
    var products = entity.search('Link', query, entity.newSort(org.apache.lucene.search.SortField.FIELD_SCORE), 999999);

    // Highlights `text` for `field`; falls back to the unmodified text on error.
    var highlightOrKeep = function (text, field) {
        try {
            return entity.highlight(query, text, field, 50, 3, ' (...) ');
        } catch (e) {
            env.error(e);
            return text;
        }
    };

    for (var i = 0; i < products.size(); i++) {
        var product = products.get(i);
        var title = highlightOrKeep(env.newString(product.getString('title').getBytes('UTF-8'), 'UTF-8'), 'title');
        var desc = highlightOrKeep(env.newString(product.getString('desc').getBytes('UTF-8'), 'UTF-8'), 'desc');
        printProduct(product, env, desc, title);
    }
}
/**
 * Scenario 2: logs the number of 'Link' entities and prints every one of them.
 *
 * @param env  scripting environment supplied by the host
 * @param args argument map (unused)
 */
function test02(env, args) {
    var entity = env.newEntity();
    var everything = entity.newMatchAllDocsQuery();
    var total = entity.count('Link', everything, 999999);
    var products = entity.search('Link', everything, 999999);
    env.info('Size: ' + total);
    var i = 0;
    while (i < products.size()) {
        printProduct(products.get(i), env);
        i++;
    }
}
/**
 * Scenario 1: grabs products from the 'paesia' store (node '100', up to
 * 2 pages), saves each of them and logs how many were grabbed.
 *
 * @param env  scripting environment supplied by the host
 * @param args argument map (unused)
 */
function test01(env, args) {
    var products = grabProduct('paesia', '100', 2, env);
    for (var idx = 0; idx < products.size(); idx++) {
        saveProduct(products.get(idx), env);
    }
    env.info('Saved: ' + products.size());
}
/**
 * Logs one product through env.info: id, title, url and description, followed
 * by the highlighted title and description when they are supplied.
 *
 * @param pro    entity exposing getId() and getString(name)
 * @param env    scripting environment providing info()
 * @param descH  optional highlighted description
 * @param titleH optional highlighted title
 */
function printProduct(pro, env, descH, titleH) {
    var text = '\nId: ' + pro.getId()
        + '\nTitle: ' + pro.getString('title')
        + '\nUrl: ' + pro.getString('url')
        + '\nDescription: \n' + pro.getString('desc');
    // The title highlight is printed before the description highlight.
    if (titleH != null) {
        text = text + '\nTitle Highlight: \n' + titleH;
    }
    if (descH != null) {
        text = text + '\nDescription Highlight: \n' + descH;
    }
    env.info('\n' + text + '\n');
}
/**
 * Stores a grabbed product as a 'Link' entity unless it is incomplete or
 * a product with the same url already exists.
 *
 * The description HTML is reduced to its plain body text before saving.
 *
 * @param pro map-like product exposing get('title'), get('url'), get('description')
 * @param env scripting environment supplied by the host
 */
function saveProduct(pro, env) {
    var title = pro.get('title');
    var url = pro.get('url');
    // The map values are Java strings (the description below is already measured with
    // length()); on those, `.length` is the method itself, so `.length == 0` never matched.
    if (title == null || title.length() == 0 || url == null || url.length() == 0) return;
    if (findProductByUrl(url, env)) return;
    var desc = pro.get('description');
    if (desc == null) desc = '';
    if (desc.length() > 0) {
        // Strip the markup: keep only the text content of the parsed body.
        var doc = env.newJsoup().parse(desc);
        desc = doc.select('body').first().text();
    }
    // Schema pairs: url is a plain string field, title and desc are analyzed fields.
    var schema = 's|url|a|title|a|desc';
    var entity = env.newEntity();
    entity.setSchema(schema);
    entity.setKind('Link');
    entity.setId(env.uniqid());
    entity.setString('url', url);
    entity.setString('title', title);
    entity.setString('desc', desc);
    entity.save();
}
/**
 * Tells whether a 'Link' entity with exactly this url is already stored.
 *
 * @param url url to look for
 * @param env scripting environment supplied by the host
 * @return true when at least one matching entity is counted
 */
function findProductByUrl(url, env) {
    var entity = env.newEntity();
    var byUrl = entity.newTermQuery(entity.newTerm('url', url));
    return entity.count('Link', byUrl, 1) > 0;
}
/**
 * Scrapes product listings from an aStore and returns them as a list of maps.
 *
 * For every listing page from 1 to maxpage the text rows and image rows of
 * '#featuredProducts' are read, then each product's detail page is fetched to
 * collect images, description, details, reviews, prices and the buy link.
 * Errors on a page or on a single product are reported through env.error and
 * do not stop the remaining work.
 *
 * @param astore  store name inserted into the listing url
 * @param node    node id inserted into the listing url
 * @param maxpage number of listing pages to fetch
 * @param env     scripting environment supplied by the host
 * @return list (env.newArrayList) of product maps
 */
function grabProduct(astore, node, maxpage, env) {
var tag = env.newArrayList();
for (var no = 1; no <= maxpage; no++) {
try {
var alink = env.newURL('http://astore.amazon.com/' + astore + '-20?node=' + node + '&page=' + no);
var doc = env.newJsoup().parse(alink, 60000);
// Pass 1: text rows give code, title and absolute url of each product.
var elements = doc.select('#featuredProducts .textrow a');
var map = env.newHashMap();
for (var i = 0; i < elements.size(); i++) {
var element = elements.get(i);
var title = element.text();
var url = element.attr('href');
var pos = url.lastIndexOf('/detail/');
if (pos < 0) continue;
// The product code is whatever follows '/detail/' (8 characters long).
var code = url.substring(pos + 8);
// Resolve the href against the listing url and force it to a string.
var url = env.newURL(alink, url) + '';
var item = env.newHashMap();
item.put('code', code);
item.put('title', title);
item.put('url', url);
map.put(code, item);
}
// Pass 2: image rows add the thumbnail and, when present, a title taken from the alt text.
elements = doc.select('#featuredProducts .imagerow a');
for (var i = 0; i < elements.size(); i++) {
var element = elements.get(i);
var url = element.attr('href');
var pos = url.lastIndexOf('/detail/');
if (pos < 0) continue;
var code = url.substring(pos + 8);
var item = map.get(code);
// Only products already found in the text rows are enriched.
if (item == null) continue;
var child = element.select('img').first();
if (child == null) continue;
var title = child.attr('alt');
var smimg = child.attr('src');
if (title.length() > 0) {
item.put('title', title);
}
item.put('small-image', smimg);
}
// Pass 3: fetch each product's detail page; alink and doc are reused for it.
var keys = env.getKeys(map);
for (var i = 0; i < keys.size(); i++) {
try {
var item = map.get(keys.get(i));
alink = env.newURL(item.get('url'));
doc = env.newJsoup().parse(alink, 60000);
var element = doc.select('#detailImage img').first();
if (element != null) {
item.put('large-image', element.attr('src'));
}
element = doc.select('#productDescription').first();
if (element != null) {
var desc = element.html();
// Drop everything up to and including the section heading, when it is present.
var pattern = '<h2>Product Description</h2>';
var pos = desc.indexOf(pattern);
if (pos >= 0) {
desc = desc.substring(pos + pattern.length);
}
// Re-parse the fragment and make its links and images absolute.
var bdoc = env.newJsoup().parse(desc, item.get('url'));
buildURL(bdoc, item.get('url'), env);
desc = bdoc.select('body').first().html();
// Keep the fragment only if it does not contain a nested '<html' tag.
if (desc.indexOf('<html') < 0) {
item.put('description', desc);
}
}
element = doc.select('#productDetails').first();
if (element != null) {
var desc = element.html();
var pattern = '<h2>Product Details</h2>';
var pos = desc.indexOf(pattern);
if (pos >= 0) {
desc = desc.substring(pos + pattern.length);
}
var bdoc = env.newJsoup().parse(desc, item.get('url'));
buildURL(bdoc, item.get('url'), env);
desc = bdoc.select('body').first().html();
if (desc.indexOf('<html') < 0) {
item.put('details', desc);
}
}
element = doc.select('#editorialReviews').first();
if (element != null) {
var desc = element.html();
var bdoc = env.newJsoup().parse(desc, item.get('url') + '');
buildURL(bdoc, item.get('url'), env);
desc = bdoc.select('body').first().html();
if (desc.indexOf('<html') < 0) {
item.put('editorial-reviews', desc);
}
}
element = doc.select('#detailListPrice').first();
if (element != null) {
item.put('list-price', element.text());
}
element = doc.select('#detailOfferPrice').first();
if (element != null) {
item.put('offer-price', element.text());
}
element = doc.select('#addToCartForm a').first();
if (element != null) {
item.put('buy-url', element.attr('href'));
}
} catch (e) {
// A failing detail page only affects this product; it is still returned with what was collected.
env.error(e);
}
}
// Append this page's products to the overall result.
for (var i = 0; i < keys.size(); i++) {
tag.add(map.get(keys.get(i)));
}
} catch (e) {
// A failing listing page is reported and skipped; later pages are still tried.
env.error(e);
}
}
return tag;
}
/**
 * Rewrites every link href and image src in the document so that it is
 * resolved against the given base url.
 *
 * @param doc     parsed document whose 'a' and 'img' elements are updated in place
 * @param baseUrl base url (converted with env.newURL before use)
 * @param env     scripting environment providing newURL()
 */
function buildURL(doc, baseUrl, env) {
    var base = env.newURL(baseUrl);
    // [selector, attribute] pairs, processed in this order: links first, then images.
    var targets = [['a', 'href'], ['img', 'src']];
    for (var t = 0; t < targets.length; t++) {
        var attrName = targets[t][1];
        var nodes = doc.select(targets[t][0]);
        for (var i = 0; i < nodes.size(); i++) {
            var node = nodes.get(i);
            var resolved = env.newURL(base, node.attr(attrName));
            node.attr(attrName, resolved + '');
        }
    }
}