Wednesday, 18 April 2012

Add Lucene support to javascript sandbox

Add Lucene support to javascript sandbox
This task adds Lucene support to the JavaScript sandbox.
Add Lucene support to javascript sandbox
  1. Create a JavaScript sandbox with jsoup support
  2. Create the com.paesia.schema.script.safe.lucene.SEntity class as follows
  3. Create the com.paesia.schema.script.LuceneHandler class as follows
  4. Modify the com.paesia.schema.script.Machine class as follows
  5. Modify the DataHandler class as follows
  6. Create the JavaScript as follows
  7. Call the Machine.run() method as follows
Call Machine.run() method
1String dirIndex = "";
2String dirBackup = "";
3double systemQuota = 1024 * 1024;
4String js = loadJS();
5Map args = new HashMap();
6List links = new ArrayList();
7args.put("links", links);
8
9Machine env = new Machine(new DataHandler(dirIndex, dirBackup, systemQuota));
10Machine.run(env, js, args);
11
12for (int i = 0; i < links.size(); i++) {
13 Map item = (Map)links.get(i);
14 String line = "";
15 for (Object key : item.keySet()) {
16 line += "\r\n" + key + " : " + item.get(key);
17 }
18 logger.info("\r\n" + (i + 1) + " --------------------------------\r\n" + line + "\r\n");
19}
// Example driver: runs a script through Machine.run() and logs every entry
// the script left in the shared "links" list.
String dirIndex = "";
String dirBackup = "";
double systemQuota = 1024 * 1024;
String js = loadJS();

// "links" is handed to the script through args; the loop below reads it back
// afterwards, so the script is presumably expected to append one Map per result.
Map<String, Object> args = new HashMap<String, Object>();
List<Object> links = new ArrayList<Object>();
args.put("links", links);

Machine env = new Machine(new DataHandler(dirIndex, dirBackup, systemQuota));
Machine.run(env, js, args);

for (int i = 0; i < links.size(); i++) {
    Map<?, ?> item = (Map<?, ?>) links.get(i);
    // StringBuilder avoids re-copying the accumulated text for every key.
    StringBuilder line = new StringBuilder();
    for (Object key : item.keySet()) {
        line.append("\r\n").append(key).append(" : ").append(item.get(key));
    }
    logger.info("\r\n" + (i + 1) + " --------------------------------\r\n" + line + "\r\n");
}
Modify com.paesia.schema.script.Machine class
1............
2
3import com.paesia.schema.script.safe.lucene.SEntity;
4
5public class Machine {
6
7 private Handler handler;
8
9 public static void run(Machine env, String js, Map args) throws Exception {
10 try {
11 Context cx = Context.enter();
12 cx.setClassShutter(new ClassShutter() {
13 public boolean visibleToScripts(String className) {
14...........
15 if ("org.apache.lucene.search.Query".equals(className)) return true;
16 if ("org.apache.lucene.search.Filter".equals(className)) return true;
17 if ("org.apache.lucene.search.Sort".equals(className)) return true;
18 if ("org.apache.lucene.search.BooleanQuery".equals(className)) return true;
19 if ("org.apache.lucene.search.BooleanClause".equals(className)) return true;
20 if (className.startsWith("org.apache.lucene.search.BooleanClause$")) return true;
21 if ("org.apache.lucene.search.PhraseQuery".equals(className)) return true;
22 if ("org.apache.lucene.index.Term".equals(className)) return true;
23 if ("org.apache.lucene.search.MultiPhraseQuery".equals(className)) return true;
24 if ("org.apache.lucene.search.NGramPhraseQuery".equals(className)) return true;
25 if ("org.apache.lucene.search.NumericRangeQuery".equals(className)) return true;
26 if ("org.apache.lucene.search.PrefixQuery".equals(className)) return true;
27 if ("org.apache.lucene.search.TermQuery".equals(className)) return true;
28 if ("org.apache.lucene.search.TermRangeQuery".equals(className)) return true;
29 if ("org.apache.lucene.search.WildcardQuery".equals(className)) return true;
30 if ("org.apache.lucene.search.MatchAllDocsQuery".equals(className)) return true;
31 if ("org.apache.lucene.search.FieldValueFilter".equals(className)) return true;
32 if ("org.apache.lucene.search.NumericRangeFilter".equals(className)) return true;
33 if ("org.apache.lucene.search.PrefixFilter".equals(className)) return true;
34 if ("org.apache.lucene.search.QueryWrapperFilter".equals(className)) return true;
35 if ("org.apache.lucene.search.TermRangeFilter".equals(className)) return true;
36 if ("org.apache.lucene.search.SortField".equals(className)) return true;
37...........
38 return false;
39 }
40 });
41
42...........
43
44 } catch (Exception e) {
45 throw e;
46 } finally {
47 Context.exit();
48 }
49 }
50...........
51 public SEntity newEntity() {
52 SEntity.Handler seh = null;
53 if (handler != null) {
54 seh = handler.getEntityHandler();
55 }
56 return new SEntity(seh);
57 }
58
59 public static class Handler {
60...........
61 public SEntity.Handler getEntityHandler() { return null; }
62
63 }
64...........
65}
............

import com.paesia.schema.script.safe.lucene.SEntity;

/**
 * Script execution environment (excerpt: lines consisting only of dots mark
 * code that this listing omits).
 *
 * The parts shown here make a fixed set of Lucene classes visible to scripts
 * and let callers create {@link SEntity} instances.
 */
public class Machine {

    /** Source of the SEntity.Handler used by newEntity(); may be null. */
    private Handler handler;
 
    /**
     * Runs a script inside a freshly entered Context whose ClassShutter limits
     * which Java classes the script can see.
     *
     * NOTE(review): Context / ClassShutter look like Rhino
     * (org.mozilla.javascript) types; the imports are not part of this
     * listing, so confirm.
     *
     * @param env  the machine instance for this run (its use is in the omitted code)
     * @param js   the script source
     * @param args values passed to the script (their use is in the omitted code)
     * @throws Exception whatever the try body throws; the catch below only rethrows it
     */
    public static void run(Machine env, String js, Map args) throws Exception {
        try {
            // NOTE(review): enter() is inside the try, so the finally block
            // calls Context.exit() even when enter() itself failed.
            Context cx = Context.enter();
            cx.setClassShutter(new ClassShutter() {
                // Allow-list: a class name is visible only if one of the checks
                // below (or in the omitted lines) matches; everything else is hidden.
                public boolean visibleToScripts(String className) {  
...........
                    if ("org.apache.lucene.search.Query".equals(className)) return true;
                    if ("org.apache.lucene.search.Filter".equals(className)) return true;
                    if ("org.apache.lucene.search.Sort".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.BooleanClause".equals(className)) return true;
                    // Prefix match covers the nested classes of BooleanClause (e.g. BooleanClause$Occur).
                    if (className.startsWith("org.apache.lucene.search.BooleanClause$")) return true;
                    if ("org.apache.lucene.search.PhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.index.Term".equals(className)) return true;
                    if ("org.apache.lucene.search.MultiPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NGramPhraseQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.WildcardQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.MatchAllDocsQuery".equals(className)) return true;
                    if ("org.apache.lucene.search.FieldValueFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.NumericRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.PrefixFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.QueryWrapperFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.TermRangeFilter".equals(className)) return true;
                    if ("org.apache.lucene.search.SortField".equals(className)) return true;
...........
                    return false;
                }
            });   

...........

        // NOTE(review): this catch only rethrows; try/finally alone would behave the same.
        } catch (Exception e) {
            throw e;
        } finally {
            Context.exit();   
        }
    }
...........
    /**
     * Creates a new SEntity.
     *
     * @return an entity constructed with the SEntity.Handler obtained from
     *         {@code handler}, or with null when no handler is set
     */
    public SEntity newEntity() {
        SEntity.Handler seh = null;
        if (handler != null) {
            seh = handler.getEntityHandler();
        }
        return new SEntity(seh);
    }

    /**
     * Base handler type for a Machine. As shown, getEntityHandler() returns
     * null; presumably subclasses override it to supply a real
     * SEntity.Handler (TODO confirm against DataHandler).
     */
    public static class Handler {
...........
        /** @return the handler new entities should use; null in this base class */
        public SEntity.Handler getEntityHandler() { return null; }
  
    }
...........
}
com.paesia.schema.script.safe.lucene.SEntity class
1package com.paesia.schema.script.safe.lucene;
2
3import java.io.ByteArrayInputStream;
4import java.io.ByteArrayOutputStream;
5import java.io.StringReader;
6import java.util.ArrayList;
7import java.util.Date;
8import java.util.List;
9import java.util.Properties;
10
11import org.apache.lucene.analysis.Analyzer;
12import org.apache.lucene.analysis.CachingTokenFilter;
13import org.apache.lucene.analysis.standard.StandardAnalyzer;
14import org.apache.lucene.index.Term;
15import org.apache.lucene.queryParser.MultiFieldQueryParser;
16import org.apache.lucene.search.BooleanClause;
17import org.apache.lucene.search.BooleanClause.Occur;
18import org.apache.lucene.search.BooleanQuery;
19import org.apache.lucene.search.FieldValueFilter;
20import org.apache.lucene.search.Filter;
21import org.apache.lucene.search.MatchAllDocsQuery;
22import org.apache.lucene.search.MultiPhraseQuery;
23import org.apache.lucene.search.NGramPhraseQuery;
24import org.apache.lucene.search.NumericRangeFilter;
25import org.apache.lucene.search.NumericRangeQuery;
26import org.apache.lucene.search.PhraseQuery;
27import org.apache.lucene.search.PrefixFilter;
28import org.apache.lucene.search.PrefixQuery;
29import org.apache.lucene.search.Query;
30import org.apache.lucene.search.QueryWrapperFilter;
31import org.apache.lucene.search.Sort;
32import org.apache.lucene.search.SortField;
33import org.apache.lucene.search.TermQuery;
34import org.apache.lucene.search.TermRangeFilter;
35import org.apache.lucene.search.TermRangeQuery;
36import org.apache.lucene.search.WildcardQuery;
37import org.apache.lucene.search.highlight.Highlighter;
38import org.apache.lucene.search.highlight.Scorer;
39import org.apache.lucene.search.highlight.SimpleFragmenter;
40import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
41import org.apache.lucene.util.Version;
42
43public class SEntity {
44
45 public static final String STRING = "s";
46 public static final String DOUBLE = "d";
47 public static final String FLOAT = "f";
48 public static final String INTEGER = "i";
49 public static final String LONG = "l";
50 public static final String ANALYZED = "a";
51
52 public static final String ALL_KINDS = "|s|d|f|i|l|a|";
53
54 public static final String SCHEMA = "F4f8cc93237f50";
55 public static final String ID = "F4f8cce61643dd";
56 public static final String CREATED = "F4f8cd83fcca31";
57 public static final String UPDATED = "F4f8cd84e2b74a";
58 public static final String KIND = "F4f8cd9c8ee13d";
59 public static final String MARK = "F4f8cda27d62fb";
60
61 protected Properties data = new Properties();
62 protected Properties schema = new Properties();
63 protected Handler handler = null;
64
65 public SEntity(Handler handler) {
66 this.handler = handler;
67 registerDefault();
68 }
69
70 public void register(String field, String type) {
71 if (ALL_KINDS.indexOf("|" + type + "|") < 0) return;
72 schema.put(field, type);
73 saveSchema();
74 }
75
76 public void setSchema(String src) {
77 String[] fields = src.split("\\|");
78 schema.clear();
79 for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) {
80 register(fields[i + 1], fields[i]);
81 }
82 registerDefault();
83 saveSchema();
84 }
85
86 public String getSchema() {
87 String tag = data.getProperty(SCHEMA);
88 if (tag == null) tag = "";
89 return tag;
90 }
91
92 public void fromString(String src) {
93 data.clear();
94 schema.clear();
95 try {
96 ByteArrayInputStream bais = new ByteArrayInputStream(src.getBytes("UTF-8"));
97 data.load(bais);
98 bais.close();
99 } catch (Exception e) {
100 }
101 loadSchema();
102 }
103
104 public String toString() {
105 String tag = "";
106 try {
107 ByteArrayOutputStream baos = new ByteArrayOutputStream();
108 data.store(baos, "");
109 tag = baos.toString();
110 baos.close();
111 } catch (Exception e) {
112 }
113 return tag;
114 }
115
116 public String getString(String field) {
117 String tag = data.getProperty(field);
118 if (tag == null) tag = "";
119 return tag;
120 }
121
122 public void setString(String field, String value) {
123 if (schema.containsKey(field)) {
124 if (value == null) value = "";
125 data.setProperty(field, value);
126 }
127 }
128
129 public double getDouble(String field) {
130 double tag = 0;
131 try {
132 tag = Double.parseDouble(getString(field));
133 } catch (Exception e) {
134 tag = 0;
135 }
136 return tag;
137 }
138
139 public void setDouble(String field, double value) {
140 setString(field, Double.toString(value));
141 }
142
143 public float getFloat(String field) {
144 float tag = 0;
145 try {
146 tag = Float.parseFloat(getString(field));
147 } catch (Exception e) {
148 tag = 0;
149 }
150 return tag;
151 }
152
153 public void setFloat(String field, float value) {
154 setString(field, Float.toString(value));
155 }
156
157 public long getLong(String field) {
158 long tag = 0;
159 try {
160 tag = Long.parseLong(getString(field));
161 } catch (Exception e) {
162 tag = 0;
163 }
164 return tag;
165 }
166
167 public void setLong(String field, long value) {
168 setString(field, Long.toString(value));
169 }
170
171 public int getInteger(String field) {
172 int tag = 0;
173 try {
174 tag = Integer.parseInt(getString(field));
175 } catch (Exception e) {
176 tag = 0;
177 }
178 return tag;
179 }
180
181 public void setInteger(String field, int value) {
182 setString(field, Integer.toString(value));
183 }
184
185 public String getId() {
186 return getString(ID);
187 }
188
189 public void setId(String src) {
190 setString(ID, src);
191 }
192
193 public String getKind() {
194 return getString(KIND);
195 }
196
197 public void setKind(String src) {
198 setString(KIND, src);
199 }
200
201 public String getMark() {
202 return getString(MARK);
203 }
204
205 public void setMark(String src) {
206 setString(MARK, src);
207 }
208
209 public Date getCreated() {
210 return new Date(getLong(CREATED));
211 }
212
213 public Date getUpdated() {
214 return new Date(getLong(UPDATED));
215 }
216
217 public boolean exists() {
218 if (handler == null) {
219 return false;
220 } else {
221 return handler.exists(getId());
222 }
223 }
224
225 public void save() {
226 if (handler != null) {
227 long now = new Date().getTime();
228 if (handler.exists(getId())) {
229 setLong(UPDATED, now);
230 handler.update(this);
231 } else {
232 setLong(CREATED, now);
233 setLong(UPDATED, now);
234 handler.create(this);
235 }
236 }
237 }
238
239 public int count(String kind, Query query, int max) {
240 if (handler != null) {
241 return handler.count(kind, query, max);
242 }
243 return 0;
244 }
245
246 public int count(String kind, Query query, Sort sort, int max) {
247 if (handler != null) {
248 return handler.count(kind, query, sort, max);
249 }
250 return 0;
251 }
252
253 public int count(String kind, Query query, Filter filter, int max) {
254 if (handler != null) {
255 return handler.count(kind, query, filter, max);
256 }
257 return 0;
258 }
259
260 public int count(String kind, Query query, Filter filter, Sort sort, int max) {
261 if (handler != null) {
262 return handler.count(kind, query, filter, sort, max);
263 }
264 return 0;
265 }
266
267 public List<SEntity> search(String kind, Query query, int max) {
268 if (handler != null) {
269 return handler.search(kind, query, max);
270 }
271 return new ArrayList<SEntity>();
272 }
273
274 public List<SEntity> search(String kind, Query query, Sort sort, int max) {
275 if (handler != null) {
276 return handler.search(kind, query, sort, max);
277 }
278 return new ArrayList<SEntity>();
279 }
280
281 public List<SEntity> search(String kind, Query query, Filter filter, int max) {
282 if (handler != null) {
283 return handler.search(kind, query, filter, max);
284 }
285 return new ArrayList<SEntity>();
286 }
287
288 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
289 if (handler != null) {
290 return handler.search(kind, query, filter, sort, max);
291 }
292 return new ArrayList<SEntity>();
293 }
294
295 public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
296 if (handler != null) {
297 return handler.search(kind, query, pagesize, pageno);
298 }
299 return new ArrayList<SEntity>();
300 }
301
302 public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
303 if (handler != null) {
304 return handler.search(kind, query, sort, pagesize, pageno);
305 }
306 return new ArrayList<SEntity>();
307 }
308
309 public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
310 if (handler != null) {
311 return handler.search(kind, query, filter, pagesize, pageno);
312 }
313 return new ArrayList<SEntity>();
314 }
315
316 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max, int pagesize, int pageno) {
317 if (handler != null) {
318 return handler.search(kind, query, filter, sort, pagesize, pageno);
319 }
320 return new ArrayList<SEntity>();
321 }
322
323 public void load(String id) {
324 if (handler != null) {
325 handler.load(id, this);
326 }
327 }
328
329 public BooleanQuery newBooleanQuery() {
330 return new BooleanQuery();
331 }
332
333 public BooleanClause newBooleanClause(Query query, Occur occur) {
334 return new BooleanClause(query, occur);
335 }
336
337 public Occur occurMust() {
338 return Occur.MUST;
339 }
340
341 public Occur occurMustNot() {
342 return Occur.MUST_NOT;
343 }
344
345 public Occur occurShould() {
346 return Occur.SHOULD;
347 }
348
349 public MatchAllDocsQuery newMatchAllDocsQuery() {
350 return new MatchAllDocsQuery();
351 }
352
353 public MultiPhraseQuery newMultiPhraseQuery() {
354 return new MultiPhraseQuery();
355 }
356
357 public PhraseQuery newPhraseQuery() {
358 return new PhraseQuery();
359 }
360
361 public NGramPhraseQuery newNGramPhraseQuery(int n) {
362 return new NGramPhraseQuery(n);
363 }
364
365 public Term newTerm(String field, String value) {
366 return new Term(field, value);
367 }
368
369 public NumericRangeQuery<Double> newDoubleRangeQuery(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
370 return NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive);
371 }
372
373 public NumericRangeQuery<Double> newDoubleRangeQuery(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
374 return NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
375 }
376
377 public NumericRangeQuery<Float> newFloatRangeQuery(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
378 return NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive);
379 }
380
381 public NumericRangeQuery<Float> newFloatRangeQuery(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
382 return NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
383 }
384
385 public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
386 return NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive);
387 }
388
389 public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
390 return NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
391 }
392
393 public NumericRangeQuery<Long> newLongRangeQuery(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
394 return NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive);
395 }
396
397 public NumericRangeQuery<Long> newLongRangeQuery(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
398 return NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
399 }
400
401 public PrefixQuery newPrefixQuery(Term term) {
402 return new PrefixQuery(term);
403 }
404
405 public TermQuery newTermQuery(Term term) {
406 return new TermQuery(term);
407 }
408
409 public TermRangeQuery newTermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
410 return new TermRangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper);
411 }
412
413 public WildcardQuery newWildcardQuery(Term term) {
414 return new WildcardQuery(term);
415 }
416
417 public FieldValueFilter newFieldValueFilter(String field, boolean negate) {
418 return new FieldValueFilter(field, negate);
419 }
420
421 public NumericRangeFilter<Double> newDoubleRangeFilter(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
422 return NumericRangeFilter.newDoubleRange(field, min, max, minInclusive, maxInclusive);
423 }
424
425 public NumericRangeFilter<Double> newDoubleRangeFilter(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
426 return NumericRangeFilter.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
427 }
428
429 public NumericRangeFilter<Float> newFloatRangeFilter(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
430 return NumericRangeFilter.newFloatRange(field, min, max, minInclusive, maxInclusive);
431 }
432
433 public NumericRangeFilter<Float> newFloatRangeFilter(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
434 return NumericRangeFilter.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
435 }
436
437 public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
438 return NumericRangeFilter.newIntRange(field, min, max, minInclusive, maxInclusive);
439 }
440
441 public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
442 return NumericRangeFilter.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
443 }
444
445 public NumericRangeFilter<Long> newLongRangeFilter(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
446 return NumericRangeFilter.newLongRange(field, min, max, minInclusive, maxInclusive);
447 }
448
449 public NumericRangeFilter<Long> newLongRangeFilter(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
450 return NumericRangeFilter.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
451 }
452
453 public PrefixFilter newPrefixFilter(Term term) {
454 return new PrefixFilter(term);
455 }
456
457 public QueryWrapperFilter newQueryWrapperFilter(Query query) {
458 return new QueryWrapperFilter(query);
459 }
460
461 public TermRangeFilter newTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
462 return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
463 }
464
465 public SortField newSortField(String field, int type, boolean reverse) {
466 return new SortField(field, type, reverse);
467 }
468
469 public Sort newSort() {
470 return new Sort();
471 }
472
473 public Sort newSort(SortField... fields) {
474 return new Sort(fields);
475 }
476
477 public Sort newSort(SortField field) {
478 return new Sort(field);
479 }
480
481 public Query parseQuery(String[] queries, String[] fields) throws Exception {
482 return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, new StandardAnalyzer(Version.LUCENE_36));
483 }
484
485 public Query parseQuery(String[] queries, String[] fields, BooleanClause.Occur[] flags) throws Exception {
486 return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
487 }
488
489 public Query parseQuery(String query, String[] fields, BooleanClause.Occur[] flags) throws Exception {
490 return MultiFieldQueryParser.parse(Version.LUCENE_36, query, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
491 }
492
493 public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments, String separator) throws Exception {
494 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
495 CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream(field, new StringReader(text)));
496 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
497 Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
498 Highlighter highlighter = new Highlighter(formatter, scorer);
499 highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
500 tokenStream.reset();
501 String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
502 return rv.length() == 0 ? text : rv;
503 }
504
505 protected void registerDefault() {
506 register(SCHEMA, "s");
507 register(ID, "s");
508 register(CREATED, "l");
509 register(UPDATED, "l");
510 register(KIND, "s");
511 register(MARK, "s");
512 }
513
514 protected void saveSchema() {
515 String tag = "";
516 for (Object key : schema.keySet()) {
517 if (tag.length() > 0) tag += "|";
518 tag += schema.get(key) + "|" + key;
519 }
520 data.put(SCHEMA, tag);
521 }
522
523 protected void loadSchema() {
524 String src = data.getProperty(SCHEMA);
525 if (src == null) src = "";
526 String[] fields = src.split("\\|");
527 schema.clear();
528 for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) {
529 register(fields[i + 1], fields[i]);
530 }
531 registerDefault();
532
533 String tag = "";
534 for (Object key : schema.keySet()) {
535 if (tag.length() > 0) tag += "|";
536 tag += schema.get(key) + "|" + key;
537 }
538 data.put(SCHEMA, tag);
539 }
540
541 public void delete() {
542 delete(getId());
543 }
544
545 public void delete(String id) {
546 if (handler != null) {
547 handler.delete(id);
548 }
549 }
550
551 public SortField sortFieldDoc() {
552 return SortField.FIELD_DOC;
553 }
554
555 public SortField sortFieldScore() {
556 return SortField.FIELD_SCORE;
557 }
558
559 public int sortFieldLong() {
560 return SortField.LONG;
561 }
562
563 public int sortFieldInteger() {
564 return SortField.INT;
565 }
566
567 public int sortFieldDouble() {
568 return SortField.DOUBLE;
569 }
570
571 public int sortFieldFloat() {
572 return SortField.FLOAT;
573 }
574
575 public int sortFieldString() {
576 return SortField.STRING_VAL;
577 }
578
579 public double storageQuota() {
580 if (handler != null) {
581 return handler.storageQuota();
582 }
583 return 0;
584 }
585
586 public double storageSize() {
587 if (handler != null) {
588 return handler.storageSize();
589 }
590 return 0;
591 }
592
593 public static class Handler {
594
595 public boolean exists(String id) { return false; }
596 public void create(SEntity src) { }
597 public void update(SEntity src) { }
598 public void load(String id, SEntity src) { }
599 public void delete(String id) { }
600 public List<SEntity> search(String kind, Query query, int max) { return new ArrayList<SEntity>(); }
601 public List<SEntity> search(String kind, Query query, Sort sort, int max) { return new ArrayList<SEntity>(); }
602 public List<SEntity> search(String kind, Query query, Filter filter, int max) { return new ArrayList<SEntity>(); }
603 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { return new ArrayList<SEntity>(); }
604 public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
605 public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
606 public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
607 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
608 public int count(String kind, Query query, int max) { return 0; }
609 public int count(String kind, Query query, Sort sort, int max) { return 0; }
610 public int count(String kind, Query query, Filter filter, int max) { return 0; }
611 public int count(String kind, Query query, Filter filter, Sort sort, int max) { return 0; }
612 public double storageQuota() { return 0; }
613 public double storageSize() { return 0; }
614
615 }
616
617}
package com.paesia.schema.script.safe.lucene;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NGramPhraseQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

public class SEntity {

    // Type codes accepted by register() as the declared type of a field.
    public static final String STRING = "s";
    public static final String DOUBLE = "d";
    public static final String FLOAT = "f";
    public static final String INTEGER = "i";
    public static final String LONG = "l";
    public static final String ANALYZED = "a";
 
    // Pipe-delimited list of every valid type code; register() searches it for "|code|".
    public static final String ALL_KINDS = "|s|d|f|i|l|a|";
 
    // Names of the built-in fields that registerDefault() adds to every entity.
    public static final String SCHEMA = "F4f8cc93237f50";
    public static final String ID = "F4f8cce61643dd";
    public static final String CREATED = "F4f8cd83fcca31";
    public static final String UPDATED = "F4f8cd84e2b74a";
    public static final String KIND = "F4f8cd9c8ee13d";
    public static final String MARK = "F4f8cda27d62fb";

    // Field values, kept as strings keyed by field name.
    protected Properties data = new Properties();
    // Registered fields: field name -> type code.
    protected Properties schema = new Properties();
    // Persistence backend; when null, the persistence methods do nothing or return empty results.
    protected Handler handler = null;
 
    /**
     * Creates an entity bound to the given handler and registers the built-in fields.
     *
     * @param handler persistence backend; may be null, in which case persistence calls are no-ops
     */
    public SEntity(Handler handler) {
        this.handler = handler;
        registerDefault();
    }
 
    /**
     * Declares a field with the given type code and re-serializes the schema.
     * The call is ignored when the field name or type is unusable.
     *
     * @param field field name; must be non-empty and must not contain '|'
     * @param type  one of the single type codes listed in ALL_KINDS
     */
    public void register(String field, String type) {
        // '|' is the schema delimiter: a name containing it (or an empty name) would corrupt
        // the serialized schema, and a null name would make Properties.put throw.
        if (field == null || field.length() == 0 || field.indexOf('|') >= 0) return;
        // The substring lookup below would otherwise accept composite values such as "s|d".
        if (type == null || type.indexOf('|') >= 0) return;
        if (ALL_KINDS.indexOf("|" + type + "|") < 0) return;
        schema.put(field, type);
        saveSchema();
    }
 
    /**
     * Replaces the schema with the one encoded in {@code src} and then re-adds the built-in fields.
     *
     * @param src schema encoded as "type|name|type|name..."; null is treated as an empty schema
     */
    public void setSchema(String src) {
        // A null schema used to throw NullPointerException; treat it as "no custom fields".
        String[] fields = (src == null ? "" : src).split("\\|");
        schema.clear();
        // Tokens come in (type, name) pairs; a trailing unpaired token is ignored.
        for (int i = 0; i + 1 < fields.length; i += 2) {
            register(fields[i + 1], fields[i]);
        }
        registerDefault();
        saveSchema();
    }
 
    /**
     * Returns the serialized schema stored under the SCHEMA field.
     *
     * @return the encoded schema, or an empty string when none is stored
     */
    public String getSchema() {
        return data.getProperty(SCHEMA, "");
    }
 
    /**
     * Replaces this entity's data with the properties-format text in {@code src}
     * and rebuilds the schema from the loaded SCHEMA field.
     *
     * @param src text in java.util.Properties format; null leaves the entity with default fields only
     */
    public void fromString(String src) {
        data.clear();
        schema.clear();
        if (src != null) {
            try {
                // Load from characters directly. Encoding to UTF-8 bytes and loading them as a
                // byte stream (which Properties decodes as ISO-8859-1) mangled non-ASCII input.
                data.load(new StringReader(src));
            } catch (Exception ignored) {
                // Best effort: malformed input leaves whatever was parsed so far.
            }
        }
        loadSchema();
    }
 
    /**
     * Serializes the entity's data in java.util.Properties format.
     *
     * @return the serialized text, or an empty string if serialization fails
     */
    @Override
    public String toString() {
        String tag = "";
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            data.store(baos, "");
            // Properties.store(OutputStream) writes ISO-8859-1; decode with that charset
            // instead of whatever the platform default happens to be.
            tag = baos.toString("ISO-8859-1");
            baos.close();
        } catch (Exception ignored) {
            // toString() must not throw; fall back to the empty string.
        }
        return tag;
    }
 
    /**
     * Returns the stored value of a field.
     *
     * @param field field name
     * @return the stored string, or an empty string when the field has no value
     */
    public String getString(String field) {
        return data.getProperty(field, "");
    }
 
    /**
     * Stores a value for a registered field; unregistered fields are silently ignored.
     *
     * @param field field name, must have been registered in the schema
     * @param value value to store; null is stored as an empty string
     */
    public void setString(String field, String value) {
        if (!schema.containsKey(field)) {
            return;
        }
        data.setProperty(field, value == null ? "" : value);
    }
 
    /**
     * Reads a field as a double.
     *
     * @param field field name
     * @return the parsed value, or 0 when the value is missing or cannot be read as a double
     */
    public double getDouble(String field) {
        try {
            return Double.parseDouble(getString(field));
        } catch (Exception unreadable) {
            // Any failure (including a bad field name) yields the default.
            return 0;
        }
    }
 
    /** Stores {@code value} in {@code field} as its Double.toString text (only if the field is registered). */
    public void setDouble(String field, double value) {
        setString(field, Double.toString(value));
    }

    /**
     * Reads a field as a float.
     *
     * @param field field name
     * @return the parsed value, or 0 when the value is missing or cannot be read as a float
     */
    public float getFloat(String field) {
        try {
            return Float.parseFloat(getString(field));
        } catch (Exception unreadable) {
            // Any failure (including a bad field name) yields the default.
            return 0;
        }
    }
 
    /** Stores {@code value} in {@code field} as its Float.toString text (only if the field is registered). */
    public void setFloat(String field, float value) {
        setString(field, Float.toString(value));
    }

    /**
     * Reads a field as a long.
     *
     * @param field field name
     * @return the parsed value, or 0 when the value is missing or cannot be read as a long
     */
    public long getLong(String field) {
        try {
            return Long.parseLong(getString(field));
        } catch (Exception unreadable) {
            // Any failure (including a bad field name) yields the default.
            return 0;
        }
    }
 
    /** Stores {@code value} in {@code field} as its Long.toString text (only if the field is registered). */
    public void setLong(String field, long value) {
        setString(field, Long.toString(value));
    }

    /**
     * Reads a field as an int.
     *
     * @param field field name
     * @return the parsed value, or 0 when the value is missing or cannot be read as an int
     */
    public int getInteger(String field) {
        try {
            return Integer.parseInt(getString(field));
        } catch (Exception unreadable) {
            // Any failure (including a bad field name) yields the default.
            return 0;
        }
    }
 
    /** Stores {@code value} in {@code field} as its Integer.toString text (only if the field is registered). */
    public void setInteger(String field, int value) {
        setString(field, Integer.toString(value));
    }
 
    /** Returns the value of the built-in ID field, or an empty string when unset. */
    public String getId() {
        return getString(ID);
    }
 
    /** Sets the built-in ID field; null is stored as an empty string. */
    public void setId(String src) {
        setString(ID, src);
    }

    /** Returns the value of the built-in KIND field, or an empty string when unset. */
    public String getKind() {
        return getString(KIND);
    }
 
    /** Sets the built-in KIND field; null is stored as an empty string. */
    public void setKind(String src) {
        setString(KIND, src);
    }
 
    /** Returns the value of the built-in MARK field, or an empty string when unset. */
    public String getMark() {
        return getString(MARK);
    }
 
    /** Sets the built-in MARK field; null is stored as an empty string. */
    public void setMark(String src) {
        setString(MARK, src);
    }
 
    /**
     * Returns the CREATED field, read as epoch milliseconds, as a Date.
     * An unset or unparsable value reads as 0, i.e. the epoch.
     */
    public Date getCreated() {
        return new Date(getLong(CREATED));
    }
 
    /**
     * Returns the UPDATED field, read as epoch milliseconds, as a Date.
     * An unset or unparsable value reads as 0, i.e. the epoch.
     */
    public Date getUpdated() {
        return new Date(getLong(UPDATED));
    }
 
    /**
     * Asks the handler whether an entity with this entity's id is already stored.
     *
     * @return false when there is no handler, otherwise the handler's answer
     */
    public boolean exists() {
        return handler != null && handler.exists(getId());
    }
 
    /**
     * Persists this entity through the handler: updates it when its id already exists,
     * otherwise creates it. UPDATED is always stamped with the current time; CREATED is
     * stamped only on creation. Does nothing without a handler.
     */
    public void save() {
        if (handler == null) {
            return;
        }
        final long timestamp = System.currentTimeMillis();
        final boolean alreadyStored = handler.exists(getId());
        if (!alreadyStored) {
            setLong(CREATED, timestamp);
        }
        setLong(UPDATED, timestamp);
        if (alreadyStored) {
            handler.update(this);
        } else {
            handler.create(this);
        }
    }

    /** Delegates to the handler's count(kind, query, max); returns 0 without a handler. */
    public int count(String kind, Query query, int max) {
        return handler == null ? 0 : handler.count(kind, query, max);
    }

    /** Delegates to the handler's count(kind, query, sort, max); returns 0 without a handler. */
    public int count(String kind, Query query, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, sort, max);
    }

    /** Delegates to the handler's count(kind, query, filter, max); returns 0 without a handler. */
    public int count(String kind, Query query, Filter filter, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, max);
    }

    /** Delegates to the handler's count(kind, query, filter, sort, max); returns 0 without a handler. */
    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        return handler == null ? 0 : handler.count(kind, query, filter, sort, max);
    }
 
    /** Delegates to the handler's search(kind, query, max); returns an empty list without a handler. */
    public List<SEntity> search(String kind, Query query, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, max);
    }

    /** Delegates to the handler's search(kind, query, sort, max); returns an empty list without a handler. */
    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, sort, max);
    }

    /** Delegates to the handler's search(kind, query, filter, max); returns an empty list without a handler. */
    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, max);
    }

    /** Delegates to the handler's search(kind, query, filter, sort, max); returns an empty list without a handler. */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, sort, max);
    }

    /** Paged form: delegates to the handler's search(kind, query, pagesize, pageno). */
    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, pagesize, pageno);
    }

    /** Paged form: delegates to the handler's search(kind, query, sort, pagesize, pageno). */
    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, sort, pagesize, pageno);
    }

    /** Paged form: delegates to the handler's search(kind, query, filter, pagesize, pageno). */
    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, pagesize, pageno);
    }

    /**
     * Paged form with both filter and sort, matching the handler's signature.
     * This overload was missing: the only filter+sort paged entry point carried an
     * extra, unused {@code max} argument.
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        if (handler == null) return new ArrayList<SEntity>();
        return handler.search(kind, query, filter, sort, pagesize, pageno);
    }

    /**
     * Legacy paged form kept for existing callers.
     *
     * @param max ignored; the result size is determined by {@code pagesize} and {@code pageno}
     * @deprecated use the six-argument overload without {@code max}
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max, int pagesize, int pageno) {
        return search(kind, query, filter, sort, pagesize, pageno);
    }
 
    /**
     * Asks the handler to load the entity with the given id into this instance.
     * Does nothing without a handler.
     *
     * @param id id of the stored entity to load
     */
    public void load(String id) {
        if (handler == null) {
            return;
        }
        handler.load(id, this);
    }
 
    /** Returns a new BooleanQuery built with its no-argument constructor. */
    public BooleanQuery newBooleanQuery() {
        return new BooleanQuery();
    }
 
    /** Returns a new BooleanClause wrapping {@code query} with the given occurrence rule. */
    public BooleanClause newBooleanClause(Query query, Occur occur) {
        return new BooleanClause(query, occur);
    }
 
    /** Returns Occur.MUST, for use when building boolean clauses. */
    public Occur occurMust() {
        return Occur.MUST;
    }
 
    /** Returns Occur.MUST_NOT, for use when building boolean clauses. */
    public Occur occurMustNot() {
        return Occur.MUST_NOT;
    }
 
    /** Returns Occur.SHOULD, for use when building boolean clauses. */
    public Occur occurShould() {
        return Occur.SHOULD;
    }

    /** Returns a new MatchAllDocsQuery. */
    public MatchAllDocsQuery newMatchAllDocsQuery() {
        return new MatchAllDocsQuery();
    }
 
    /** Returns a new MultiPhraseQuery built with its no-argument constructor. */
    public MultiPhraseQuery newMultiPhraseQuery() {
        return new MultiPhraseQuery();
    }
 
    /** Returns a new PhraseQuery built with its no-argument constructor. */
    public PhraseQuery newPhraseQuery() {
        return new PhraseQuery();
    }
 
    /** Returns a new NGramPhraseQuery constructed with the given {@code n}. */
    public NGramPhraseQuery newNGramPhraseQuery(int n) {
        return new NGramPhraseQuery(n);
    }
 
    /** Returns a new Term for the given field name and text value. */
    public Term newTerm(String field, String value) {
        return new Term(field, value);
    }
 
    /** Returns NumericRangeQuery.newDoubleRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }
 
    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeQuery<Double> newDoubleRangeQuery(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** Returns NumericRangeQuery.newFloatRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeQuery<Float> newFloatRangeQuery(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeQuery<Float> newFloatRangeQuery(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** Returns NumericRangeQuery.newIntRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, min, max, minInclusive, maxInclusive);
    }
 
    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeQuery<Integer> newIntegerRangeQuery(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }
 
    /** Returns NumericRangeQuery.newLongRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeQuery<Long> newLongRangeQuery(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeQuery<Long> newLongRangeQuery(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeQuery.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }
 
    /** Returns a new PrefixQuery for the given term. */
    public PrefixQuery newPrefixQuery(Term term) {
        return new PrefixQuery(term);
    }
 
    /** Returns a new TermQuery for the given term. */
    public TermQuery newTermQuery(Term term) {
        return new TermQuery(term);
    }
 
    /** Returns a new TermRangeQuery over {@code field}; the bounds and inclusiveness flags are passed through. */
    public TermRangeQuery newTermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper); 
    }
 
    /** Returns a new WildcardQuery for the given term. */
    public WildcardQuery newWildcardQuery(Term term) {
        return new WildcardQuery(term);
    }
 
    /** Returns a new FieldValueFilter for {@code field}; {@code negate} is passed through to Lucene. */
    public FieldValueFilter newFieldValueFilter(String field, boolean negate) {
        return new FieldValueFilter(field, negate);
    }
 
    /** Returns NumericRangeFilter.newDoubleRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeFilter<Double> newDoubleRangeFilter(String field, int precisionStep, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }

    /** Returns NumericRangeFilter.newFloatRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeFilter<Float> newFloatRangeFilter(String field, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeFilter<Float> newFloatRangeFilter(String field, int precisionStep, Float min, Float max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }
 
    /** Returns NumericRangeFilter.newIntRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeFilter<Integer> newIntegerRangeFilter(String field, int precisionStep, Integer min, Integer max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newIntRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }
 
    /** Returns NumericRangeFilter.newLongRange for the field; bounds and inclusiveness flags are passed through. */
    public NumericRangeFilter<Long> newLongRangeFilter(String field, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, min, max, minInclusive, maxInclusive);
    }

    /** Same as the five-argument form, additionally passing {@code precisionStep} through to Lucene. */
    public NumericRangeFilter<Long> newLongRangeFilter(String field, int precisionStep, Long min, Long max, boolean minInclusive, boolean maxInclusive) {
        return NumericRangeFilter.newLongRange(field, precisionStep, min, max, minInclusive, maxInclusive);
    }
 
    /** Returns a new PrefixFilter for the given term. */
    public PrefixFilter newPrefixFilter(Term term) {
        return new PrefixFilter(term);
    }
 
    /** Returns a new QueryWrapperFilter wrapping the given query. */
    public QueryWrapperFilter newQueryWrapperFilter(Query query) {
        return new QueryWrapperFilter(query);
    }
 
    /** Returns a new TermRangeFilter over {@code fieldName}; the bounds and inclusiveness flags are passed through. */
    public TermRangeFilter newTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
        return new TermRangeFilter(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
    }
 
    /**
     * Returns a new SortField for {@code field}.
     *
     * @param type    int sort type, e.g. one of the values returned by the sortField* helpers of this class
     * @param reverse passed through to the SortField constructor
     */
    public SortField newSortField(String field, int type, boolean reverse) {
        return new SortField(field, type, reverse);
    }
 
    /** Returns a new Sort built with its no-argument constructor. */
    public Sort newSort() {
        return new Sort();
    }

    /** Returns a new Sort over the given sort fields. */
    public Sort newSort(SortField... fields) {
        return new Sort(fields);
    }

    /** Returns a new Sort over a single sort field. */
    public Sort newSort(SortField field) {
        return new Sort(field);
    }
 
    /**
     * Parses one query string per field with MultiFieldQueryParser, using a
     * StandardAnalyzer at Version.LUCENE_36.
     *
     * @throws Exception whatever the Lucene parser throws
     */
    public Query parseQuery(String[] queries, String[] fields) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, new StandardAnalyzer(Version.LUCENE_36));
    }
 
    /** Same as the two-argument form, additionally passing per-field occurrence flags to the parser. */
    public Query parseQuery(String[] queries, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, queries, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }
 
    /** Parses a single query string against several fields, with per-field occurrence flags. */
    public Query parseQuery(String query, String[] fields, BooleanClause.Occur[] flags) throws Exception {
        return MultiFieldQueryParser.parse(Version.LUCENE_36, query, fields, flags, new StandardAnalyzer(Version.LUCENE_36));
    }
 
    /**
     * Highlights the parts of {@code text} that match {@code query}, using a
     * StandardAnalyzer and the default SimpleHTMLFormatter.
     *
     * @param query           query whose matches are highlighted
     * @param text            text to highlight
     * @param field           field name passed to the analyzer
     * @param fragmentSize    size handed to the SimpleFragmenter
     * @param maxNumFragments maximum number of fragments requested from the highlighter
     * @param separator       string placed between fragments
     * @return the highlighted fragments, or {@code text} unchanged when the highlighter returns nothing
     * @throws Exception whatever the analyzer or highlighter throws
     */
    public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments, String separator) throws Exception {
        Analyzer standard = new StandardAnalyzer(Version.LUCENE_36);
        CachingTokenFilter tokens = new CachingTokenFilter(standard.tokenStream(field, new StringReader(text)));
        Highlighter marker = new Highlighter(
                new SimpleHTMLFormatter(),
                new org.apache.lucene.search.highlight.QueryScorer(query));
        marker.setTextFragmenter(new SimpleFragmenter(fragmentSize));
        tokens.reset();
        String fragments = marker.getBestFragments(tokens, text, maxNumFragments, separator);
        if (fragments.length() == 0) {
            return text;
        }
        return fragments;
    }
 
    /** Registers the built-in fields (schema, id, created, updated, kind, mark) with their type codes. */
    protected void registerDefault() {
        final String[][] builtIns = {
            { SCHEMA, "s" },
            { ID, "s" },
            { CREATED, "l" },
            { UPDATED, "l" },
            { KIND, "s" },
            { MARK, "s" }
        };
        for (int i = 0; i < builtIns.length; i++) {
            register(builtIns[i][0], builtIns[i][1]);
        }
    }
 
    /**
     * Serializes the registered fields as "type|name|type|name..." and stores the
     * result in the data under the SCHEMA field.
     */
    protected void saveSchema() {
        StringBuilder encoded = new StringBuilder();
        for (Object name : schema.keySet()) {
            if (encoded.length() > 0) {
                encoded.append("|");
            }
            encoded.append(schema.get(name)).append("|").append(name);
        }
        data.put(SCHEMA, encoded.toString());
    }

    /**
     * Rebuilds the schema from the serialized form stored under the SCHEMA field,
     * re-adds the built-in fields, and writes the normalized schema back into the data.
     */
    protected void loadSchema() {
        String src = data.getProperty(SCHEMA);
        if (src == null) src = "";
        String[] fields = src.split("\\|");
        schema.clear();
        // Tokens come in (type, name) pairs; a trailing unpaired token is ignored.
        for (int i = 0; i + 1 < fields.length; i += 2) {
            register(fields[i + 1], fields[i]);
        }
        registerDefault();
        // Reuse the shared serializer instead of repeating its loop here.
        saveSchema();
    }
 
    /** Deletes the stored entity that has this entity's id. */
    public void delete() {
        delete(getId());
    }
 
    /**
     * Asks the handler to delete the entity with the given id; does nothing without a handler.
     *
     * @param id id of the entity to delete
     */
    public void delete(String id) {
        if (handler == null) {
            return;
        }
        handler.delete(id);
    }

    /** Returns Lucene's predefined SortField.FIELD_DOC. */
    public SortField sortFieldDoc() {
        return SortField.FIELD_DOC;
    }
    
    /** Returns Lucene's predefined SortField.FIELD_SCORE. */
    public SortField sortFieldScore() {
        return SortField.FIELD_SCORE;
    }
    
    /** Returns the int constant SortField.LONG. */
    public int sortFieldLong() {
        return SortField.LONG;
    }
    
    /** Returns the int constant SortField.INT. */
    public int sortFieldInteger() {
        return SortField.INT;
    }
    
    /** Returns the int constant SortField.DOUBLE. */
    public int sortFieldDouble() {
        return SortField.DOUBLE;
    }
    
    /** Returns the int constant SortField.FLOAT. */
    public int sortFieldFloat() {
        return SortField.FLOAT;
    }
    
    /** Returns the int constant SortField.STRING_VAL (note: not SortField.STRING). */
    public int sortFieldString() {
        return SortField.STRING_VAL;
    }

    /** Returns the handler's storage quota, or 0 when there is no handler. */
    public double storageQuota() {
        return handler != null ? handler.storageQuota() : 0;
    }

    /** Returns the handler's reported storage size, or 0 when there is no handler. */
    public double storageSize() {
        return handler != null ? handler.storageSize() : 0;
    }

    /**
     * Persistence backend used by SEntity. Every method here is a no-op default
     * (false, 0, or an empty list); subclasses override the operations they support.
     */
    public static class Handler {
  
        // Single-entity operations.
        public boolean exists(String id) { return false; }
        public void create(SEntity src) { }
        public void update(SEntity src) { }
        public void load(String id, SEntity src) { }
        public void delete(String id) { }
        // Searches limited to at most `max` hits.
        public List<SEntity> search(String kind, Query query, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int max) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { return new ArrayList<SEntity>(); }
        // Paged searches selected by page size and page number.
        public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) { return new ArrayList<SEntity>(); }
        // Hit counts for the same query shapes.
        public int count(String kind, Query query, int max) { return 0; }
        public int count(String kind, Query query, Sort sort, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, int max) { return 0; }
        public int count(String kind, Query query, Filter filter, Sort sort, int max) { return 0; }
        // Storage accounting.
        public double storageQuota() { return 0; }
        public double storageSize() { return 0; }
  
    }
 
}
Create com.paesia.schema.script.LuceneHandler class
1package com.paesia.schema.script;
2
3import java.io.BufferedWriter;
4import java.io.File;
5import java.io.FileOutputStream;
6import java.io.OutputStreamWriter;
7import java.util.ArrayList;
8import java.util.List;
9import java.util.Timer;
10import java.util.TimerTask;
11import java.util.UUID;
12
13import org.apache.lucene.analysis.Analyzer;
14import org.apache.lucene.analysis.standard.StandardAnalyzer;
15import org.apache.lucene.document.Document;
16import org.apache.lucene.document.Field;
17import org.apache.lucene.document.Field.Index;
18import org.apache.lucene.document.Field.Store;
19import org.apache.lucene.document.NumericField;
20import org.apache.lucene.index.IndexReader;
21import org.apache.lucene.index.IndexWriter;
22import org.apache.lucene.index.IndexWriterConfig;
23import org.apache.lucene.index.IndexWriterConfig.OpenMode;
24import org.apache.lucene.index.Term;
25import org.apache.lucene.search.BooleanClause;
26import org.apache.lucene.search.BooleanClause.Occur;
27import org.apache.lucene.search.BooleanQuery;
28import org.apache.lucene.search.Filter;
29import org.apache.lucene.search.IndexSearcher;
30import org.apache.lucene.search.Query;
31import org.apache.lucene.search.Sort;
32import org.apache.lucene.search.TermQuery;
33import org.apache.lucene.search.TopDocs;
34import org.apache.lucene.store.FSDirectory;
35import org.apache.lucene.util.Version;
36
37import com.paesia.schema.script.safe.lucene.SEntity;
38
39public class LuceneHandler extends SEntity.Handler {
40
41 public static final String KIND_QUOTA = "C4f91ee1eb414a"; // entity kind under which quota records are stored
42 public static final String QUOTA_SYSTEM = "F4f91ee659b1ec"; // value of the "kind" field on the system quota record
43
44 protected String dirIndex = ""; // directory of the Lucene index
45 protected String dirBackup = ""; // root directory of the per-entity backup files
46 protected double systemQuota = 0; // upper bound compared against the quota record's "size" value
47
/**
 * Creates a handler over a Lucene index directory and a backup directory.
 *
 * @param dirIndex    path of the Lucene index directory
 * @param dirBackup   root path under which entity backup files are written
 * @param systemQuota upper bound for the tracked storage size
 */
public LuceneHandler(String dirIndex, String dirBackup, double systemQuota) {
    this.systemQuota = systemQuota;
    this.dirBackup = dirBackup;
    this.dirIndex = dirIndex;
}
53
54 public boolean exists(String id) {
55 boolean tag = false;
56 if (id.length() == 0) return tag;
57 try {
58 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
59 IndexSearcher searcher = new IndexSearcher(reader);
60 TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
61 if (td.totalHits > 0) {
62 tag = true;
63 }
64 searcher.close();
65 reader.close();
66 } catch (Exception e) {
67 }
68
69 return tag;
70 }
71
72 public void create(SEntity src) {
73 Monitor monitor = new Monitor();
74 Timer timer = new Timer();
75 timer.schedule(new CreateTask(timer, src, monitor), 1);
76 while (!monitor.finished) {
77 try {
78 Thread.sleep(10);
79 } catch (Exception e) {
80 }
81 }
82 timer = null;
83 }
84
85 protected boolean quotaCreate(SEntity src) {
86 boolean tag = false;
87 SEntity quota = findSystemQuota();
88 if (quota == null) {
89 quota = newSystemQuota();
90 }
91 double newSize = quota.getDouble("size") + ((double)src.toString().length() / 1048576.0);
92 if (newSize < 0) newSize = 0;
93 if (newSize < systemQuota) {
94 tag = true;
95 quota.setDouble("size", newSize);
96 quota.save();
97 }
98 return tag;
99 }
100
101 protected boolean quotaUpdate(SEntity src) {
102 boolean tag = false;
103 SEntity quota = findSystemQuota();
104 if (quota == null) {
105 quota = newSystemQuota();
106 }
107 double newSize = quota.getDouble("size") - ((double)getFileSize(src.getId(), src.getKind()) / 1048576.0) + ((double)src.toString().length() / 1048576.0);
108 if (newSize < 0) newSize = 0;
109 if (newSize < systemQuota) {
110 tag = true;
111 quota.setDouble("size", newSize);
112 quota.save();
113 }
114 return tag;
115 }
116
117 protected boolean quotaDelete(String id, String kind) {
118 boolean tag = false;
119 SEntity quota = findSystemQuota();
120 if (quota == null) {
121 quota = newSystemQuota();
122 }
123 double newSize = quota.getDouble("size") - ((double)getFileSize(id, kind) / 1048576.0);
124 if (newSize < 0) newSize = 0;
125 if (newSize < systemQuota) {
126 tag = true;
127 quota.setDouble("size", newSize);
128 quota.save();
129 }
130 return tag;
131 }
132
133 protected long getFileSize(String id, String kind) {
134 long tag = 0;
135 String fid = "";
136 for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
137 if (fid.length() > 0) fid += File.separator;
138 fid += id.substring(i, i + 2);
139 }
140 File file = new File(dirBackup, kind);
141 file = new File(file.getAbsolutePath(), fid);
142 String folder = file.getAbsolutePath();
143 file = new File(folder, id + ".txt");
144 if (file.exists()) {
145 tag = file.length();
146 }
147 return tag;
148 }
149
150 protected SEntity newSystemQuota() {
151 SEntity tag = new SEntity(this);
152 tag.setSchema("s|kind|d|size");
153 tag.setKind(KIND_QUOTA);
154 tag.setId(UUID.randomUUID().toString().replaceAll("-", ""));
155 tag.setString("kind", QUOTA_SYSTEM);
156 return tag;
157 }
158
159 protected SEntity findSystemQuota() {
160 List<SEntity> results = search(KIND_QUOTA, new TermQuery(new Term("kind", QUOTA_SYSTEM)), 1);
161 if (results.size() == 0) return null;
162 return results.get(0);
163 }
164
165 protected void createEntity(SEntity src) {
166 if (src.getId().length() == 0) return;
167 if (src.getKind().length() == 0) return;
168
169 try {
170 if (!src.getKind().equals(KIND_QUOTA)) {
171 if (!quotaCreate(src)) return;
172 }
173 backup(src);
174 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
175 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
176 iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
177 IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
178 Document doc = new Document();
179 write(src, doc);
180 writer.addDocument(doc);
181 writer.close();
182 } catch (Exception e) {
183 }
184 }
185
186 public void update(SEntity src) {
187 Monitor monitor = new Monitor();
188 Timer timer = new Timer();
189 timer.schedule(new UpdateTask(timer, src, monitor), 1);
190 while (!monitor.finished) {
191 try {
192 Thread.sleep(10);
193 } catch (Exception e) {
194 }
195 }
196 timer = null;
197 }
198
199 protected void updateEntity(SEntity src) {
200 if (src.getId().length() == 0) return;
201 if (src.getKind().length() == 0) return;
202
203 try {
204 if (!src.getKind().equals(KIND_QUOTA)) {
205 if (!quotaUpdate(src)) return;
206 }
207 backup(src);
208 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
209 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
210 iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
211 IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
212 Document doc = new Document();
213 write(src, doc);
214 writer.updateDocument(new Term(SEntity.ID, src.getId()), doc);
215 writer.close();
216 } catch (Exception e) {
217 }
218 }
219
220 public void load(String id, SEntity src) {
221 try {
222 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
223 IndexSearcher searcher = new IndexSearcher(reader);
224 TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
225 if (td.totalHits > 0) {
226 Document doc = searcher.doc(td.scoreDocs[0].doc);
227 if (allowLoad(id, doc.get(SEntity.KIND))) {
228 src.setSchema(doc.get(SEntity.SCHEMA));
229 read(src, doc);
230 }
231 }
232 searcher.close();
233 reader.close();
234 } catch (Exception e) {
235 }
236 }
237
238 protected boolean allowLoad(String id, String kind) {
239 return true;
240 }
241
242 public int count(String kind, Query query, Filter filter, Sort sort, int max) {
243 int tag = 0;
244 try {
245 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
246 IndexSearcher searcher = new IndexSearcher(reader);
247 BooleanQuery boolQuery = new BooleanQuery();
248 boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
249 if (query != null) {
250 boolQuery.add(new BooleanClause(query, Occur.MUST));
251 }
252 TopDocs td = null;
253 if (filter != null && sort != null) {
254 td = searcher.search(boolQuery, filter, max, sort);
255 } else if (filter != null) {
256 td = searcher.search(boolQuery, filter, max);
257 } else if (sort != null) {
258 td = searcher.search(boolQuery, max, sort);
259 } else {
260 td = searcher.search(boolQuery, max);
261 }
262 tag = td.totalHits;
263 searcher.close();
264 reader.close();
265 } catch (Exception e) {
266 }
267 return tag;
268 }
269
270 public int count(String kind, Query query, int max) {
271 return count(kind, query, null, null, max);
272 }
273
274 public int count(String kind, Query query, Sort sort, int max) {
275 return count(kind, query, null, sort, max);
276 }
277
278 public int count(String kind, Query query, Filter filter, int max) {
279 return count(kind, query, filter, null, max);
280 }
281
282 public List<SEntity> search(String kind, Query query, int max) {
283 return search(kind, query, null, null, max);
284 }
285
286 public List<SEntity> search(String kind, Query query, Sort sort, int max) {
287 return search(kind, query, null, sort, max);
288 }
289
290 public List<SEntity> search(String kind, Query query, Filter filter, int max) {
291 return search(kind, query, filter, null, max);
292 }
293
294 public List<SEntity> search(String kind, Query query, int pagesize, int pageno) {
295 return search(kind, query, null, null, pagesize, pageno);
296 }
297
298 public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) {
299 return search(kind, query, null, sort, pagesize, pageno);
300 }
301
302 public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
303 return search(kind, query, filter, null, pagesize, pageno);
304 }
305
306 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
307 List<SEntity> tag = new ArrayList<SEntity>();
308 try {
309 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
310 IndexSearcher searcher = new IndexSearcher(reader);
311 BooleanQuery boolQuery = new BooleanQuery();
312 boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
313 if (query != null) {
314 boolQuery.add(new BooleanClause(query, Occur.MUST));
315 }
316 TopDocs td = null;
317 if (filter != null && sort != null) {
318 td = searcher.search(boolQuery, filter, max, sort);
319 } else if (filter != null) {
320 td = searcher.search(boolQuery, filter, max);
321 } else if (sort != null) {
322 td = searcher.search(boolQuery, max, sort);
323 } else {
324 td = searcher.search(boolQuery, max);
325 }
326 for (int i = 0; i < td.totalHits; i++) {
327 SEntity item = new SEntity(this);
328 Document doc = searcher.doc(td.scoreDocs[i].doc);
329 item.setSchema(doc.get(SEntity.SCHEMA));
330 read(item, doc);
331 tag.add(item);
332 }
333 searcher.close();
334 reader.close();
335 } catch (Exception e) {
336 }
337 return tag;
338 }
339
340 public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
341 List<SEntity> tag = new ArrayList<SEntity>();
342 try {
343 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
344 IndexSearcher searcher = new IndexSearcher(reader);
345 BooleanQuery boolQuery = new BooleanQuery();
346 boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
347 if (query != null) {
348 boolQuery.add(new BooleanClause(query, Occur.MUST));
349 }
350 if (pagesize <= 0) pagesize = 10;
351 if (pageno <= 0) pageno = 1;
352 int max = pageno * pagesize;
353 TopDocs td = null;
354 if (filter != null && sort != null) {
355 td = searcher.search(boolQuery, filter, max, sort);
356 } else if (filter != null) {
357 td = searcher.search(boolQuery, filter, max);
358 } else if (sort != null) {
359 td = searcher.search(boolQuery, max, sort);
360 } else {
361 td = searcher.search(boolQuery, max);
362 }
363 for (int i = (pageno - 1) * pagesize; i < td.totalHits && i < max; i++) {
364 SEntity item = new SEntity(this);
365 Document doc = searcher.doc(td.scoreDocs[i].doc);
366 item.setSchema(doc.get(SEntity.SCHEMA));
367 read(item, doc);
368 tag.add(item);
369 }
370 searcher.close();
371 reader.close();
372 } catch (Exception e) {
373 }
374 return tag;
375 }
376
377 protected void backup(SEntity src) {
378 String id = src.getId();
379 if (id.length() == 0) return;
380 String kind = src.getKind();
381 if (kind.length() == 0) return;
382 String fid = "";
383 for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
384 if (fid.length() > 0) fid += File.separator;
385 fid += id.substring(i, i + 2);
386 }
387 try {
388 File file = new File(dirBackup, kind);
389 file = new File(file.getAbsolutePath(), fid);
390 file.mkdirs();
391 String folder = file.getAbsolutePath();
392 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(folder, id + ".txt"))));
393 writer.write(src.toString());
394 writer.close();
395 } catch (Exception e) {
396 }
397 }
398
399 protected void read(SEntity entity, Document doc) {
400 String schema = doc.get(SEntity.SCHEMA);
401 if (schema == null) schema = "";
402 String[] fields = schema.split("\\|");
403 for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) {
404 String kind = fields[i];
405 String fname = fields[i + 1];
406 String val = doc.get(fname);
407 if (val == null) val = "";
408 if (SEntity.ALL_KINDS.indexOf("|" + kind + "|") < 0) continue;
409 entity.setString(fname, val);
410 }
411 }
412
413 protected void write(SEntity entity, Document doc) {
414 String schema = entity.getSchema();
415 if (schema == null) schema = "";
416 String[] fields = schema.split("\\|");
417 for (int i = 0; i < fields.length && i + 1 < fields.length; i+= 2) {
418 String kind = fields[i];
419 String fname = fields[i + 1];
420 if (SEntity.STRING.equalsIgnoreCase(kind)) {
421 Field field = new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS);
422 doc.add(field);
423 } else if (SEntity.DOUBLE.equalsIgnoreCase(kind)) {
424 NumericField field = new NumericField(fname, Store.YES, true);
425 field.setDoubleValue(entity.getDouble(fname));
426 doc.add(field);
427 } else if (SEntity.FLOAT.equalsIgnoreCase(kind)) {
428 NumericField field = new NumericField(fname, Store.YES, true);
429 field.setFloatValue(entity.getFloat(fname));
430 doc.add(field);
431 } else if (SEntity.INTEGER.equalsIgnoreCase(kind)) {
432 NumericField field = new NumericField(fname, Store.YES, true);
433 field.setIntValue(entity.getInteger(fname));
434 doc.add(field);
435 } else if (SEntity.LONG.equalsIgnoreCase(kind)) {
436 NumericField field = new NumericField(fname, Store.YES, true);
437 field.setLongValue(entity.getLong(fname));
438 doc.add(field);
439 } else if (SEntity.ANALYZED.equalsIgnoreCase(kind)) {
440 Field field = new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED);
441 doc.add(field);
442 }
443 }
444 }
445
446 public void delete(String id) {
447 Monitor monitor = new Monitor();
448 Timer timer = new Timer();
449 timer.schedule(new DeleteTask(timer, id, monitor), 1);
450 while (!monitor.finished) {
451 try {
452 Thread.sleep(10);
453 } catch (Exception e) {
454 }
455 }
456 timer = null;
457 }
458
459 protected void deleteEntity(String id) {
460 if (id.length() == 0) return;
461 String kind = "";
462
463 try {
464 IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
465 IndexSearcher searcher = new IndexSearcher(reader);
466 TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
467 if (td.totalHits > 0) {
468 Document doc = searcher.doc(td.scoreDocs[0].doc);
469 kind = doc.get(SEntity.KIND);
470 }
471 searcher.close();
472 reader.close();
473 } catch (Exception e) {
474 }
475 if (kind.length() == 0) return;
476 if (!allowDelete(id, kind)) return;
477
478 try {
479 if (!kind.equals(KIND_QUOTA)) {
480 if (!quotaDelete(id, kind)) return;
481 }
482 removeBackup(id, kind);
483 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
484 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
485 iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
486 IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
487 writer.deleteDocuments(new Term(SEntity.ID, id));
488 writer.close();
489 } catch (Exception e) {
490 }
491 }
492
493 protected boolean allowDelete(String id, String kind) {
494 return true;
495 }
496
497 protected void removeBackup(String id, String kind) {
498 if (id.length() == 0) return;
499 if (kind.length() == 0) return;
500 String fid = "";
501 for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
502 if (fid.length() > 0) fid += File.separator;
503 fid += id.substring(i, i + 2);
504 }
505 try {
506 File file = new File(dirBackup, kind);
507 file = new File(file.getAbsolutePath(), fid);
508 String folder = file.getAbsolutePath();
509 file = new File(folder, id + ".txt");
510 file.delete();
511 } catch (Exception e) {
512 }
513 }
514
515 public double storageQuota() {
516 return systemQuota;
517 }
518
519 public double storageSize() {
520 SEntity tag = findSystemQuota();
521 if (tag == null) return 0;
522 return tag.getDouble("size");
523 }
524
525 private class DeleteTask extends TimerTask {
526
527 private String id;
528 private Timer timer;
529 private Monitor monitor;
530
531 public DeleteTask(Timer timer, String id, Monitor monitor) {
532 this.timer = timer;
533 this.id = id;
534 this.monitor = monitor;
535 }
536
537 @Override
538 public void run() {
539 deleteEntity(id);
540 monitor.finished = true;
541 timer.cancel();
542 timer.purge();
543 timer = null;
544 }
545
546 }
547
548 private class CreateTask extends TimerTask {
549
550 private SEntity entity;
551 private Timer timer;
552 private Monitor monitor;
553
554 public CreateTask(Timer timer, SEntity entity, Monitor monitor) {
555 this.timer = timer;
556 this.entity = entity;
557 this.monitor = monitor;
558 }
559
560 @Override
561 public void run() {
562 createEntity(entity);
563 monitor.finished = true;
564 timer.cancel();
565 timer.purge();
566 timer = null;
567 }
568
569 }
570
571 private class UpdateTask extends TimerTask {
572
573 private SEntity entity;
574 private Timer timer;
575 private Monitor monitor;
576
577 public UpdateTask(Timer timer, SEntity entity, Monitor monitor) {
578 this.timer = timer;
579 this.entity = entity;
580 this.monitor = monitor;
581 }
582
583 @Override
584 public void run() {
585 updateEntity(entity);
586 monitor.finished = true;
587 timer.cancel();
588 timer.purge();
589 timer = null;
590 }
591
592 }
593
594 private class Monitor {
595 public boolean finished = false;
596 }
597
598}
package com.paesia.schema.script;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.UUID;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.paesia.schema.script.safe.lucene.SEntity;

public class LuceneHandler extends SEntity.Handler {

    public static final String KIND_QUOTA = "C4f91ee1eb414a";
    public static final String QUOTA_SYSTEM = "F4f91ee659b1ec";
 
    protected String dirIndex = "";
    protected String dirBackup = "";
    protected double systemQuota = 0;

    public LuceneHandler(String dirIndex, String dirBackup, double systemQuota) {
        this.dirIndex = dirIndex;
        this.dirBackup = dirBackup;
        this.systemQuota = systemQuota;
    }
 
    /**
     * Reports whether the index contains a document whose {@code SEntity.ID} term equals {@code id}.
     *
     * <p>Best-effort: any failure while opening or querying the index is swallowed and
     * reported as {@code false}.
     *
     * @param id entity identifier to look up; {@code null} or empty yields {@code false}
     * @return {@code true} if at least one matching document was found
     */
    public boolean exists(String id) {
        boolean tag = false;
        if (id == null || id.length() == 0) return tag;
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
                    tag = td.totalHits > 0;
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed query does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: an unreadable or missing index counts as "not found".
        }

        return tag;
    }
 
    /**
     * Creates {@code src} by running {@code CreateTask} on a fresh {@link Timer} thread and
     * blocking the caller until the task marks the monitor as finished.
     *
     * <p>If the calling thread is interrupted while waiting, the wait continues (the task is
     * not cancelled) and the interrupt status is restored before returning, so callers can
     * still observe it.
     *
     * @param src entity to create
     */
    public void create(SEntity src) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new CreateTask(timer, src, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Remember the interrupt; re-asserting it here would make sleep() throw at once
                // on every iteration and turn the loop into a hot spin.
                interrupted = true;
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Charges a new entity against the system quota record.
     *
     * <p>The charge is the character count of {@code src.toString()} divided by 1048576. The
     * updated total is saved only when it stays strictly below {@code systemQuota}.
     *
     * @param src entity about to be created
     * @return {@code true} if the quota record was updated and the create may proceed
     */
    protected boolean quotaCreate(SEntity src) {
        SEntity quota = findSystemQuota();
        if (quota == null) {
            // No quota record yet: start from a fresh, unsaved one.
            quota = newSystemQuota();
        }
        double current = quota.getDouble("size");
        double added = (double)src.toString().length() / 1048576.0;
        double projected = current + added;
        if (projected < 0) {
            projected = 0;
        }
        if (!(projected < systemQuota)) {
            return false;
        }
        quota.setDouble("size", projected);
        quota.save();
        return true;
    }

    /**
     * Re-charges an entity against the system quota record when it is updated.
     *
     * <p>The size of the existing backup file is subtracted and the character count of
     * {@code src.toString()} is added (both divided by 1048576). The new total is saved only
     * when it stays strictly below {@code systemQuota}.
     *
     * @param src entity about to be updated
     * @return {@code true} if the quota record was updated and the update may proceed
     */
    protected boolean quotaUpdate(SEntity src) {
        SEntity quota = findSystemQuota();
        if (quota == null) {
            quota = newSystemQuota();
        }
        double current = quota.getDouble("size");
        double previous = (double)getFileSize(src.getId(), src.getKind()) / 1048576.0;
        double replacement = (double)src.toString().length() / 1048576.0;
        double projected = current - previous + replacement;
        if (projected < 0) {
            projected = 0;
        }
        if (!(projected < systemQuota)) {
            return false;
        }
        quota.setDouble("size", projected);
        quota.save();
        return true;
    }

    /**
     * Releases the quota charged for an entity that is about to be deleted.
     *
     * <p>The size of the entity's backup file (divided by 1048576) is subtracted from the
     * quota record, clamped at zero, and saved. Unlike create/update, a delete only frees
     * space, so it is never refused: rejecting it while the total is still at or above
     * {@code systemQuota} (for example after the quota was lowered, or when it is 0) would
     * make it impossible to ever get back under the limit.
     *
     * @param id   identifier of the entity being deleted
     * @param kind kind of the entity being deleted
     * @return always {@code true}
     */
    protected boolean quotaDelete(String id, String kind) {
        SEntity quota = findSystemQuota();
        if (quota == null) {
            quota = newSystemQuota();
        }
        double newSize = quota.getDouble("size") - ((double)getFileSize(id, kind) / 1048576.0);
        if (newSize < 0) newSize = 0;
        quota.setDouble("size", newSize);
        quota.save();
        return true;
    }
 
    /**
     * Returns the length of the backup file for the given entity.
     *
     * <p>The file is located at {@code dirBackup/kind/<id split into 2-character
     * directories>/id.txt}; a trailing odd character of {@code id} does not contribute a
     * directory level.
     *
     * @param id   entity identifier
     * @param kind entity kind
     * @return the file length as reported by {@link File#length()}, or 0 if the file does not exist
     */
    protected long getFileSize(String id, String kind) {
        StringBuilder shards = new StringBuilder();
        for (int pos = 0; pos + 1 < id.length(); pos += 2) {
            if (shards.length() > 0) {
                shards.append(File.separator);
            }
            shards.append(id.substring(pos, pos + 2));
        }
        File kindDir = new File(dirBackup, kind);
        File shardDir = new File(kindDir.getAbsolutePath(), shards.toString());
        File backupFile = new File(shardDir.getAbsolutePath(), id + ".txt");
        return backupFile.exists() ? backupFile.length() : 0;
    }
 
    /**
     * Builds a new, unsaved system quota entity.
     *
     * <p>The entity uses schema {@code "s|kind|d|size"}, kind {@code KIND_QUOTA}, a random
     * dash-free UUID as id, and its {@code "kind"} field set to {@code QUOTA_SYSTEM}.
     *
     * @return the freshly initialised quota entity
     */
    protected SEntity newSystemQuota() {
        String quotaId = UUID.randomUUID().toString().replace("-", "");
        SEntity quota = new SEntity(this);
        quota.setSchema("s|kind|d|size");
        quota.setKind(KIND_QUOTA);
        quota.setId(quotaId);
        quota.setString("kind", QUOTA_SYSTEM);
        return quota;
    }
 
    /**
     * Looks up the system quota entity: the first {@code KIND_QUOTA} document whose
     * {@code "kind"} field equals {@code QUOTA_SYSTEM}.
     *
     * @return the quota entity, or {@code null} if the search returned nothing
     */
    protected SEntity findSystemQuota() {
        TermQuery bySystemKind = new TermQuery(new Term("kind", QUOTA_SYSTEM));
        List<SEntity> matches = search(KIND_QUOTA, bySystemKind, 1);
        return matches.size() == 0 ? null : matches.get(0);
    }

    /**
     * Performs the actual create: charges the quota (except for quota entities themselves),
     * writes the backup file, and adds the entity as a new document to the index.
     *
     * <p>Does nothing when the id or kind is empty or when {@code quotaCreate} refuses the
     * entity. Best-effort: failures are swallowed.
     *
     * @param src entity to add
     */
    protected void createEntity(SEntity src) {
        if (src.getId().length() == 0) return;
        if (src.getKind().length() == 0) return;

        try {
            if (!src.getKind().equals(KIND_QUOTA)) {
                if (!quotaCreate(src)) return;
            }
            backup(src);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            try {
                Document doc = new Document();
                write(src, doc);
                writer.addDocument(doc);
            } finally {
                // Always close: a writer left open after a failure keeps the index write lock,
                // which would make every later create/update/delete fail.
                writer.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: the caller has no error channel.
        }
    }
 
    /**
     * Updates {@code src} by running {@code UpdateTask} on a fresh {@link Timer} thread and
     * blocking the caller until the task marks the monitor as finished.
     *
     * <p>If the calling thread is interrupted while waiting, the wait continues (the task is
     * not cancelled) and the interrupt status is restored before returning.
     *
     * @param src entity to update
     */
    public void update(SEntity src) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new UpdateTask(timer, src, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Remember the interrupt; re-asserting it here would make sleep() throw at once
                // on every iteration and turn the loop into a hot spin.
                interrupted = true;
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Performs the actual update: re-charges the quota (except for quota entities
     * themselves), rewrites the backup file, and replaces the indexed document whose
     * {@code SEntity.ID} term equals the entity's id.
     *
     * <p>Does nothing when the id or kind is empty or when {@code quotaUpdate} refuses the
     * entity. Best-effort: failures are swallowed.
     *
     * @param src entity to store
     */
    protected void updateEntity(SEntity src) {
        if (src.getId().length() == 0) return;
        if (src.getKind().length() == 0) return;

        try {
            if (!src.getKind().equals(KIND_QUOTA)) {
                if (!quotaUpdate(src)) return;
            }
            backup(src);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            try {
                Document doc = new Document();
                write(src, doc);
                writer.updateDocument(new Term(SEntity.ID, src.getId()), doc);
            } finally {
                // Always close: a writer left open after a failure keeps the index write lock,
                // which would make every later create/update/delete fail.
                writer.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: the caller has no error channel.
        }
    }
 
    /**
     * Populates {@code src} from the indexed document whose {@code SEntity.ID} term equals
     * {@code id}, provided {@code allowLoad} permits it.
     *
     * <p>{@code src} is left untouched when no document matches, when loading is not allowed,
     * or when the index cannot be read (failures are swallowed).
     *
     * @param id  identifier of the entity to load
     * @param src entity that receives the stored schema and field values
     */
    public void load(String id, SEntity src) {
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
                    if (td.totalHits > 0) {
                        Document doc = searcher.doc(td.scoreDocs[0].doc);
                        if (allowLoad(id, doc.get(SEntity.KIND))) {
                            src.setSchema(doc.get(SEntity.SCHEMA));
                            read(src, doc);
                        }
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed query or read does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: the caller has no error channel.
        }
    }
 
    /**
     * Hook consulted by {@code load} before an entity is populated from the index.
     * This implementation permits every load.
     *
     * @param id   identifier of the entity being loaded
     * @param kind kind stored with the indexed document
     * @return {@code true} to allow the load
     */
    protected boolean allowLoad(String id, String kind) {
        return true;
    }
 
    /**
     * Counts the documents of the given kind that match {@code query}.
     *
     * <p>The effective query requires the {@code SEntity.KIND} term to equal {@code kind}
     * and, when {@code query} is non-null, requires {@code query} as well. The result is the
     * search's {@code totalHits}. Best-effort: on any failure 0 is returned.
     *
     * @param kind   entity kind to restrict the search to
     * @param query  additional required query, or {@code null}
     * @param filter optional filter, or {@code null}
     * @param sort   optional sort, or {@code null}
     * @param max    number of top hits requested from the searcher
     * @return the total number of hits, or 0 on failure
     */
    public int count(String kind, Query query, Filter filter, Sort sort, int max) {
        int tag = 0;
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
                    if (query != null) {
                        boolQuery.add(new BooleanClause(query, Occur.MUST));
                    }
                    TopDocs td = null;
                    if (filter != null && sort != null) {
                        td = searcher.search(boolQuery, filter, max, sort);
                    } else if (filter != null) {
                        td = searcher.search(boolQuery, filter, max);
                    } else if (sort != null) {
                        td = searcher.search(boolQuery, max, sort);
                    } else {
                        td = searcher.search(boolQuery, max);
                    }
                    tag = td.totalHits;
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed query does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: failures are reported as a count of 0.
        }
        return tag;
    }

    /** Counts matches for {@code kind}/{@code query} with no filter and no sort. */
    public int count(String kind, Query query, int max) {
        return count(kind, query, null, null, max);
    }

    /** Counts matches for {@code kind}/{@code query} with a sort and no filter. */
    public int count(String kind, Query query, Sort sort, int max) {
        return count(kind, query, null, sort, max);
    }
 
    /** Counts matches for {@code kind}/{@code query} with a filter and no sort. */
    public int count(String kind, Query query, Filter filter, int max) {
        return count(kind, query, filter, null, max);
    }
 
    /** Returns up to {@code max} requested hits for {@code kind}/{@code query}, with no filter and no sort. */
    public List<SEntity> search(String kind, Query query, int max) {
        return search(kind, query, null, null, max);
    }

    /** Returns up to {@code max} requested hits for {@code kind}/{@code query}, sorted, with no filter. */
    public List<SEntity> search(String kind, Query query, Sort sort, int max) {
        return search(kind, query, null, sort, max);
    }
 
    /** Returns up to {@code max} requested hits for {@code kind}/{@code query}, filtered, with no sort. */
    public List<SEntity> search(String kind, Query query, Filter filter, int max) {
        return search(kind, query, filter, null, max);
    }
 
    /** Returns page {@code pageno} of size {@code pagesize}, with no filter and no sort. */
    public List<SEntity> search(String kind, Query query, int pagesize, int pageno) { 
        return search(kind, query, null, null, pagesize, pageno);
    }
 
    /** Returns page {@code pageno} of size {@code pagesize}, sorted, with no filter. */
    public List<SEntity> search(String kind, Query query, Sort sort, int pagesize, int pageno) { 
        return search(kind, query, null, sort, pagesize, pageno);
    }
 
    /** Returns page {@code pageno} of size {@code pagesize}, filtered, with no sort. */
    public List<SEntity> search(String kind, Query query, Filter filter, int pagesize, int pageno) {
        return search(kind, query, filter, null, pagesize, pageno);
    }
 
    /**
     * Searches the index for entities of the given kind and returns the top hits.
     *
     * <p>The effective query requires the {@code SEntity.KIND} term to equal {@code kind}
     * and, when {@code query} is non-null, requires {@code query} as well. Each returned hit
     * is materialised as a new {@code SEntity} bound to this handler. Best-effort: on failure
     * the entities collected so far (possibly none) are returned.
     *
     * @param kind   entity kind to restrict the search to
     * @param query  additional required query, or {@code null}
     * @param filter optional filter, or {@code null}
     * @param sort   optional sort, or {@code null}
     * @param max    maximum number of hits to request
     * @return the matching entities, never {@code null}
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) {
        List<SEntity> tag = new ArrayList<SEntity>();
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
                    if (query != null) {
                        boolQuery.add(new BooleanClause(query, Occur.MUST));
                    }
                    TopDocs td = null;
                    if (filter != null && sort != null) {
                        td = searcher.search(boolQuery, filter, max, sort);
                    } else if (filter != null) {
                        td = searcher.search(boolQuery, filter, max);
                    } else if (sort != null) {
                        td = searcher.search(boolQuery, max, sort);
                    } else {
                        td = searcher.search(boolQuery, max);
                    }
                    // Iterate over the hits actually returned: totalHits counts every match
                    // in the index and can exceed the number of entries in scoreDocs.
                    for (int i = 0; i < td.scoreDocs.length; i++) {
                        SEntity item = new SEntity(this);
                        Document doc = searcher.doc(td.scoreDocs[i].doc);
                        item.setSchema(doc.get(SEntity.SCHEMA));
                        read(item, doc);
                        tag.add(item);
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed query or read does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: return whatever was collected before the failure.
        }
        return tag;
    }

    /**
     * Searches the index for entities of the given kind and returns one page of hits.
     *
     * <p>The searcher is asked for the top {@code pageno * pagesize} hits and the entries from
     * index {@code (pageno - 1) * pagesize} onward are materialised. Non-positive
     * {@code pagesize} defaults to 10 and non-positive {@code pageno} defaults to 1.
     * Best-effort: on failure the entities collected so far (possibly none) are returned.
     *
     * @param kind     entity kind to restrict the search to
     * @param query    additional required query, or {@code null}
     * @param filter   optional filter, or {@code null}
     * @param sort     optional sort, or {@code null}
     * @param pagesize number of entities per page
     * @param pageno   1-based page number
     * @return the entities on the requested page, never {@code null}
     */
    public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int pagesize, int pageno) {
        List<SEntity> tag = new ArrayList<SEntity>();
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST));
                    if (query != null) {
                        boolQuery.add(new BooleanClause(query, Occur.MUST));
                    }
                    if (pagesize <= 0) pagesize = 10;
                    if (pageno <= 0) pageno = 1;
                    int max = pageno * pagesize;
                    TopDocs td = null;
                    if (filter != null && sort != null) {
                        td = searcher.search(boolQuery, filter, max, sort);
                    } else if (filter != null) {
                        td = searcher.search(boolQuery, filter, max);
                    } else if (sort != null) {
                        td = searcher.search(boolQuery, max, sort);
                    } else {
                        td = searcher.search(boolQuery, max);
                    }
                    // Bound the page by the hits actually returned rather than by totalHits.
                    int end = Math.min(max, td.scoreDocs.length);
                    for (int i = (pageno - 1) * pagesize; i < end; i++) {
                        SEntity item = new SEntity(this);
                        Document doc = searcher.doc(td.scoreDocs[i].doc);
                        item.setSchema(doc.get(SEntity.SCHEMA));
                        read(item, doc);
                        tag.add(item);
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed query or read does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: return whatever was collected before the failure.
        }
        return tag;
    }
 
    /**
     * Writes {@code src.toString()} to the entity's backup file at
     * {@code dirBackup/kind/<id split into 2-character directories>/id.txt}, creating the
     * directories as needed.
     *
     * <p>Does nothing when the id or kind is empty. Best-effort: I/O failures are swallowed.
     * The text is encoded with the platform default charset, as before.
     *
     * @param src entity to back up
     */
    protected void backup(SEntity src) {
        String id = src.getId();
        if (id.length() == 0) return;
        String kind = src.getKind();
        if (kind.length() == 0) return;
        String fid = "";
        for (int i = 0; i < id.length() && i + 1 < id.length(); i += 2) {
            if (fid.length() > 0) fid += File.separator;
            fid += id.substring(i, i + 2);
        }
        try {
            File file = new File(dirBackup, kind);
            file = new File(file.getAbsolutePath(), fid);
            file.mkdirs();
            String folder = file.getAbsolutePath();
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(folder, id + ".txt"))));
            try {
                writer.write(src.toString());
            } finally {
                // Close even when the write fails so the file handle is not leaked.
                writer.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: the caller has no error channel.
        }
    }
 
    /**
     * Copies stored field values from a Lucene document into an entity.
     *
     * <p>The document's {@code SEntity.SCHEMA} value is split on {@code '|'} into
     * (kind, field name) pairs. For each pair whose kind appears in
     * {@code SEntity.ALL_KINDS}, the stored value (or {@code ""} when absent) is set on the
     * entity as a string. A trailing unpaired token is ignored.
     *
     * @param entity entity to populate
     * @param doc    source document
     */
    protected void read(SEntity entity, Document doc) {
        String schema = doc.get(SEntity.SCHEMA);
        String[] tokens = (schema == null ? "" : schema).split("\\|");
        for (int pos = 0; pos + 1 < tokens.length; pos += 2) {
            String kind = tokens[pos];
            String fname = tokens[pos + 1];
            String stored = doc.get(fname);
            boolean knownKind = SEntity.ALL_KINDS.indexOf("|" + kind + "|") >= 0;
            if (!knownKind) {
                continue;
            }
            entity.setString(fname, stored == null ? "" : stored);
        }
    }
 
    /**
     * Adds one Lucene field per schema entry of {@code entity} to {@code doc}.
     *
     * <p>The entity's schema is split on {@code '|'} into (kind, field name) pairs. String
     * kinds become stored, non-analyzed fields without norms; analyzed kinds become stored,
     * analyzed fields; double/float/integer/long kinds become stored, indexed numeric fields.
     * Pairs with any other kind, and a trailing unpaired token, are skipped.
     *
     * @param entity source of schema and values
     * @param doc    document that receives the fields
     */
    protected void write(SEntity entity, Document doc) {
        String schema = entity.getSchema();
        String[] tokens = (schema == null ? "" : schema).split("\\|");
        for (int pos = 0; pos + 1 < tokens.length; pos += 2) {
            String kind = tokens[pos];
            String fname = tokens[pos + 1];
            if (SEntity.STRING.equalsIgnoreCase(kind)) {
                doc.add(new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            } else if (SEntity.DOUBLE.equalsIgnoreCase(kind)) {
                // NumericField setters return the field itself, so they can be chained.
                doc.add(new NumericField(fname, Store.YES, true).setDoubleValue(entity.getDouble(fname)));
            } else if (SEntity.FLOAT.equalsIgnoreCase(kind)) {
                doc.add(new NumericField(fname, Store.YES, true).setFloatValue(entity.getFloat(fname)));
            } else if (SEntity.INTEGER.equalsIgnoreCase(kind)) {
                doc.add(new NumericField(fname, Store.YES, true).setIntValue(entity.getInteger(fname)));
            } else if (SEntity.LONG.equalsIgnoreCase(kind)) {
                doc.add(new NumericField(fname, Store.YES, true).setLongValue(entity.getLong(fname)));
            } else if (SEntity.ANALYZED.equalsIgnoreCase(kind)) {
                doc.add(new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED));
            }
        }
    }
 
    /**
     * Deletes the entity with the given id by running {@code DeleteTask} on a fresh
     * {@link Timer} thread and blocking the caller until the task marks the monitor as
     * finished.
     *
     * <p>If the calling thread is interrupted while waiting, the wait continues (the task is
     * not cancelled) and the interrupt status is restored before returning.
     *
     * @param id identifier of the entity to delete
     */
    public void delete(String id) {
        Monitor monitor = new Monitor();
        Timer timer = new Timer();
        timer.schedule(new DeleteTask(timer, id, monitor), 1);
        boolean interrupted = false;
        while (!monitor.finished) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Remember the interrupt; re-asserting it here would make sleep() throw at once
                // on every iteration and turn the loop into a hot spin.
                interrupted = true;
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
 
    /**
     * Performs the actual delete: looks up the entity's kind in the index, checks
     * {@code allowDelete}, releases its quota (except for quota entities themselves), removes
     * the backup file, and deletes every document whose {@code SEntity.ID} term equals
     * {@code id}.
     *
     * <p>Does nothing when {@code id} is null or empty, when no kind can be determined, or
     * when the delete is not allowed. Best-effort: index failures are swallowed.
     *
     * @param id identifier of the entity to delete
     */
    protected void deleteEntity(String id) {
        if (id == null || id.length() == 0) return;
        String kind = "";

        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex)));
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TopDocs td = searcher.search(new TermQuery(new Term(SEntity.ID, id)), 1);
                    if (td.totalHits > 0) {
                        Document doc = searcher.doc(td.scoreDocs[0].doc);
                        kind = doc.get(SEntity.KIND);
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // Closed in finally so a failed lookup does not leak the reader.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: a failed lookup leaves kind empty and aborts below.
        }
        // The stored document may carry no kind field, in which case doc.get returns null.
        if (kind == null || kind.length() == 0) return;
        if (!allowDelete(id, kind)) return;

        try {
            if (!kind.equals(KIND_QUOTA)) {
                if (!quotaDelete(id, kind)) return;
            }
            removeBackup(id, kind);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dirIndex)), iwc);
            try {
                writer.deleteDocuments(new Term(SEntity.ID, id));
            } finally {
                // Always close: a writer left open after a failure keeps the index write lock.
                writer.close();
            }
        } catch (Exception ignored) {
            // Deliberately best-effort: the caller has no error channel.
        }
    }
 
    /**
     * Hook consulted by {@code deleteEntity} before an entity is removed.
     * This implementation permits every delete.
     *
     * @param id   identifier of the entity being deleted
     * @param kind kind stored with the indexed document
     * @return {@code true} to allow the delete
     */
    protected boolean allowDelete(String id, String kind) {
        return true;
    }
 
    /**
     * Deletes the backup file of an entity, located at
     * {@code dirBackup/kind/<id split into 2-character directories>/id.txt}.
     *
     * <p>Does nothing when the id or kind is empty. The directories are left in place, and
     * any exception raised while deleting is swallowed.
     *
     * @param id   entity identifier
     * @param kind entity kind
     */
    protected void removeBackup(String id, String kind) {
        if (id.length() == 0 || kind.length() == 0) {
            return;
        }
        StringBuilder shards = new StringBuilder();
        for (int pos = 0; pos + 1 < id.length(); pos += 2) {
            if (shards.length() > 0) {
                shards.append(File.separator);
            }
            shards.append(id.substring(pos, pos + 2));
        }
        try {
            File kindDir = new File(dirBackup, kind);
            File shardDir = new File(kindDir.getAbsolutePath(), shards.toString());
            new File(shardDir.getAbsolutePath(), id + ".txt").delete();
        } catch (Exception e) {
        }
    }

    /**
     * Returns the quota limit this handler was constructed with.
     *
     * @return the value of {@code systemQuota}
     */
    public double storageQuota() {
        return systemQuota;
    }
 
    /**
     * Returns the storage currently charged against the quota.
     *
     * @return the {@code "size"} value of the system quota entity, or 0 if none exists
     */
    public double storageSize() {
        SEntity quota = findSystemQuota();
        return quota == null ? 0 : quota.getDouble("size");
    }

    /**
     * Timer task that runs {@code deleteEntity} for one id, then signals the waiting caller
     * through the shared monitor and shuts its timer down.
     */
    private class DeleteTask extends TimerTask {

        private String id;
        private Timer timer;
        private Monitor monitor;

        public DeleteTask(Timer timer, String id, Monitor monitor) {
            this.timer = timer;
            this.id = id;
            this.monitor = monitor;
        }

        @Override
        public void run() {
            try {
                deleteEntity(id);
            } finally {
                // Signal completion even if the work threw; otherwise the caller polling
                // monitor.finished would wait forever and the timer would never be cancelled.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }

    }

    /**
     * Timer task that runs {@code createEntity} for one entity, then signals the waiting
     * caller through the shared monitor and shuts its timer down.
     */
    private class CreateTask extends TimerTask {

        private SEntity entity;
        private Timer timer;
        private Monitor monitor;

        public CreateTask(Timer timer, SEntity entity, Monitor monitor) {
            this.timer = timer;
            this.entity = entity;
            this.monitor = monitor;
        }

        @Override
        public void run() {
            try {
                createEntity(entity);
            } finally {
                // Signal completion even if the work threw; otherwise the caller polling
                // monitor.finished would wait forever and the timer would never be cancelled.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }

    }

    /**
     * Timer task that runs {@code updateEntity} for one entity, then signals the waiting
     * caller through the shared monitor and shuts its timer down.
     */
    private class UpdateTask extends TimerTask {

        private SEntity entity;
        private Timer timer;
        private Monitor monitor;

        public UpdateTask(Timer timer, SEntity entity, Monitor monitor) {
            this.timer = timer;
            this.entity = entity;
            this.monitor = monitor;
        }

        @Override
        public void run() {
            try {
                updateEntity(entity);
            } finally {
                // Signal completion even if the work threw; otherwise the caller polling
                // monitor.finished would wait forever and the timer would never be cancelled.
                monitor.finished = true;
                timer.cancel();
                timer.purge();
                timer = null;
            }
        }

    }
 
    /**
     * Completion flag shared between a caller thread, which polls it, and a timer task,
     * which sets it.
     */
    private class Monitor {
        // volatile: written on the timer thread and read on the caller thread, so the write
        // must be guaranteed visible to the polling loop.
        public volatile boolean finished = false;
    }
 
}
Modify DataHandler class
1public static class DataHandler extends Machine.Handler {
2
3 private String dirIndex;
4 private String dirBackup;
5 private double systemQuota;
6
7 public DataHandler(String dirIndex, String dirBackup, double systemQuota) {
8 this.dirIndex = dirIndex;
9 this.dirBackup = dirBackup;
10 this.systemQuota = systemQuota;
11 }
12
13 public SEntity.Handler getEntityHandler() {
14 return new LuceneHandler(dirIndex, dirBackup, systemQuota);
15 }
16
17..............
18}
/**
 * Machine handler that carries the Lucene storage settings (index directory,
 * backup directory and system quota) and hands out entity handlers built
 * from them.
 */
public static class DataHandler extends Machine.Handler {

    // Settings captured by the constructor and passed unchanged to every
    // LuceneHandler created below.
    private String dirIndex;
    private String dirBackup;
    // NOTE(review): the unit of the quota is not visible here — confirm against LuceneHandler.
    private double systemQuota;
     
    /**
     * Stores the three settings as given; no validation is performed here.
     *
     * @param dirIndex    index directory passed on to LuceneHandler
     * @param dirBackup   backup directory passed on to LuceneHandler
     * @param systemQuota quota value passed on to LuceneHandler
     */
    public DataHandler(String dirIndex, String dirBackup, double systemQuota) {
        this.dirIndex = dirIndex;
        this.dirBackup = dirBackup;
        this.systemQuota = systemQuota;
    }
     
    /**
     * Creates a new LuceneHandler from the stored settings on every call.
     *
     * @return a fresh entity handler backed by Lucene
     */
    public SEntity.Handler getEntityHandler() { 
        return new LuceneHandler(dirIndex, dirBackup, systemQuota);
    }

..............    
}
javascript
1function main(env, args) {
2 var no = 1;
3 if (no == 1) {
4 test01(env, args); // Grab products
5 }
6 if (no == 2) {
7 test02(env, args); // List all products
8 }
9 if (no == 3) {
10 test03(env, args); // Search products
11 }
12 if (no == 4) {
13 test04(env, args); // Delete products
14 }
15}
16
17function test04(env, args) {
18 var entity = env.newEntity();
19 var query = entity.newMatchAllDocsQuery();
20 var products = entity.search('Link', query, 3, 1);
21 for (var i = 0; i < products.size(); i++) {
22 products.get(i).delete();
23 }
24}
25
26function test03(env, args) {
27 var term = 'Sleeping';
28
29 var entity = env.newEntity();
30 var query = entity.parseQuery([term, term], ['desc', 'title'], [entity.occurShould(), entity.occurShould()]);
31 var size = entity.count('Link', query, 999999);
32 var products = entity.search('Link', query, entity.newSort(org.apache.lucene.search.SortField.FIELD_SCORE), 999999);
33 for (var i = 0; i < products.size(); i++) {
34 var title = env.newString(products.get(i).getString('title').getBytes('UTF-8'), 'UTF-8');
35 try {
36 title = entity.highlight(query, title, 'title', 50, 3, ' (...) ');
37 } catch (e) {
38 env.error(e);
39 }
40 var desc = env.newString(products.get(i).getString('desc').getBytes('UTF-8'), 'UTF-8');
41 try {
42 desc = entity.highlight(query, desc, 'desc', 50, 3, ' (...) ');
43 } catch (e) {
44 env.error(e);
45 }
46 printProduct(products.get(i), env, desc, title);
47 }
48}
49
50function test02(env, args) {
51 var entity = env.newEntity();
52 var query = entity.newMatchAllDocsQuery();
53 var size = entity.count('Link', query, 999999);
54 var products = entity.search('Link', query, 999999);
55 env.info('Size: ' + size);
56 for (var i = 0; i < products.size(); i++) {
57 printProduct(products.get(i), env);
58 }
59}
60
61function test01(env, args) {
62 var astore = 'paesia';
63 var node = '100';
64 var maxpage = 2;
65 var products = grabProduct(astore, node, maxpage, env);
66 for (var i = 0; i < products.size(); i++) {
67 var pro = products.get(i);
68 saveProduct(pro, env);
69 }
70 env.info('Saved: ' + products.size());
71}
72
73function printProduct(pro, env, descH, titleH) {
74 var line = '';
75 line += '\nId: ' + pro.getId();
76 line += '\nTitle: ' + pro.getString('title');
77 line += '\nUrl: ' + pro.getString('url');
78 line += '\nDescription: \n' + pro.getString('desc');
79 if (titleH != null) {
80 line += '\nTitle Highlight: \n' + titleH;
81 }
82 if (descH != null) {
83 line += '\nDescription Highlight: \n' + descH;
84 }
85 env.info('\n' + line + '\n');
86}
87
88function saveProduct(pro, env) {
89 var title = pro.get('title');
90 var url = pro.get('url');
91 if (title == null || title.length == 0 || url == null || url.length == 0) return;
92 if (findProductByUrl(url, env)) return;
93 var desc = pro.get('description');
94 if (desc == null) desc = '';
95 if (desc.length() > 0) {
96 var doc = env.newJsoup().parse(desc);
97 desc = doc.select('body').first().text();
98 }
99 var schema = 's|url|a|title|a|desc';
100 var entity = env.newEntity();
101 entity.setSchema(schema);
102 entity.setKind('Link');
103 entity.setId(env.uniqid());
104 entity.setString('url', url);
105 entity.setString('title', title);
106 entity.setString('desc', desc);
107 entity.save();
108}
109
110function findProductByUrl(url, env) {
111 var entity = env.newEntity();
112 var query = entity.newTermQuery(entity.newTerm('url', url));
113 var size = entity.count('Link', query, 1);
114 return (size > 0);
115}
116
117function grabProduct(astore, node, maxpage, env) {
118 var tag = env.newArrayList();
119 for (var no = 1; no <= maxpage; no++) {
120 try {
121 var alink = env.newURL('http://astore.amazon.com/' + astore + '-20?node=' + node + '&page=' + no);
122 var doc = env.newJsoup().parse(alink, 60000);
123 var elements = doc.select('#featuredProducts .textrow a');
124 var map = env.newHashMap();
125 for (var i = 0; i < elements.size(); i++) {
126 var element = elements.get(i);
127 var title = element.text();
128 var url = element.attr('href');
129 var pos = url.lastIndexOf('/detail/');
130 if (pos < 0) continue;
131 var code = url.substring(pos + 8);
132 var url = env.newURL(alink, url) + '';
133 var item = env.newHashMap();
134 item.put('code', code);
135 item.put('title', title);
136 item.put('url', url);
137 map.put(code, item);
138 }
139 elements = doc.select('#featuredProducts .imagerow a');
140 for (var i = 0; i < elements.size(); i++) {
141 var element = elements.get(i);
142 var url = element.attr('href');
143 var pos = url.lastIndexOf('/detail/');
144 if (pos < 0) continue;
145 var code = url.substring(pos + 8);
146 var item = map.get(code);
147 if (item == null) continue;
148 var child = element.select('img').first();
149 if (child == null) continue;
150 var title = child.attr('alt');
151 var smimg = child.attr('src');
152 if (title.length() > 0) {
153 item.put('title', title);
154 }
155 item.put('small-image', smimg);
156 }
157
158 var keys = env.getKeys(map);
159 for (var i = 0; i < keys.size(); i++) {
160 try {
161 var item = map.get(keys.get(i));
162 alink = env.newURL(item.get('url'));
163 doc = env.newJsoup().parse(alink, 60000);
164 var element = doc.select('#detailImage img').first();
165 if (element != null) {
166 item.put('large-image', element.attr('src'));
167 }
168 element = doc.select('#productDescription').first();
169 if (element != null) {
170 var desc = element.html();
171 var pattern = '<h2>Product Description</h2>';
172 var pos = desc.indexOf(pattern);
173 if (pos >= 0) {
174 desc = desc.substring(pos + pattern.length);
175 }
176 var bdoc = env.newJsoup().parse(desc, item.get('url'));
177 buildURL(bdoc, item.get('url'), env);
178 desc = bdoc.select('body').first().html();
179 if (desc.indexOf('<html') < 0) {
180 item.put('description', desc);
181 }
182 }
183 element = doc.select('#productDetails').first();
184 if (element != null) {
185 var desc = element.html();
186 var pattern = '<h2>Product Details</h2>';
187 var pos = desc.indexOf(pattern);
188 if (pos >= 0) {
189 desc = desc.substring(pos + pattern.length);
190 }
191 var bdoc = env.newJsoup().parse(desc, item.get('url'));
192 buildURL(bdoc, item.get('url'), env);
193 desc = bdoc.select('body').first().html();
194 if (desc.indexOf('<html') < 0) {
195 item.put('details', desc);
196 }
197 }
198 element = doc.select('#editorialReviews').first();
199 if (element != null) {
200 var desc = element.html();
201 var bdoc = env.newJsoup().parse(desc, item.get('url') + '');
202 buildURL(bdoc, item.get('url'), env);
203 desc = bdoc.select('body').first().html();
204 if (desc.indexOf('<html') < 0) {
205 item.put('editorial-reviews', desc);
206 }
207 }
208 element = doc.select('#detailListPrice').first();
209 if (element != null) {
210 item.put('list-price', element.text());
211 }
212 element = doc.select('#detailOfferPrice').first();
213 if (element != null) {
214 item.put('offer-price', element.text());
215 }
216 element = doc.select('#addToCartForm a').first();
217 if (element != null) {
218 item.put('buy-url', element.attr('href'));
219 }
220 } catch (e) {
221 env.error(e);
222 }
223 }
224
225 for (var i = 0; i < keys.size(); i++) {
226 tag.add(map.get(keys.get(i)));
227 }
228 } catch (e) {
229 env.error(e);
230 }
231 }
232 return tag;
233}
234
235function buildURL(doc, baseUrl, env) {
236 baseUrl = env.newURL(baseUrl);
237 var elements = doc.select('a');
238 for (var i = 0; i < elements.size(); i++) {
239 var element = elements.get(i);
240 var url = env.newURL(baseUrl, element.attr('href'));
241 element.attr('href', url + '');
242 }
243 elements = doc.select('img');
244 for (var i = 0; i < elements.size(); i++) {
245 var element = elements.get(i);
246 var url = env.newURL(baseUrl, element.attr('src'));
247 element.attr('src', url + '');
248 }
249}
// Script entry point (presumably the function Machine.run invokes — confirm).
// Runs exactly one of the demo scenarios; edit `scenario` to pick which one.
function main(env, args) {
  var scenario = 1;
  switch (scenario) {
    case 1: // Grab products
      test01(env, args);
      break;
    case 2: // List all products
      test02(env, args);
      break;
    case 3: // Search products
      test03(env, args);
      break;
    case 4: // Delete products
      test04(env, args);
      break;
  }
}

// Scenario 4: searches 'Link' entities with a match-all query (extra search
// arguments 3 and 1 are passed through as-is) and deletes every hit returned.
function test04(env, args) {
  var entity = env.newEntity();
  var hits = entity.search('Link', entity.newMatchAllDocsQuery(), 3, 1);
  var index = 0;
  // size() is re-read on every pass, exactly like a classic for-loop condition.
  while (index < hits.size()) {
    hits.get(index).delete();
    index++;
  }
}

// Scenario 3: searches 'Link' entities whose 'desc' or 'title' matches the
// term, sorted by score, and prints each hit with highlighted title/description.
function test03(env, args) {
  var term = 'Sleeping';

  var entity = env.newEntity();
  var query = entity.parseQuery([term, term], ['desc', 'title'], [entity.occurShould(), entity.occurShould()]);
  // The count is not used afterwards; the call is kept so the same requests are issued.
  var size = entity.count('Link', query, 999999);
  var byScore = entity.newSort(org.apache.lucene.search.SortField.FIELD_SCORE);
  var products = entity.search('Link', query, byScore, 999999);

  // Reads one stored field of hit `index` and returns its highlighted form.
  // The value is rebuilt through a UTF-8 byte round trip first; if highlighting
  // fails the error is logged and the un-highlighted text is returned.
  function highlighted(index, field) {
    var text = env.newString(products.get(index).getString(field).getBytes('UTF-8'), 'UTF-8');
    try {
      text = entity.highlight(query, text, field, 50, 3, ' (...) ');
    } catch (e) {
      env.error(e);
    }
    return text;
  }

  for (var i = 0; i < products.size(); i++) {
    var title = highlighted(i, 'title');
    var desc = highlighted(i, 'desc');
    printProduct(products.get(i), env, desc, title);
  }
}

// Scenario 2: logs how many 'Link' entities match a match-all query and
// prints each one returned by the search.
function test02(env, args) {
  var entity = env.newEntity();
  var matchAll = entity.newMatchAllDocsQuery();
  var total = entity.count('Link', matchAll, 999999);
  var found = entity.search('Link', matchAll, 999999);
  env.info('Size: ' + total);
  var index = 0;
  while (index < found.size()) {
    printProduct(found.get(index), env);
    index++;
  }
}

// Scenario 1: grabs products from the first two pages of node '100' of the
// 'paesia' store and hands each one to saveProduct.
function test01(env, args) {
  var products = grabProduct('paesia', '100', 2, env);
  var index = 0;
  while (index < products.size()) {
    saveProduct(products.get(index), env);
    index++;
  }
  // Logs the number of grabbed products (the list size), not the number actually stored.
  env.info('Saved: ' + products.size());
}

// Logs one product through env.info: id, title, url and description, followed
// by the optional title/description highlights when they are supplied.
// Note the parameter order: the description highlight comes before the title highlight.
function printProduct(pro, env, descH, titleH) {
  // Each piece is concatenated before it is collected, so null values render
  // the same way as with plain string concatenation.
  var parts = [
    '\nId: ' + pro.getId(),
    '\nTitle: ' + pro.getString('title'),
    '\nUrl: ' + pro.getString('url'),
    '\nDescription: \n' + pro.getString('desc')
  ];
  if (titleH != null) {
    parts.push('\nTitle Highlight: \n' + titleH);
  }
  if (descH != null) {
    parts.push('\nDescription Highlight: \n' + descH);
  }
  env.info('\n' + parts.join('') + '\n');
}

// Stores one grabbed product as a 'Link' entity with the fields url, title
// and desc (the description reduced to plain text through jsoup).
// Nothing is stored when the title or url is missing/empty, or when
// findProductByUrl reports that the url is already present.
//   pro - map-like object read with get('title' | 'url' | 'description')
//   env - sandbox environment (newEntity, newJsoup, uniqid)
function saveProduct(pro, env) {
  // Values read from the map may arrive as host (Java) strings or as
  // JavaScript strings depending on how the engine wraps them. The two differ:
  // `length` is a method on the former and a number on the latter. Converting
  // everything to a JavaScript string up front makes the checks below valid in
  // both cases and turns a missing value into ''.
  function text(value) {
    return (value == null) ? '' : String(value);
  }

  var title = text(pro.get('title'));
  var url = text(pro.get('url'));
  if (title.length == 0 || url.length == 0) return;
  if (findProductByUrl(url, env)) return;

  var desc = text(pro.get('description'));
  if (desc.length > 0) {
    // Strip the markup: keep only the text content of the parsed body.
    var doc = env.newJsoup().parse(desc);
    desc = doc.select('body').first().text();
  }

  var schema = 's|url|a|title|a|desc';
  var entity = env.newEntity();
  entity.setSchema(schema);
  entity.setKind('Link');
  entity.setId(env.uniqid());
  entity.setString('url', url);
  entity.setString('title', title);
  entity.setString('desc', desc);
  entity.save();
}

// Returns true when at least one 'Link' entity has exactly this value in its
// 'url' field (term query, count limited to 1).
function findProductByUrl(url, env) {
  var entity = env.newEntity();
  var byUrl = entity.newTermQuery(entity.newTerm('url', url));
  return entity.count('Link', byUrl, 1) > 0;
}

// Scrapes listing pages 1..maxpage of the given aStore node and, for every
// product found, its detail page.
// Returns a list (env.newArrayList()) of maps. Each map always has 'code',
// 'title' and 'url'; when the matching elements exist on the pages it also gets
// 'small-image', 'large-image', 'description', 'details', 'editorial-reviews',
// 'list-price', 'offer-price' and 'buy-url'.
// Errors are logged with env.error: a failure on a listing page drops that
// whole page, a failure on a detail page keeps the product with whatever
// fields were collected before the failure.
function grabProduct(astore, node, maxpage, env) {
  var tag = env.newArrayList();
  for (var no = 1; no <= maxpage; no++) {
    try {
      var alink = env.newURL('http://astore.amazon.com/' + astore + '-20?node=' + node + '&page=' + no);
      var doc = env.newJsoup().parse(alink, 60000);
      // Pass 1: text links of the listing give code, title and absolute url.
      var elements = doc.select('#featuredProducts .textrow a');
      // Products of this page, keyed by product code.
      var map = env.newHashMap();
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var title = element.text();
        var url = element.attr('href');
        var pos = url.lastIndexOf('/detail/');
        if (pos < 0) continue;
        // 8 is the length of '/detail/': the code is everything after it.
        var code = url.substring(pos + 8);
        // Resolve against the listing url; + '' turns the URL object into a string.
        var url = env.newURL(alink, url) + '';
        var item = env.newHashMap();
        item.put('code', code);
        item.put('title', title);
        item.put('url', url);
        map.put(code, item);
      }
      // Pass 2: image links of the same listing add the thumbnail and, when the
      // image has a non-empty alt text, replace the title with it.
      elements = doc.select('#featuredProducts .imagerow a');
      for (var i = 0; i < elements.size(); i++) {
        var element = elements.get(i);
        var url = element.attr('href');
        var pos = url.lastIndexOf('/detail/');
        if (pos < 0) continue;
        var code = url.substring(pos + 8);
        var item = map.get(code);
        // Only products already found through a text link are enriched.
        if (item == null) continue;
        var child = element.select('img').first();
        if (child == null) continue;
        var title = child.attr('alt');
        var smimg = child.attr('src');
        if (title.length() > 0) {
          item.put('title', title);
        }
        item.put('small-image', smimg);
      }

      // Pass 3: fetch every product's detail page and add the remaining fields.
      var keys = env.getKeys(map);
      for (var i = 0; i < keys.size(); i++) {
        try {
          var item = map.get(keys.get(i));
          // alink and doc are reused: from here on they refer to the detail
          // page, not the listing page.
          alink = env.newURL(item.get('url'));
          doc = env.newJsoup().parse(alink, 60000);
          var element = doc.select('#detailImage img').first();
          if (element != null) {
            item.put('large-image', element.attr('src'));
          }
          element = doc.select('#productDescription').first();
          if (element != null) {
            var desc = element.html();
            var pattern = '<h2>Product Description</h2>';
            var pos = desc.indexOf(pattern);
            if (pos >= 0) {
              // Drop everything up to and including the heading.
              desc = desc.substring(pos + pattern.length);
            }
            // Re-parse the fragment so its links and images can be made absolute.
            var bdoc = env.newJsoup().parse(desc, item.get('url'));
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            // The fragment is not stored when it still contains an '<html' tag.
            if (desc.indexOf('<html') < 0) {
              item.put('description', desc);
            }
          }
          // Same treatment as the description, for the product details section.
          element = doc.select('#productDetails').first();
          if (element != null) {
            var desc = element.html();
            var pattern = '<h2>Product Details</h2>';
            var pos = desc.indexOf(pattern);
            if (pos >= 0) {
              desc = desc.substring(pos + pattern.length);
            }
            var bdoc = env.newJsoup().parse(desc, item.get('url'));
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            if (desc.indexOf('<html') < 0) {
              item.put('details', desc);
            }
          }
          // Editorial reviews: same treatment, but no heading is stripped.
          element = doc.select('#editorialReviews').first();
          if (element != null) {
            var desc = element.html();
            var bdoc = env.newJsoup().parse(desc, item.get('url') + '');
            buildURL(bdoc, item.get('url'), env);
            desc = bdoc.select('body').first().html();
            if (desc.indexOf('<html') < 0) {
              item.put('editorial-reviews', desc);
            }
          }
          element = doc.select('#detailListPrice').first();
          if (element != null) {
            item.put('list-price', element.text());
          }
          element = doc.select('#detailOfferPrice').first();
          if (element != null) {
            item.put('offer-price', element.text());
          }
          element = doc.select('#addToCartForm a').first();
          if (element != null) {
            // Stored as found in the page; unlike 'url' it is not resolved to an absolute url.
            item.put('buy-url', element.attr('href'));
          }
        } catch (e) {
          // A failing detail page only affects this product.
          env.error(e);
        }
      }

      // Append this page's products to the overall result.
      for (var i = 0; i < keys.size(); i++) {
        tag.add(map.get(keys.get(i)));
      }
    } catch (e) {
      // A failing listing page is logged and skipped; later pages are still tried.
      env.error(e);
    }
  }
  return tag;
}

// Rewrites, in place, the href of every <a> and the src of every <img> in
// `doc` to the url obtained by resolving it against baseUrl.
function buildURL(doc, baseUrl, env) {
  baseUrl = env.newURL(baseUrl);

  // Resolves `attrName` of every element matching `selector` against baseUrl
  // and writes the result back as a string.
  function absolutize(selector, attrName) {
    var nodes = doc.select(selector);
    for (var n = 0; n < nodes.size(); n++) {
      var node = nodes.get(n);
      var resolved = env.newURL(baseUrl, node.attr(attrName));
      node.attr(attrName, resolved + '');
    }
  }

  absolutize('a', 'href');
  absolutize('img', 'src');
}

  Protected by Copyscape Online Copyright Protection

No comments:

Post a Comment