]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.db.indexing/src/org/simantics/db/indexing/IndexedRelationsSearcherBase.java
dad5ad07b13ea49011ea03d0d70b5a254c62d32c
[simantics/platform.git] / bundles / org.simantics.db.indexing / src / org / simantics / db / indexing / IndexedRelationsSearcherBase.java
1 /*******************************************************************************
2  * Copyright (c) 2007, 2015 Association for Decentralized Information Management
3  * in Industry THTH ry.
4  * All rights reserved. This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License v1.0
6  * which accompanies this distribution, and is available at
7  * http://www.eclipse.org/legal/epl-v10.html
8  *
9  * Contributors:
10  *     VTT Technical Research Centre of Finland - initial API and implementation
11  *     Semantum Oy - Fix for simantics issue #6053
12  *******************************************************************************/
13 package org.simantics.db.indexing;
14
15 import java.io.IOException;
16 import java.nio.file.Files;
17 import java.nio.file.Path;
18 import java.util.ArrayList;
19 import java.util.Collection;
20 import java.util.Collections;
21 import java.util.Iterator;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.concurrent.CompletableFuture;
25 import java.util.concurrent.Semaphore;
26 import java.util.concurrent.atomic.AtomicReference;
27
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.DocumentStoredFieldVisitor;
30 import org.apache.lucene.document.Field;
31 import org.apache.lucene.document.FieldType;
32 import org.apache.lucene.document.LongField;
33 import org.apache.lucene.document.TextField;
34 import org.apache.lucene.index.CorruptIndexException;
35 import org.apache.lucene.index.DirectoryReader;
36 import org.apache.lucene.index.FieldInfo;
37 import org.apache.lucene.index.IndexNotFoundException;
38 import org.apache.lucene.index.IndexReader;
39 import org.apache.lucene.index.IndexWriter;
40 import org.apache.lucene.index.IndexWriterConfig;
41 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
42 import org.apache.lucene.index.IndexableField;
43 import org.apache.lucene.index.StoredFieldVisitor;
44 import org.apache.lucene.index.Term;
45 import org.apache.lucene.queryparser.classic.ParseException;
46 import org.apache.lucene.search.IndexSearcher;
47 import org.apache.lucene.search.MatchAllDocsQuery;
48 import org.apache.lucene.search.Query;
49 import org.apache.lucene.search.ScoreDoc;
50 import org.apache.lucene.search.TermQuery;
51 import org.apache.lucene.search.TopDocs;
52 import org.apache.lucene.store.Directory;
53 import org.apache.lucene.store.FSDirectory;
54 import org.apache.lucene.util.Version;
55 import org.eclipse.core.runtime.IProgressMonitor;
56 import org.eclipse.core.runtime.SubMonitor;
57 import org.simantics.databoard.util.ObjectUtils;
58 import org.simantics.db.ReadGraph;
59 import org.simantics.db.RequestProcessor;
60 import org.simantics.db.Resource;
61 import org.simantics.db.Session;
62 import org.simantics.db.common.request.SafeName;
63 import org.simantics.db.common.utils.NameUtils;
64 import org.simantics.db.exception.DatabaseException;
65 import org.simantics.db.indexing.exception.IndexCorruptedException;
66 import org.simantics.db.indexing.exception.IndexingException;
67 import org.simantics.db.indexing.internal.IndexingJob;
68 import org.simantics.db.layer0.adapter.GenericRelation;
69 import org.simantics.db.layer0.genericrelation.IndexException;
70 import org.simantics.db.request.Read;
71 import org.simantics.db.service.CollectionSupport;
72 import org.simantics.db.service.SerialisationSupport;
73 import org.simantics.utils.FileUtils;
74 import org.simantics.utils.datastructures.Pair;
75 import org.simantics.utils.threads.ThreadUtils;
76 import org.slf4j.Logger;
77
78 import gnu.trove.map.hash.THashMap;
79
80 /**
81  * @author Tuukka Lehtonen
82  * @author Antti Villberg
83  */
84 abstract public class IndexedRelationsSearcherBase {
85
    /**
     * Lifecycle states of this index searcher. The current value is kept in
     * the <code>state</code> field and changed through <code>changeState</code>,
     * <code>setProblem</code>, <code>setNone</code> and <code>setReady</code>.
     */
    protected enum State {
        // No index is available
        NONE, 
        // An index is available, but there is a problem with it
        PROBLEM, 
        // An index is available but no reader or writer is ready
        READY,
        // A reader is ready
        READ, 
        // A writer (and a reader) is ready
        WRITE
    }
98     
    // Current lifecycle state. The initializer value is replaced in the
    // constructor depending on the result of isIndexAvailable().
    private State state = State.READY;
    // Problem most recently stored by setProblem(Throwable) or by a failed
    // bestEffortClear in changeState; may be null.
    private Throwable exception;
101     
102     public Throwable getException() {
103         return exception;
104     }
105
106     public void setProblem(Throwable t) {
107         if (t != null)
108             getLogger().error("Setting problem for {} and previous state {}", this, this.state, t);
109         this.state = State.PROBLEM;
110         this.exception = t;
111     }
112     
113     public void setNone() {
114         this.state = State.NONE;
115     }
116     
117     public void setReady() {
118         this.state = State.READY;
119     }
120     
121     public State state() {
122         return state;
123     }
124     
125     protected boolean checkState(State state) {
126         return this.state == state;
127     }
128     
129     protected void assertState(State state) throws IndexException {
130         State s = this.state;
131         if (s != state)
132             throw new IndexException("Illegal index searcher state, expected " + state.name() + " but state was " + s.name());
133     }
134     
135     public void changeState(IProgressMonitor monitor, Session session, State state) {
136         changeState(monitor, session, state, 0);
137     }
138
    /**
     * Performs the actual state transition.
     * <p>
     * Any currently held searcher and reader are released and the writer is
     * closed. If the target state is {@link State#READ} or {@link State#WRITE},
     * the index is then opened from the directory returned by
     * <code>getDirectory</code>. If anything fails, the state becomes
     * {@link State#PROBLEM} and this method calls itself once more with
     * <code>depth + 1</code>, which triggers a <code>bestEffortClear</code>
     * attempt to get back to {@link State#NONE}.
     * <p>
     * This method does not throw on failure; callers must inspect the state
     * afterwards.
     *
     * @param monitor progress monitor used while loading the index
     * @param session session passed on to <code>bestEffortClear</code> and <code>getDirectory</code>
     * @param state the requested target state
     * @param depth recursion depth; a value greater than zero enables the
     *        attempt to leave {@link State#PROBLEM}
     */
    protected void changeState(IProgressMonitor monitor, Session session, State state, int depth) {

        // Nothing to do when already in the requested state.
        if (this.state == state) {
            if (getLogger().isDebugEnabled())
                getLogger().debug("Trying to change state {} to the same as previous state {} in depth {} with {}", state, this.state, depth, this);
            return;
        }

        if (IndexPolicy.TRACE_INDEX_MANAGEMENT)
                System.err.println("Index state " + this.state.name() + " => " + state.name() + " " + this);

        // Check transitions
        
        // Try to exit problem state
        // Only done on recursive invocations (depth > 0). Note that this branch
        // always returns: on success the state is NONE regardless of the
        // requested target state.
        if (State.PROBLEM == this.state && depth > 0) {
            getLogger().info("Try to exit problem state for {} and state {}", this, state);
                // bestEffortClear is defined elsewhere; as used here a non-null
                // return value means the clearing failed.
                Throwable t = bestEffortClear(monitor, session);
                if(t != null) {
                    getLogger().error("Best effort clear has failed for state {} and this {}", state, this, t);
                                exception = t;
                                return;
                }
                // Managed to get into initial state
                this.state = State.NONE;
                getLogger().info("Managed to get into initial state {}", this.state);
                return;
        }

        // Cannot move into read from no index
        if (State.NONE ==  this.state && State.READ == state) {
            if (getLogger().isDebugEnabled())
                getLogger().debug("Cannot move into read from no index in {} with state {}", this, state);
            return;
        }
        // Cannot move into write from no index
        if (State.NONE ==  this.state && State.WRITE == state) {
            if (getLogger().isDebugEnabled())
                getLogger().debug("Cannot move into write from no index in {} with state {}", this, state);
            return;
        }
        
                // Tracks whether the transition completed; evaluated in the finally block.
                boolean success = false;

        try {

                // Leave the current state: release searcher, reader and writer.
                if (searcher != null) {
                        searcher = null;
                }
                if (reader != null) {
                        reader.close();
                        reader = null;
                }
                        // NOTE(review): the writer is closed but the writer field is not
                        // reset to null here, so a later transition passes the same
                        // (already closed) instance to closeWriter again.
                        closeWriter(writer);
                        // NOTE(review): the directory reference is dropped without
                        // calling close() on it.
                        directory = null;
                        
                        success = true;

                // Enter new state
                if (State.READ == state || State.WRITE == state) {
                        
                        // Opening the index may fail, so success has to be earned again.
                        success = false;
                        
                        boolean forWriting = State.WRITE == state;

                        // NOTE(review): directory was set to null a few lines above, so
                        // this check cannot trigger on this code path.
                        if (directory != null)
                                throw new IllegalStateException(getDescriptor() + "Index already loaded");

                        SubMonitor mon = SubMonitor.convert(monitor, 100);

                        mon.beginTask("Loading index", 100);

                        if (IndexPolicy.TRACE_INDEX_LOAD)
                    System.out.println(getDescriptor() + "Loading Lucene index from " + indexPath + " for " + (forWriting ? "writing" : "reading"));

                long start = System.nanoTime();

                directory = getDirectory(session);

                if (forWriting) {
                    // Never overwrite an index that is about to be loaded.
                    // TODO: could use OpenMode.CREATE_OR_APPEND but must test first
                    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, Queries.getAnalyzer()).setOpenMode(OpenMode.APPEND);
                    try {
                        // FIXME: platform #4676
                        writer = new IndexWriter(directory, config);
                    } catch (IndexNotFoundException e) {
                        // There was no pre-existing index on disk. Create it now.
                        writer = new IndexWriter(directory, config.setOpenMode(OpenMode.CREATE));
                        writer.commit();
                    }
                    // The reader is opened on the directory, not on the writer.
                    reader = DirectoryReader.open(directory);
                    searcher = new IndexSearcher(reader);
                } else {
                    reader = DirectoryReader.open(directory);
                    searcher = new IndexSearcher(reader);
                }

                long end = System.nanoTime();

                mon.worked(100);

                if (IndexPolicy.PERF_INDEX_LOAD) {
                    double time = (end - start) * 1e-6;
                    System.out.println(getDescriptor() + "Loaded Lucene index from " + indexPath + " for " + (forWriting ? "writing" : "reading") + " in " + time + " ms");
                }

                success = true;
                
                }
                
        } catch (Throwable t) {
                // Records the cause and switches to PROBLEM; success stays false.
                setProblem(t);
        } finally {

                if(!success) {
                        // Recurse with depth+1 so that the PROBLEM branch above attempts
                        // a bestEffortClear. Returning from finally means the requested
                        // state is not assigned below.
                        this.state = State.PROBLEM;
                        changeState(monitor, session, State.NONE, depth+1);
                        return;
                }

        }

        this.state = state;
        
    }
264
    /**
     * Field type used by <code>makeField</code> for fields of class "String":
     * indexed, stored and tokenized. Frozen in the static initializer below.
     */
    public static final FieldType STRING_TYPE = new FieldType();

    static {
      STRING_TYPE.setIndexed(true);
      STRING_TYPE.setStored(true);
      STRING_TYPE.setTokenized(true);
      // freeze() is called last, after all settings have been applied.
      STRING_TYPE.freeze();
    }
273
274     protected static Field makeField(String fieldName, String fieldClass) throws IndexingException {
275         switch (fieldClass) {
276         case "Long":   return new LongField(fieldName, 0L, Field.Store.YES);
277         case "String": return new Field    (fieldName, "", STRING_TYPE);
278         case "Text":   return new TextField(fieldName, "", Field.Store.YES);
279         default:
280             throw new IndexingException("Can only index Long, String and Text fields, encountered field type " + fieldClass);
281         }
282     }
283
284     protected static Field[] makeFieldsForRelation(GenericRelation r, int boundLength, Document document) throws DatabaseException {
285         Pair<String, String>[] fields = r.getFields();
286         Field[] fs = new Field[Math.max(0, fields.length - boundLength)];
287         for (int i = boundLength; i < fields.length; i++) {
288             Field f = makeField(fields[i].first, fields[i].second);
289             fs[i - boundLength] = f;
290             if (document != null)
291                 document.add(f);
292         }
293         return fs;
294     }
295
296     void insertIndex(IProgressMonitor monitor, GenericRelation r, int boundLength, Collection<Object[]> documentsData)
297     throws CorruptIndexException, IOException, DatabaseException {
298         assertAccessOpen(true);
299
300         if (IndexPolicy.TRACE_INDEX_UPDATE)
301             System.out.println(getDescriptor() + "Inserting " + documentsData.size() + " documents into index at " + indexPath);
302
303         long start = 0, end = 0;
304         if (IndexPolicy.PERF_INDEX_UPDATE)
305             start = System.nanoTime();
306
307         try {
308             Document document = new Document();
309             Field[] fs = makeFieldsForRelation(r, boundLength, document);
310
311             for (Object[] documentData : documentsData) {
312                 if (setFields(fs, documentData) == null)
313                     continue;
314
315                 if (IndexPolicy.TRACE_INDEX_UPDATE)
316                     System.out.println(getDescriptor() + "Inserting document " + document);
317
318                 writer.addDocument(document);
319             }
320
321             if (IndexPolicy.PERF_INDEX_UPDATE) {
322                 end = System.nanoTime();
323                 double ms = (end - start) * 1e-6;
324                 System.out.println(getDescriptor() + "Inserted " + documentsData.size() + " documents into index at " + indexPath + " in " + ms + " ms");
325             }
326
327         } finally {
328         }
329     }
330
331     void removeIndex(IProgressMonitor monitor, GenericRelation r, RequestProcessor processor, String key, Collection<Object> keyValues) throws DatabaseException, CorruptIndexException, IOException {
332         assertAccessOpen(true);
333
334         if (IndexPolicy.TRACE_INDEX_UPDATE)
335             System.out.println(getDescriptor() + "Removing " + keyValues.size() + " documents from index at " + indexPath);
336
337         long start = 0, end = 0;
338         if (IndexPolicy.PERF_INDEX_UPDATE)
339             start = System.nanoTime();
340
341         try {
342             for (Object keyValue : keyValues) {
343                 Term removedTerm = null;
344                 if (keyValue instanceof Long) {
345                     removedTerm = IndexUtils.longTerm(key, (Long) keyValue);
346                 } else if (keyValue instanceof String) {
347                     removedTerm = new Term(key, (String) keyValue);
348                 } else {
349                     // FIXME: should throw an exception for illegal input data but this would leave the index in an incoherent state
350                     getLogger().error("Attempting to remove document from index of {} with key {} and unrecognized key value type {} : {}", input, key, keyValue, keyValue != null ? keyValue.getClass() : "null");
351                     continue;
352                 }
353
354                 if (IndexPolicy.TRACE_INDEX_UPDATE)
355                     System.out.println(getDescriptor() + "Removing document with key " + removedTerm);
356                 writer.deleteDocuments(removedTerm);
357             }
358
359             if (IndexPolicy.PERF_INDEX_UPDATE) {
360                 end = System.nanoTime();
361                 double ms = (end - start) * 1e-6;
362                 System.out.println(getDescriptor() + "Removed " + keyValues.size() + " documents from index at " + indexPath + " in " + ms + " ms");
363             }
364
365         } finally {
366         }
367     }
368
369     void removeIndex(IProgressMonitor monitor) throws DatabaseException, CorruptIndexException, IOException {
370         assertAccessOpen(true);
371
372         long start = 0, end = 0;
373         if (IndexPolicy.PERF_INDEX_UPDATE)
374             start = System.nanoTime();
375
376         try {
377
378             writer.deleteAll();
379
380             if (IndexPolicy.PERF_INDEX_UPDATE) {
381                 end = System.nanoTime();
382                 double ms = (end - start) * 1e-6;
383                 System.out.println(getDescriptor() + "Removed all documents from index at " + indexPath + " in " + ms + " ms");
384             }
385
386         } finally {
387         }
388     }
389     
390     boolean replaceIndex(IProgressMonitor monitor, String key, Collection<Object> keyValues, GenericRelation r, int boundLength, Collection<Object[]> documentsData) throws CorruptIndexException, IOException, DatabaseException {
391
392         boolean didReplace = false;
393         
394         assertAccessOpen(true);
395         if (keyValues.size() != documentsData.size())
396             throw new IllegalArgumentException("keyValues size does not match documents data size, " + keyValues.size() + " <> " + documentsData.size());
397
398         if (IndexPolicy.TRACE_INDEX_UPDATE)
399             System.out.println(getDescriptor() + "Replacing " + keyValues.size() + " documents from index at " + indexPath);
400
401         long start = 0, end = 0;
402         if (IndexPolicy.PERF_INDEX_UPDATE)
403             start = System.nanoTime();
404
405         try {
406             Iterator<Object> keyIt = keyValues.iterator();
407             Iterator<Object[]> documentDataIt = documentsData.iterator();
408
409             Document document = new Document();
410             Field[] fs = makeFieldsForRelation(r, boundLength, document);
411
412             nextDocument:
413                 while (keyIt.hasNext()) {
414                     Object keyValue = keyIt.next();
415                     Object[] documentData = documentDataIt.next();
416
417                     Term removedTerm = null;
418                     if (keyValue instanceof Long) {
419                         removedTerm = IndexUtils.longTerm(key, (Long) keyValue);
420                     } else if (keyValue instanceof String) {
421                         removedTerm = new Term(key, (String) keyValue);
422                     } else {
423                         // FIXME: should throw an exception for illegal input data but this would leave the index in an incoherent state
424                         System.err.println("[" + getClass().getSimpleName() + "] Unrecognized document key to remove '" + keyValue + "', only " + String.class + " and " + Resource.class + " are supported.");
425                         continue nextDocument;
426                     }
427
428                     if (setFields(fs, documentData) == null)
429                         continue nextDocument;
430
431                     if (IndexPolicy.TRACE_INDEX_UPDATE)
432                         System.out.println(getDescriptor() + "Replacing document with key " + removedTerm + " with " + document);
433
434                     boolean done = false;
435                     if(requireChangeInfoOnReplace()) {
436                             TopDocs exist = searcher.search(new TermQuery(removedTerm), null, 2);
437                             if(exist.scoreDocs.length == 1) {
438                                 Document doc = reader.document(exist.scoreDocs[0].doc);
439                                 if(!areSame(doc, document)) {
440                                     writer.deleteDocuments(removedTerm);
441                                     writer.addDocument(document);
442                                     didReplace |= true;
443                                     if (IndexPolicy.TRACE_INDEX_UPDATE)
444                                         System.out.println("-replaced single existing");
445                                 } else {
446                                     if (IndexPolicy.TRACE_INDEX_UPDATE)
447                                         System.out.println("-was actually same than single existing");
448                                 }
449                                 done = true;
450                             } 
451                     }
452                     if(!done) {
453                         writer.deleteDocuments(removedTerm);
454                         writer.addDocument(document);
455                         didReplace |= true;
456                         if (IndexPolicy.TRACE_INDEX_UPDATE)
457                                 System.out.println("-had many or none - removed all existing");
458                     }
459                     
460                 }
461
462             if (IndexPolicy.PERF_INDEX_UPDATE) {
463                 end = System.nanoTime();
464                 double ms = (end - start) * 1e-6;
465                 System.out.println(getDescriptor() + "Replaced " + keyValues.size() + " documents from index at " + indexPath + " in " + ms + " ms");
466             }
467
468         } finally {
469         }
470         
471         return didReplace;
472         
473     }
474     
    /**
     * Controls whether <code>replaceIndex</code> looks up the currently
     * indexed document for a key before rewriting it. When this returns
     * <code>true</code> and exactly one matching document exists that is the
     * same as the new one, <code>replaceIndex</code> leaves the index
     * untouched for that key.
     *
     * @return <code>true</code> in this base implementation
     */
    protected boolean requireChangeInfoOnReplace() {
        return true;
    }
478     
479     private boolean areSame(Document d1, Document d2) {
480         List<IndexableField> fs1 = d1.getFields();
481         List<IndexableField> fs2 = d2.getFields();
482         if(fs1.size() != fs2.size()) return false;
483         for(int i=0;i<fs1.size();i++) {
484                 IndexableField f1 = fs1.get(i);
485                 IndexableField f2 = fs2.get(i);
486                 String s1 = f1.stringValue();
487                 String s2 = f2.stringValue();
488             if (IndexPolicy.TRACE_INDEX_UPDATE)
489                 System.err.println("areSame " + s1 + " vs. " + s2 );
490                 if(!ObjectUtils.objectEquals(s1,s2)) return false;
491         }
492         return true;
493     }
494
    // Request processor given to the constructor; used there to resolve the
    // index directory and to read the index schema.
    final RequestProcessor session;

    // The relation resource given to the constructor.
    final Resource         relation;

    /**
     * The schema of the index, i.e. the fields that will be indexed per
     * document for the specified relation. Since the relation stays the same
     * throughout the lifetime of this class, the index schema is also assumed
     * to stay the same. This means that {@link GenericRelation#getFields()} is
     * assumed to stay the same.
     */
    final IndexSchema      schema;

    // The input resource given to the constructor; passed to
    // getIndexDirectory together with the relation.
    final Resource         input;

    // Location of the index on disk, resolved in the constructor.
    Path             indexPath;

    // Assigned by changeState when the index is opened for reading or
    // writing; reset to null at the start of every state transition.
    Directory        directory;

    // Reader opened by changeState for the READ and WRITE states.
    IndexReader      reader;

    // Writer opened by changeState for the WRITE state.
    IndexWriter      writer;

    // Searcher created by changeState on top of the reader.
    IndexSearcher    searcher;
519
520     IndexedRelationsSearcherBase(RequestProcessor session, Resource relation, Resource input) {
521         this.session = session;
522         this.relation = relation;
523         this.input = input;
524         this.indexPath = getIndexDirectory(session.getSession(), relation, input);
525         if(isIndexAvailable()) {
526                 state = State.READY;
527         } else {
528                 state = State.NONE;
529         }
530         this.schema = IndexSchema.readFromRelation(session, relation);
531     }
532
533     public Resource getRelation() {
534         return relation;
535     }
536
537     public Resource getInput() {
538         return input;
539     }
540
541     Directory getDirectory(Session session) throws IOException {
542         return FSDirectory.open(indexPath.toFile());
543     }
544
    /**
     * @return a descriptor of this searcher; within this class it is used as
     *         the prefix of trace and exception messages
     */
    abstract String getDescriptor();
546     
547     /**
548      * Ensures that searcher is in read or write state.
549      * 
550      * @param forWriting <code>true</code> to open index for writing,
551      *        <code>false</code> for reading
552      * @return true is required state was reached       
553      *        
554      */
555     boolean startAccess(IProgressMonitor monitor, Session session, boolean forWriting) {
556         if(forWriting) {
557                 changeState(monitor, session, State.WRITE);
558                 return checkState(State.WRITE);
559         } else {
560                 changeState(monitor, session, State.READ);
561                 return checkState(State.READ);
562         }
563     }
564
565     boolean hasAccess(boolean forWriting) {
566         
567         if (forWriting)
568                 return checkState(State.WRITE); 
569         else
570                 return checkState(State.WRITE) || checkState(State.READ);
571         
572     }
573     
574     void assertAccessOpen(boolean forWriting) {
575         if (forWriting)
576                 if(!checkState(State.WRITE)) 
577                 throw new IllegalStateException("index not opened for writing (directory=" + directory + ", reader=" + reader + ")");
578         else
579                 if(!(checkState(State.WRITE) || checkState(State.READ))) 
580                 throw new IllegalStateException("index not opened for reading (directory=" + directory + ", writer=" + writer + ")");
581     }
582     
583     void closeWriter(IndexWriter writer) throws CorruptIndexException, IOException {
584         if (writer == null)
585             return;
586
587         try {
588             // May throw OOME, see IndexWriter javadoc for the correct actions.
589             writer.close(false);
590         } catch (OutOfMemoryError e) {
591             writer.close();
592             throw e;
593         }
594     }
595
596     public static String getPattern(GenericRelation relation, int boundCount) {
597         String result = "";
598         for (int i = 0; i < boundCount; i++)
599             result += "b";
600         for (int i = 0; i < relation.getFields().length - boundCount; i++)
601             result += "f";
602         return result;
603     }
604
605     void initializeIndex(IProgressMonitor monitor, ReadGraph graph, Object[] bound, boolean overwrite)
606             throws IOException, DatabaseException
607     {
608         IndexingJob.jobifyIfPossible(
609                 monitor,
610                 "Reindexing " + NameUtils.getSafeLabel(graph, input),
611                 mon -> {
612                     try {
613                         GenericRelation r = graph.adapt(relation, GenericRelation.class);
614                         if (r == null)
615                             throw new IndexingException("Given resource " + relation + "could not be adapted to GenericRelation.");
616
617                         GenericRelation selection = r.select(getPattern(r, bound.length), bound);
618
619                         List<Object[]> results = selection.realize(graph);
620                         initializeIndexImpl(new CompletableFuture<>(), mon, r, results, bound, overwrite);
621                     } catch (IOException e) {
622                         getLogger().error("Index is in problematic state! {}", this, e);
623                         throw new IndexingException(e);
624                     }
625                 });
626     }
627
    /**
     * Number of indexing tasks submitted by <code>initializeIndexImpl</code>.
     * Task <code>i</code> processes the result entries <code>i</code>,
     * <code>i + INDEXING_THREAD_COUNT</code>, and so on.
     */
    private static final int INDEXING_THREAD_COUNT = 2; // this is quite good parallelism level for lucene
629
630     void initializeIndexImpl(CompletableFuture<?> result, IProgressMonitor monitor, GenericRelation r, List<Object[]> results, final Object[] bound, boolean overwrite) throws IOException {
631         try {
632             final SubMonitor mon = SubMonitor.convert(monitor, 100);
633     
634             if (IndexPolicy.TRACE_INDEX_INIT)
635                 System.out.println(getDescriptor() + "Initializing index at " + indexPath + " (overwrite = " + overwrite + ")");
636             mon.beginTask("Initializing Index", 100);
637     
638             if (overwrite) {
639                 if (Files.exists(indexPath)) {
640                     mon.subTask("Erasing previous index");
641                     if (getLogger().isDebugEnabled())
642                         getLogger().debug("Erasing previous index {}", indexPath.toAbsolutePath());
643                     FileUtils.emptyDirectory(indexPath);
644                 }
645             }
646     
647             final AtomicReference<FSDirectory> directory = new AtomicReference<FSDirectory>();
648             final AtomicReference<IndexWriter> writer = new AtomicReference<IndexWriter>();
649     
650             try {
651                 mon.subTask("Start index write");
652                 Files.createDirectories(indexPath);
653     
654                 directory.set(FSDirectory.open(indexPath.toFile()));
655                 IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_9, Queries.getAnalyzer()).setOpenMode(OpenMode.CREATE);
656                 writer.set(new IndexWriter(directory.get(), conf));
657     
658                 mon.worked(5);
659     
660                 long realizeStart = 0;
661                 if (IndexPolicy.PERF_INDEX_INIT)
662                     realizeStart = System.nanoTime();
663     
664                 mon.subTask("Calculating indexed content");
665                 mon.worked(5);
666                 
667                 mon.worked(40);
668     
669                 if (IndexPolicy.PERF_INDEX_INIT)
670                     System.out.println(getDescriptor() + "Realized index with " + results.size() + " entries at " + indexPath + " in " + (1e-9 * (System.nanoTime()-realizeStart)) + " seconds.");
671     
672                 if (IndexPolicy.TRACE_INDEX_INIT)
673                     System.out.println(getDescriptor() + "Indexed relation " + r + " produced " + results.size() + " results");
674                 
675                 long start = IndexPolicy.PERF_INDEX_INIT ? System.nanoTime() : 0;
676     
677                 mon.subTask("Indexing content");
678                 final Semaphore s = new Semaphore(0);
679                 mon.setWorkRemaining(results.size());
680                 for (int i = 0; i < INDEXING_THREAD_COUNT; i++) {
681                     final int startIndex = i;
682                     ThreadUtils.getBlockingWorkExecutor().submit(() -> {
683                         try {
684                             Document document = new Document();
685                             Field[] fs = makeFieldsForRelation(r, bound.length, document);
686     
687                             for (int index = startIndex; index < results.size(); index += INDEXING_THREAD_COUNT) {
688                                 if (setFields(fs, results.get(index)) == null)
689                                     continue;
690                                 try {
691                                     writer.get().addDocument(document);
692                                 } catch (CorruptIndexException e) {
693                                     getLogger().error("Index is corrupted! {}", this, e);
694                                     throw new IllegalStateException(e);
695                                 } catch (IOException e) {
696                                     getLogger().error("Index is in problematic state! {}", this, e);
697                                     throw new IllegalStateException(e);
698                                 } finally {
699                                     synchronized (mon) {
700                                         mon.worked(1);
701                                     }
702                                 }
703                             }
704                         } catch (DatabaseException e) {
705                             throw new IllegalStateException(e);
706                         } finally {
707                             s.release();
708                         }
709                     });
710                 }
711     
712                 try {
713                     s.acquire(INDEXING_THREAD_COUNT);
714                 } catch (InterruptedException e) {
715                     getLogger().error("Could not initialize index {}", this, e);
716                 }
717     
718                 // http://www.gossamer-threads.com/lists/lucene/java-dev/47895
719                 // and http://lucene.apache.org/java/docs/index.html#27+November+2011+-+Lucene+Core+3.5.0
720                 // advise against calling optimize at all. So let's not do it anymore.
721                 //writer.get().optimize();
722                 //writer.get().commit();
723     
724                 mon.subTask("Flushing");
725     
726                 if (IndexPolicy.PERF_INDEX_INIT)
727                     System.out.println(getDescriptor() + "Wrote index at " + indexPath + " in " + (1e-9 * (System.nanoTime()-start)) + " seconds.");
728                 
729                 result.complete(null);
730     //        } catch (DatabaseException e) {
731     //            getLogger().error("Could not initialize index due to db {}", this, e);
732             } finally {
733                 try {
734                     closeWriter(writer.getAndSet(null));
735                 } finally {
736                     FileUtils.uncheckedClose(directory.getAndSet(null));
737                 }
738             }
739         } catch (Throwable t) {
740             getLogger().error("Could not initialize index", t);
741             result.completeExceptionally(t);
742         }
743     }
744
745     
746     public List<Object[]> debugDocs(IProgressMonitor monitor) throws ParseException, IOException, IndexingException {
747     
748             Query query = new MatchAllDocsQuery(); 
749         
750             TopDocs td = searcher.search(query, Integer.MAX_VALUE);
751     
752             ScoreDoc[ ] scoreDocs = td.scoreDocs; 
753             List<Object[]> result = new ArrayList<Object[]>(scoreDocs.length);
754         
755             for(ScoreDoc scoreDoc:scoreDocs) {
756         
757                 try {
758         
759                     Document doc = reader.document(scoreDoc.doc);
760                     List<IndexableField> fs = doc.getFields();
761                     Object[] o = new Object[fs.size()];
762                     int index = 0; 
763                     for (IndexableField f : fs) {
764                     o[index++] = f.stringValue();
765                     }
766                     result.add(o);
767         
768             } catch (CorruptIndexException e) {
769                 getLogger().error("Index is corrupted! {}", this, e);
770                 throw new IndexCorruptedException("Index is corrupted! " + this, e);
771             } catch (IOException e) {
772                 getLogger().error("Index is in problematic state! {}", this, e);
773                 throw new IndexingException(e);
774             }
775
776             }
777             
778             return result;
779             
780     }
781
782     
783     List<Map<String, Object>> doSearch(IProgressMonitor monitor, RequestProcessor processor, String search, int maxResultCount) throws ParseException, IOException,
784     IndexingException {
785
786         // An empty search string will crash QueryParser
787         // Just return no results for empty queries.
788         //System.out.println("search: '" + search + "'");
789         if (search.isEmpty())
790             return Collections.emptyList();
791
792         assertAccessOpen(false);
793
794         Query query = Queries.parse(search, schema);
795
796         long start = System.nanoTime();
797
798         maxResultCount = Math.min(maxResultCount, searcher.getIndexReader().numDocs());
799         if (maxResultCount == 0)
800             return Collections.emptyList();
801         
802         final TopDocs docs = searcher.search(query, null, maxResultCount);
803         
804 //        for(Object[] o : debugDocs(monitor)) {
805 //            System.err.println("-" + Arrays.toString(o));
806 //        }
807         
808         if (IndexPolicy.PERF_INDEX_QUERY) {
809             long end = System.nanoTime();
810             System.out.println(getDescriptor() + "search(" + search + ", " + maxResultCount + ") into index at " + indexPath + " took " + (1e-9 * (end-start)) + " seconds.");
811         }
812
813         if (docs.totalHits == 0) {
814             return Collections.emptyList();
815         }
816
817         try {
818             return processor.syncRequest(new Read<List<Map<String, Object>>>() {
819
820                 @Override
821                 public List<Map<String, Object>> perform(ReadGraph graph) throws DatabaseException {
822
823                     GenericRelation r = graph.adapt(relation, GenericRelation.class);
824                     if (r == null)
825                         throw new IndexingException("Given resource " + graph.syncRequest(new SafeName(relation))
826                                 + "could not be adapted to GenericRelation.");
827
828                     SerialisationSupport support = graph.getService(SerialisationSupport.class);
829
830                     List<Map<String, Object>> result = new ArrayList<Map<String, Object>>(docs.scoreDocs.length);
831                     
832                     final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
833                     
834                     for (ScoreDoc scoreDoc : docs.scoreDocs) {
835
836                         try {
837
838                             reader.document(scoreDoc.doc, visitor);
839                             
840                             Document doc = visitor.getDocument();
841
842                             List<IndexableField> fs = doc.getFields();
843                             Map<String, Object> entry = new THashMap<String, Object>(fs.size());
844                             for (IndexableField f : fs) {
845                                 IndexSchema.Type type = schema.typeMap.get(f.name());
846                                 if (type == IndexSchema.Type.LONG) {
847                                     entry.put(f.name(), support.getResource(f.numericValue().longValue()));
848                                 } else {
849                                     entry.put(f.name(), f.stringValue());
850                                 }
851                             }
852                             
853                             result.add(entry);
854
855                         } catch (CorruptIndexException e) {
856                             getLogger().error("Index is corrupted! {}", this, e);
857                             throw new IndexCorruptedException("Index is corrupted! " + " " + this + " " + scoreDoc, e);
858                         } catch (IOException e) {
859                             getLogger().error("Index is in problematic state! {}", this, e);
860                             throw new IndexingException(e);
861                         }
862                     }
863                     return result;
864                 }
865             });
866         } catch (DatabaseException e) {
867             if (e instanceof IndexingException) {
868                 throw (IndexingException) e;
869             } else {
870                 throw new IndexingException(e);
871             }
872         }
873     }
874
875     static class ResourceVisitor extends StoredFieldVisitor {
876         
877         public long id;
878
879                 @Override
880                 public Status needsField(FieldInfo fieldInfo) throws IOException {
881                         if("Resource".equals(fieldInfo.name)) return Status.YES;
882                         return Status.NO;
883                 }
884                 
885                 @Override
886                 public void longField(FieldInfo fieldInfo, long value) throws IOException {
887                         id = value;
888                 }
889         
890     };
891     
892     static class DumpVisitor extends StoredFieldVisitor {
893
894         public List<Object> values;
895         
896         DumpVisitor(List<Object> values) {
897                 this.values = values;
898         }
899
900                 @Override
901                 public Status needsField(FieldInfo fieldInfo) throws IOException {
902                         return Status.YES;
903                 }
904                 
905                 @Override
906                 public void longField(FieldInfo fieldInfo, long value) throws IOException {
907                         values.add(value);
908                 }
909                 
910                 @Override
911                 public void stringField(FieldInfo fieldInfo, String value) throws IOException {
912                         values.add(value);
913                 }
914
915     }
916
917     List<Resource> doSearchResources(IProgressMonitor monitor, RequestProcessor processor, String search, int maxResultCount) throws ParseException, IOException,
918     IndexingException {
919
920         // An empty search string will crash QueryParser
921         // Just return no results for empty queries.
922         //System.out.println("search: '" + search + "'");
923         if (search.isEmpty())
924             return Collections.emptyList();
925
926         assertAccessOpen(false);
927
928         Query query = Queries.parse(search, schema);
929
930         long start = System.nanoTime();
931
932         maxResultCount = Math.min(maxResultCount, searcher.getIndexReader().numDocs());
933         if (maxResultCount == 0)
934             return Collections.emptyList();
935         
936         final TopDocs docs = searcher.search(query, null, maxResultCount);
937         
938 //        for(Object[] o : debugDocs(monitor)) {
939 //            System.err.println("-" + Arrays.toString(o));
940 //        }
941         
942         if (IndexPolicy.PERF_INDEX_QUERY) {
943             long end = System.nanoTime();
944             System.out.println(getDescriptor() + "search(" + search + ", " + maxResultCount + ") into index at " + indexPath + " took " + (1e-9 * (end-start)) + " seconds.");
945         }
946
947         if (docs.totalHits == 0) {
948             return Collections.emptyList();
949         }
950         
951         try {
952             return processor.syncRequest(new Read<List<Resource>>() {
953
954                 @Override
955                 public List<Resource> perform(ReadGraph graph) throws DatabaseException {
956
957                         CollectionSupport cs = graph.getService(CollectionSupport.class);
958                     SerialisationSupport support = graph.getService(SerialisationSupport.class);
959                     
960                         List<Resource> result = cs.createList();
961                     
962                         ResourceVisitor visitor = new ResourceVisitor();
963                     
964                     for (ScoreDoc scoreDoc : docs.scoreDocs) {
965                         try {
966                                 reader.document(scoreDoc.doc, visitor);
967                                 result.add(support.getResource(visitor.id));
968                         } catch (CorruptIndexException e) {
969                             getLogger().error("Index is corrupted! {}", this, e);
970                             throw new IndexCorruptedException("Index is corrupted! " + " " + this + " " + scoreDoc, e);
971                         } catch (IOException e) {
972                             getLogger().error("Index is in problematic state! {}", this, e);
973                             throw new IndexingException(e);
974                         }
975                     }
976                     return result;
977                 }
978             });
979         } catch (DatabaseException e) {
980             if (e instanceof IndexingException) {
981                 throw (IndexingException) e;
982             } else {
983                 throw new IndexingException(e);
984             }
985         }
986     }
987
988     List<Object> doList(IProgressMonitor monitor, RequestProcessor processor) throws ParseException, IOException,
989     IndexingException {
990
991         assertAccessOpen(false);
992
993         Query query = new MatchAllDocsQuery(); 
994
995         final TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
996         
997         ArrayList<Object> result = new ArrayList<Object>();
998         
999         DumpVisitor visitor = new DumpVisitor(result);
1000                 
1001         for (ScoreDoc scoreDoc : docs.scoreDocs) {
1002
1003                 try {
1004
1005                         reader.document(scoreDoc.doc, visitor);
1006
1007                 } catch (CorruptIndexException e) {
1008                     getLogger().error("Index is corrupted! {}", this, e);
1009                     throw new IndexCorruptedException("Index is corrupted! " + " " + this + " " + scoreDoc, e);
1010                 } catch (IOException e) {
1011                     getLogger().error("Index is in problematic state! {}", this, e);
1012                         throw new IndexingException(e);
1013                 }
1014
1015         }
1016
1017         return result;
1018
1019     }
1020     
1021     protected static Path getIndexDirectory(Session session, Resource relation, Resource input) {
1022         Path path = DatabaseIndexing.getIndexLocation(session, relation, input);
1023 //        System.out.println("getIndexDirectory = " + path);
1024         return path;
1025     }
1026
1027     Path getIndexPath() {
1028         return indexPath;
1029     }
1030
1031     boolean isIndexAvailable() {
1032         return Files.isDirectory(indexPath);
1033     }
1034
1035     abstract Throwable bestEffortClear(IProgressMonitor monitor, Session session);
1036
1037     /*
1038      * Start from scratch. Clear all caches and rebuild the index. 
1039      */
1040     Throwable clearDirectory(IProgressMonitor monitor, Session session) {
1041         
1042                 Path file = getIndexPath();
1043
1044         try {
1045                         FileUtils.delete(file);
1046         } catch (Throwable t) {
1047                 getLogger().error("Could not delete directory {}", file.toAbsolutePath(), t);
1048                 return t;
1049         }
1050         if (Files.exists(file))
1051             return new IllegalStateException("Failed to delete directory " + file.toAbsolutePath());
1052         return null;
1053     }
1054
1055     private Field[] setFields(Field[] fs, Object[] result) {
1056         for (int i = 0; i < result.length; i++) {
1057             Object value = result[i];
1058             if (value instanceof String) {
1059                 if (IndexPolicy.DEBUG_INDEX_INIT)
1060                     System.out.println(getDescriptor() + "index " + fs[i].name() + " = " + value + " : String");
1061                 fs[i].setStringValue((String) value);
1062             } else if (value instanceof Long) {
1063                 if (IndexPolicy.DEBUG_INDEX_INIT)
1064                     System.out.println(getDescriptor() + "index " + fs[i].name() + " = " + value + " : Long");
1065                 fs[i].setLongValue((Long) value);
1066             } else {
1067                 getLogger().error("Can only index Long and String fields, encountered " + value);
1068                 return null;
1069             }
1070         }
1071         return fs;
1072     }
1073
1074     protected abstract Logger getLogger();
1075     
1076     @Override
1077     public String toString() {
1078         return getClass().getSimpleName() + " [" + String.valueOf(schema) + ", " + String.valueOf(relation) + ", " + String.valueOf(input) + ", " + String.valueOf(indexPath) + ", " + String.valueOf(directory) + ", " + String.valueOf(state) + "]";
1079     }
1080 }