]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.db.indexing/src/org/simantics/db/indexing/IndexedRelationsSearcherBase.java
c70650ede27bb92c86222654b22e2f039fd12d8a
[simantics/platform.git] / bundles / org.simantics.db.indexing / src / org / simantics / db / indexing / IndexedRelationsSearcherBase.java
1 /*******************************************************************************
2  * Copyright (c) 2007, 2015 Association for Decentralized Information Management
3  * in Industry THTH ry.
4  * All rights reserved. This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License v1.0
6  * which accompanies this distribution, and is available at
7  * http://www.eclipse.org/legal/epl-v10.html
8  *
9  * Contributors:
10  *     VTT Technical Research Centre of Finland - initial API and implementation
11  *     Semantum Oy - Fix for simantics issue #6053
12  *******************************************************************************/
13 package org.simantics.db.indexing;
14
15 import java.io.IOException;
16 import java.nio.file.Files;
17 import java.nio.file.Path;
18 import java.util.ArrayList;
19 import java.util.Collection;
20 import java.util.Collections;
21 import java.util.Iterator;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.concurrent.ExecutorService;
25 import java.util.concurrent.Executors;
26 import java.util.concurrent.Semaphore;
27 import java.util.concurrent.ThreadFactory;
28 import java.util.concurrent.atomic.AtomicReference;
29
30 import org.apache.lucene.document.Document;
31 import org.apache.lucene.document.DocumentStoredFieldVisitor;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.document.FieldType;
34 import org.apache.lucene.document.LongField;
35 import org.apache.lucene.document.TextField;
36 import org.apache.lucene.index.CorruptIndexException;
37 import org.apache.lucene.index.DirectoryReader;
38 import org.apache.lucene.index.FieldInfo;
39 import org.apache.lucene.index.IndexNotFoundException;
40 import org.apache.lucene.index.IndexReader;
41 import org.apache.lucene.index.IndexWriter;
42 import org.apache.lucene.index.IndexWriterConfig;
43 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
44 import org.apache.lucene.index.IndexableField;
45 import org.apache.lucene.index.StoredFieldVisitor;
46 import org.apache.lucene.index.Term;
47 import org.apache.lucene.queryparser.classic.ParseException;
48 import org.apache.lucene.search.IndexSearcher;
49 import org.apache.lucene.search.MatchAllDocsQuery;
50 import org.apache.lucene.search.Query;
51 import org.apache.lucene.search.ScoreDoc;
52 import org.apache.lucene.search.TermQuery;
53 import org.apache.lucene.search.TopDocs;
54 import org.apache.lucene.store.Directory;
55 import org.apache.lucene.store.FSDirectory;
56 import org.apache.lucene.util.Version;
57 import org.eclipse.core.runtime.IProgressMonitor;
58 import org.eclipse.core.runtime.SubMonitor;
59 import org.simantics.databoard.util.ObjectUtils;
60 import org.simantics.db.ReadGraph;
61 import org.simantics.db.RequestProcessor;
62 import org.simantics.db.Resource;
63 import org.simantics.db.Session;
64 import org.simantics.db.common.request.SafeName;
65 import org.simantics.db.common.utils.NameUtils;
66 import org.simantics.db.exception.DatabaseException;
67 import org.simantics.db.indexing.internal.IndexingJob;
68 import org.simantics.db.layer0.adapter.GenericRelation;
69 import org.simantics.db.request.Read;
70 import org.simantics.db.service.CollectionSupport;
71 import org.simantics.db.service.SerialisationSupport;
72 import org.simantics.utils.FileUtils;
73 import org.simantics.utils.datastructures.Pair;
74 import org.slf4j.Logger;
75
76 import gnu.trove.map.hash.THashMap;
77
78 /**
79  * @author Tuukka Lehtonen
80  * @author Antti Villberg
81  */
82 abstract public class IndexedRelationsSearcherBase {
83
    /**
     * Lifecycle states of the index handled by this searcher. The current
     * state is kept in the {@code state} field and is switched by
     * {@code changeState} and the {@code setProblem}/{@code setNone}/{@code setReady}
     * methods.
     */
    protected enum State {
        // No index is available
        NONE,
        // An index is available, but there is a problem with it
        PROBLEM,
        // An index is available but no reader or writer is ready
        READY,
        // A reader is ready
        READ,
        // A writer (and a reader) is ready
        WRITE
    }
96     
    // Current lifecycle state. The initializer value is overwritten by the
    // constructor, which picks READY or NONE depending on isIndexAvailable().
    private State state = State.READY;
    // Throwable stored by setProblem(Throwable) or by a failed recovery attempt
    // in changeState; may be null.
    private Throwable exception;
99     
100     public Throwable getException() {
101         return exception;
102     }
103
104     public void setProblem(Throwable t) {
105         if (t != null)
106             getLogger().error("Setting problem for {} and previous state {}", this, this.state, t);
107         this.state = State.PROBLEM;
108         this.exception = t;
109     }
110     
111     public void setNone() {
112         this.state = State.NONE;
113     }
114     
115     public void setReady() {
116         this.state = State.READY;
117     }
118     
119     protected boolean checkState(State state) {
120         return this.state == state;
121     }
122     
123     protected void assertState(State state) throws AssertionError {
124
125         if(this.state != state) throw new AssertionError("Illegal state, expected " + state.name() + " but was in " + this.state.name());
126         
127     }
128     
129     public void changeState(IProgressMonitor monitor, Session session, State state) {
130         changeState(monitor, session, state, 0);
131     }
132
133     protected void changeState(IProgressMonitor monitor, Session session, State state, int depth) {
134
135         if (this.state == state) {
136             if (getLogger().isDebugEnabled())
137                 getLogger().debug("Trying to change state {} to the same as previous state {} in depth {} with {}", state, this.state, depth, this);
138             return;
139         }
140
141         if (IndexPolicy.TRACE_INDEX_MANAGEMENT)
142                 System.err.println("Index state " + this.state.name() + " => " + state.name() + " " + this);
143
144         // Check transitions
145         
146         // Try to exit problem state
147         if (State.PROBLEM == this.state && depth > 0) {
148             getLogger().info("Try to exit problem state for {} and state {}", this, state);
149                 Throwable t = bestEffortClear(monitor, session);
150                 if(t != null) {
151                     getLogger().error("Best effort clear has failed for state {} and this {}", state, this, t);
152                                 exception = t;
153                                 return;
154                 }
155                 // Managed to get into initial state
156                 this.state = State.NONE;
157                 getLogger().info("Managed to get into initial state {}", this.state);
158                 return;
159         }
160
161         // Cannot move into read from no index
162         if (State.NONE ==  this.state && State.READ == state) {
163             if (getLogger().isDebugEnabled())
164                 getLogger().debug("Cannot move into read from no index in {} with state {}", this, state);
165             return;
166         }
167         // Cannot move into write from no index
168         if (State.NONE ==  this.state && State.WRITE == state) {
169             if (getLogger().isDebugEnabled())
170                 getLogger().debug("Cannot move into write from no index in {} with state {}", this, state);
171             return;
172         }
173         
174                 boolean success = false;
175
176         try {
177
178                 if (searcher != null) {
179                         searcher = null;
180                 }
181                 if (reader != null) {
182                         reader.close();
183                         reader = null;
184                 }
185                         closeWriter(writer);
186                         directory = null;
187                         
188                         success = true;
189
190                 // Enter new state
191                 if (State.READ == state || State.WRITE == state) {
192                         
193                         success = false;
194                         
195                         boolean forWriting = State.WRITE == state;
196
197                         if (directory != null)
198                                 throw new IllegalStateException(getDescriptor() + "Index already loaded");
199
200                         SubMonitor mon = SubMonitor.convert(monitor, 100);
201
202                         mon.beginTask("Loading index", 100);
203
204                         if (IndexPolicy.TRACE_INDEX_LOAD)
205                     System.out.println(getDescriptor() + "Loading Lucene index from " + indexPath + " for " + (forWriting ? "writing" : "reading"));
206
207                 long start = System.nanoTime();
208
209                 directory = getDirectory(session);
210
211                 if (forWriting) {
212                     // Never overwrite an index that is about to be loaded.
213                     // TODO: could use OpenMode.CREATE_OR_APPEND but must test first
214                     IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, Queries.getAnalyzer()).setOpenMode(OpenMode.APPEND);
215                     try {
216                         // FIXME: platform #4676
217                         writer = new IndexWriter(directory, config);
218                     } catch (IndexNotFoundException e) {
219                         // There was no pre-existing index on disk. Create it now.
220                         writer = new IndexWriter(directory, config.setOpenMode(OpenMode.CREATE));
221                         writer.commit();
222                     }
223                     reader = DirectoryReader.open(directory);
224                     searcher = new IndexSearcher(reader);
225                 } else {
226                     reader = DirectoryReader.open(directory);
227                     searcher = new IndexSearcher(reader);
228                 }
229
230                 long end = System.nanoTime();
231
232                 mon.worked(100);
233
234                 if (IndexPolicy.PERF_INDEX_LOAD) {
235                     double time = (end - start) * 1e-6;
236                     System.out.println(getDescriptor() + "Loaded Lucene index from " + indexPath + " for " + (forWriting ? "writing" : "reading") + " in " + time + " ms");
237                 }
238
239                 success = true;
240                 
241                 }
242                 
243         } catch (Throwable t) {
244                 setProblem(t);
245         } finally {
246
247                 if(!success) {
248                         this.state = State.PROBLEM;
249                         changeState(monitor, session, State.NONE, depth+1);
250                         return;
251                 }
252
253         }
254
255         this.state = state;
256         
257     }
258
    /**
     * Lucene field type used by {@code makeField} for fields of class
     * "String": indexed, stored and tokenized. The type is frozen by the
     * static initializer that follows.
     */
    public static final FieldType STRING_TYPE = new FieldType();

    static {
      STRING_TYPE.setIndexed(true);
      STRING_TYPE.setStored(true);
      STRING_TYPE.setTokenized(true);
      // Freeze after configuration is complete.
      STRING_TYPE.freeze();
    }
267
268     protected static Field makeField(String fieldName, String fieldClass) throws DatabaseException {
269         switch (fieldClass) {
270         case "Long":   return new LongField(fieldName, 0L, Field.Store.YES);
271         case "String": return new Field    (fieldName, "", STRING_TYPE);
272         case "Text":   return new TextField(fieldName, "", Field.Store.YES);
273         default:
274             throw new DatabaseException("Can only index Long, String and Text fields, encountered field type " + fieldClass);
275         }
276     }
277
278     protected static Field[] makeFieldsForRelation(GenericRelation r, int boundLength, Document document) throws DatabaseException {
279         Pair<String, String>[] fields = r.getFields();
280         Field[] fs = new Field[Math.max(0, fields.length - boundLength)];
281         for (int i = boundLength; i < fields.length; i++) {
282             Field f = makeField(fields[i].first, fields[i].second);
283             fs[i - boundLength] = f;
284             if (document != null)
285                 document.add(f);
286         }
287         return fs;
288     }
289
290     void insertIndex(IProgressMonitor monitor, GenericRelation r, int boundLength, Collection<Object[]> documentsData)
291     throws CorruptIndexException, IOException, DatabaseException {
292         assertAccessOpen(true);
293
294         if (IndexPolicy.TRACE_INDEX_UPDATE)
295             System.out.println(getDescriptor() + "Inserting " + documentsData.size() + " documents into index at " + indexPath);
296
297         long start = 0, end = 0;
298         if (IndexPolicy.PERF_INDEX_UPDATE)
299             start = System.nanoTime();
300
301         try {
302             Document document = new Document();
303             Field[] fs = makeFieldsForRelation(r, boundLength, document);
304
305             for (Object[] documentData : documentsData) {
306                 if (setFields(fs, documentData) == null)
307                     continue;
308
309                 if (IndexPolicy.TRACE_INDEX_UPDATE)
310                     System.out.println(getDescriptor() + "Inserting document " + document);
311
312                 writer.addDocument(document);
313             }
314
315             if (IndexPolicy.PERF_INDEX_UPDATE) {
316                 end = System.nanoTime();
317                 double ms = (end - start) * 1e-6;
318                 System.out.println(getDescriptor() + "Inserted " + documentsData.size() + " documents into index at " + indexPath + " in " + ms + " ms");
319             }
320
321         } finally {
322         }
323     }
324
325     void removeIndex(IProgressMonitor monitor, GenericRelation r, RequestProcessor processor, String key, Collection<Object> keyValues) throws DatabaseException, CorruptIndexException, IOException {
326         assertAccessOpen(true);
327
328         if (IndexPolicy.TRACE_INDEX_UPDATE)
329             System.out.println(getDescriptor() + "Removing " + keyValues.size() + " documents from index at " + indexPath);
330
331         long start = 0, end = 0;
332         if (IndexPolicy.PERF_INDEX_UPDATE)
333             start = System.nanoTime();
334
335         try {
336             for (Object keyValue : keyValues) {
337                 Term removedTerm = null;
338                 if (keyValue instanceof Long) {
339                     removedTerm = IndexUtils.longTerm(key, (Long) keyValue);
340                 } else if (keyValue instanceof String) {
341                     removedTerm = new Term(key, (String) keyValue);
342                 } else {
343                     // FIXME: should throw an exception for illegal input data but this would leave the index in an incoherent state
344                     continue;
345                 }
346
347                 if (IndexPolicy.TRACE_INDEX_UPDATE)
348                     System.out.println(getDescriptor() + "Removing document with key " + removedTerm);
349                 writer.deleteDocuments(removedTerm);
350             }
351
352             if (IndexPolicy.PERF_INDEX_UPDATE) {
353                 end = System.nanoTime();
354                 double ms = (end - start) * 1e-6;
355                 System.out.println(getDescriptor() + "Removed " + keyValues.size() + " documents from index at " + indexPath + " in " + ms + " ms");
356             }
357
358         } finally {
359         }
360     }
361
362     void removeIndex(IProgressMonitor monitor) throws DatabaseException, CorruptIndexException, IOException {
363         assertAccessOpen(true);
364
365         long start = 0, end = 0;
366         if (IndexPolicy.PERF_INDEX_UPDATE)
367             start = System.nanoTime();
368
369         try {
370
371             writer.deleteAll();
372
373             if (IndexPolicy.PERF_INDEX_UPDATE) {
374                 end = System.nanoTime();
375                 double ms = (end - start) * 1e-6;
376                 System.out.println(getDescriptor() + "Removed all documents from index at " + indexPath + " in " + ms + " ms");
377             }
378
379         } finally {
380         }
381     }
382     
383     boolean replaceIndex(IProgressMonitor monitor, String key, Collection<Object> keyValues, GenericRelation r, int boundLength, Collection<Object[]> documentsData) throws CorruptIndexException, IOException, DatabaseException {
384
385         boolean didReplace = false;
386         
387         assertAccessOpen(true);
388         if (keyValues.size() != documentsData.size())
389             throw new IllegalArgumentException("keyValues size does not match documents data size, " + keyValues.size() + " <> " + documentsData.size());
390
391         if (IndexPolicy.TRACE_INDEX_UPDATE)
392             System.out.println(getDescriptor() + "Replacing " + keyValues.size() + " documents from index at " + indexPath);
393
394         long start = 0, end = 0;
395         if (IndexPolicy.PERF_INDEX_UPDATE)
396             start = System.nanoTime();
397
398         try {
399             Iterator<Object> keyIt = keyValues.iterator();
400             Iterator<Object[]> documentDataIt = documentsData.iterator();
401
402             Document document = new Document();
403             Field[] fs = makeFieldsForRelation(r, boundLength, document);
404
405             nextDocument:
406                 while (keyIt.hasNext()) {
407                     Object keyValue = keyIt.next();
408                     Object[] documentData = documentDataIt.next();
409
410                     Term removedTerm = null;
411                     if (keyValue instanceof Long) {
412                         removedTerm = IndexUtils.longTerm(key, (Long) keyValue);
413                     } else if (keyValue instanceof String) {
414                         removedTerm = new Term(key, (String) keyValue);
415                     } else {
416                         // FIXME: should throw an exception for illegal input data but this would leave the index in an incoherent state
417                         System.err.println("[" + getClass().getSimpleName() + "] Unrecognized document key to remove '" + keyValue + "', only " + String.class + " and " + Resource.class + " are supported.");
418                         continue nextDocument;
419                     }
420
421                     if (setFields(fs, documentData) == null)
422                         continue nextDocument;
423
424                     if (IndexPolicy.TRACE_INDEX_UPDATE)
425                         System.out.println(getDescriptor() + "Replacing document with key " + removedTerm + " with " + document);
426
427                     boolean done = false;
428                     if(requireChangeInfoOnReplace()) {
429                             TopDocs exist = searcher.search(new TermQuery(removedTerm), null, 2);
430                             if(exist.scoreDocs.length == 1 && requireChangeInfoOnReplace()) {
431                                 Document doc = reader.document(exist.scoreDocs[0].doc);
432                                 if(!areSame(doc, document)) {
433                                     writer.deleteDocuments(removedTerm);
434                                     writer.addDocument(document);
435                                     didReplace |= true;
436                                     if (IndexPolicy.TRACE_INDEX_UPDATE)
437                                         System.out.println("-replaced single existing");
438                                 } else {
439                                     if (IndexPolicy.TRACE_INDEX_UPDATE)
440                                         System.out.println("-was actually same than single existing");
441                                 }
442                                 done = true;
443                             } 
444                     }
445                     if(!done) {
446                         writer.deleteDocuments(removedTerm);
447                         writer.addDocument(document);
448                         didReplace |= true;
449                         if (IndexPolicy.TRACE_INDEX_UPDATE)
450                                 System.out.println("-had many or none - removed all existing");
451                     }
452                     
453                 }
454
455             if (IndexPolicy.PERF_INDEX_UPDATE) {
456                 end = System.nanoTime();
457                 double ms = (end - start) * 1e-6;
458                 System.out.println(getDescriptor() + "Replaced " + keyValues.size() + " documents from index at " + indexPath + " in " + ms + " ms");
459             }
460
461         } finally {
462         }
463         
464         return didReplace;
465         
466     }
467     
    /**
     * Consulted by {@code replaceIndex} for every replaced document. When this
     * returns <code>true</code>, {@code replaceIndex} searches for the existing
     * document and skips the write if it equals the new one.
     *
     * @return <code>true</code> in this base implementation
     */
    protected boolean requireChangeInfoOnReplace() {
        return true;
    }
471     
472     private boolean areSame(Document d1, Document d2) {
473         List<IndexableField> fs1 = d1.getFields();
474         List<IndexableField> fs2 = d2.getFields();
475         if(fs1.size() != fs2.size()) return false;
476         for(int i=0;i<fs1.size();i++) {
477                 IndexableField f1 = fs1.get(i);
478                 IndexableField f2 = fs2.get(i);
479                 String s1 = f1.stringValue();
480                 String s2 = f2.stringValue();
481             if (IndexPolicy.TRACE_INDEX_UPDATE)
482                 System.err.println("areSame " + s1 + " vs. " + s2 );
483                 if(!ObjectUtils.objectEquals(s1,s2)) return false;
484         }
485         return true;
486     }
487
    // Request processor given to the constructor.
    final RequestProcessor session;

    // The indexed relation, as given to the constructor.
    final Resource         relation;

    /**
     * The schema of the index, i.e. the fields that will be indexed per
     * document for the specified relation. Since the relation stays the same
     * throughout the lifetime of this class, the index schema is also assumed
     * to stay the same. This means that {@link GenericRelation#getFields()} is
     * assumed to stay the same.
     */
    final IndexSchema      schema;

    // The input resource given to the constructor.
    Resource         input;

    // Location of the index on disk; computed in the constructor by getIndexDirectory(...).
    Path             indexPath;

    // Directory of the currently opened index; assigned and reset to null in changeState.
    Directory        directory;

    // Reader of the currently opened index; opened and closed in changeState.
    IndexReader      reader;

    // Writer of the currently opened index; created in changeState when WRITE is entered.
    IndexWriter      writer;

    // Searcher on top of the current reader; created in changeState.
    IndexSearcher    searcher;
512
513     IndexedRelationsSearcherBase(RequestProcessor session, Resource relation, Resource input) {
514         this.session = session;
515         this.relation = relation;
516         this.input = input;
517         this.indexPath = getIndexDirectory(session.getSession(), relation, input);
518         if(isIndexAvailable()) {
519                 state = State.READY;
520         } else {
521                 state = State.NONE;
522         }
523         this.schema = IndexSchema.readFromRelation(session, relation);
524     }
525
526     Directory getDirectory(Session session) throws IOException {
527         return FSDirectory.open(indexPath.toFile());
528     }
529
    /**
     * @return a text that this class puts in front of its trace and
     *         performance messages and of one exception message
     */
    abstract String getDescriptor();
531     
532     /**
533      * Ensures that searcher is in read or write state.
534      * 
535      * @param forWriting <code>true</code> to open index for writing,
536      *        <code>false</code> for reading
537      * @return true is required state was reached       
538      *        
539      */
540     boolean startAccess(IProgressMonitor monitor, Session session, boolean forWriting) {
541         if(forWriting) {
542                 changeState(monitor, session, State.WRITE);
543                 return checkState(State.WRITE);
544         } else {
545                 changeState(monitor, session, State.READ);
546                 return checkState(State.READ);
547         }
548     }
549
550     boolean hasAccess(boolean forWriting) {
551         
552         if (forWriting)
553                 return checkState(State.WRITE); 
554         else
555                 return checkState(State.WRITE) || checkState(State.READ);
556         
557     }
558     
559     void assertAccessOpen(boolean forWriting) {
560         if (forWriting)
561                 if(!checkState(State.WRITE)) 
562                 throw new IllegalStateException("index not opened for writing (directory=" + directory + ", reader=" + reader + ")");
563         else
564                 if(!(checkState(State.WRITE) || checkState(State.READ))) 
565                 throw new IllegalStateException("index not opened for reading (directory=" + directory + ", writer=" + writer + ")");
566     }
567     
568     void closeWriter(IndexWriter writer) throws CorruptIndexException, IOException {
569         if (writer == null)
570             return;
571
572         try {
573             // May throw OOME, see IndexWriter javadoc for the correct actions.
574             writer.close(false);
575         } catch (OutOfMemoryError e) {
576             writer.close();
577             throw e;
578         }
579     }
580
581     private static String getPattern(GenericRelation relation, int boundCount) {
582         String result = "";
583         for (int i = 0; i < boundCount; i++)
584             result += "b";
585         for (int i = 0; i < relation.getFields().length - boundCount; i++)
586             result += "f";
587         return result;
588     }
589     
590     private static final int INDEXING_THREAD_COUNT = 2;
591     
592     private static final ExecutorService executor = Executors.newFixedThreadPool(INDEXING_THREAD_COUNT, new ThreadFactory() {
593         @Override
594         public Thread newThread(Runnable r) {
595             Thread t = new Thread(r, "Lucene Index Creator");
596             if (!t.isDaemon())
597                 t.setDaemon(true);
598             if (t.getPriority() != Thread.NORM_PRIORITY)
599                 t.setPriority(Thread.NORM_PRIORITY);
600             return t;
601         }
602     });
603
604     void initializeIndex(IProgressMonitor monitor, ReadGraph graph, Object[] bound, boolean overwrite)
605             throws IOException, DatabaseException
606     {
607         IndexingJob.jobifyIfPossible(
608                 monitor,
609                 "Reindexing " + NameUtils.getSafeLabel(graph, input),
610                 mon -> {
611                     try {
612                         initializeIndexImpl(mon, graph, bound, overwrite);
613                     } catch (IOException e) {
614                         getLogger().error("Index is in problematic state! {}", this, e);
615                         throw new DatabaseException(e);
616                     }
617                 });
618     }
619
620     void initializeIndexImpl(IProgressMonitor monitor, ReadGraph graph, final Object[] bound, boolean overwrite) throws IOException,
621     DatabaseException {
622
623         final SubMonitor mon = SubMonitor.convert(monitor, 100);
624
625         if (IndexPolicy.TRACE_INDEX_INIT)
626             System.out.println(getDescriptor() + "Initializing index at " + indexPath + " (overwrite = " + overwrite + ")");
627         mon.beginTask("Initializing Index", 100);
628
629         if (overwrite) {
630             if (Files.exists(indexPath)) {
631                 mon.subTask("Erasing previous index");
632                 if (getLogger().isDebugEnabled())
633                     getLogger().debug("Erasing previous index {}", indexPath.toAbsolutePath());
634                 FileUtils.delete(indexPath);
635             }
636         }
637
638         final AtomicReference<FSDirectory> directory = new AtomicReference<FSDirectory>();
639         final AtomicReference<IndexWriter> writer = new AtomicReference<IndexWriter>();
640
641         try {
642             mon.subTask("Start index write");
643             createDirectory(indexPath);
644
645             directory.set(FSDirectory.open(indexPath.toFile()));
646             IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_9, Queries.getAnalyzer()).setOpenMode(OpenMode.CREATE);
647             writer.set(new IndexWriter(directory.get(), conf));
648
649             mon.worked(5);
650
651             final GenericRelation r = graph.adapt(relation, GenericRelation.class);
652             if (r == null)
653                 throw new DatabaseException("Given resource " + graph.syncRequest(new SafeName(relation))
654                 + "could not be adapted to GenericRelation.");
655
656             long realizeStart = 0;
657             if (IndexPolicy.PERF_INDEX_INIT)
658                 realizeStart = System.nanoTime();
659
660             mon.subTask("Calculating indexed content");
661             GenericRelation selection = r.select(getPattern(r, bound.length), bound);
662             mon.worked(5);
663             List<Object[]> results = selection.realize(graph);
664             mon.worked(40);
665
666             if (IndexPolicy.PERF_INDEX_INIT)
667                 System.out.println(getDescriptor() + "Realized index with " + results.size() + " entries at " + indexPath + " in " + (1e-9 * (System.nanoTime()-realizeStart)) + " seconds.");
668             if (IndexPolicy.TRACE_INDEX_INIT)
669                 System.out.println(getDescriptor() + "Indexed relation " + r + " produced " + results.size() + " results");
670
671             long start = IndexPolicy.PERF_INDEX_INIT ? System.nanoTime() : 0;
672
673             mon.subTask("Indexing content");
674             final Semaphore s = new Semaphore(0);
675             mon.setWorkRemaining(results.size());
676
677             for (int i = 0; i < INDEXING_THREAD_COUNT; i++) {
678                 final int startIndex = i;
679                 executor.submit(() -> {
680                     try {
681                         Document document = new Document();
682                         Field[] fs = makeFieldsForRelation(r, bound.length, document);
683
684                         for (int index = startIndex; index < results.size(); index += INDEXING_THREAD_COUNT) {
685                             if (setFields(fs, results.get(index)) == null)
686                                 continue;
687                             try {
688                                 writer.get().addDocument(document);
689                             } catch (CorruptIndexException e) {
690                                 getLogger().error("Index is corrupted! {}", this, e);
691                                 throw new IllegalStateException(e);
692                             } catch (IOException e) {
693                                 getLogger().error("Index is in problematic state! {}", this, e);
694                                 throw new IllegalStateException(e);
695                             } finally {
696                                 synchronized (mon) {
697                                     mon.worked(1);
698                                 }
699                             }
700                         }
701
702                         s.release();
703                     } catch (DatabaseException e) {
704                         throw new IllegalStateException(e);
705                     }
706                 });
707             }
708
709             try {
710                 s.acquire(INDEXING_THREAD_COUNT);
711             } catch (InterruptedException e) {
712                 getLogger().error("Could not initialize index {}", this, e);
713             }
714
715             // http://www.gossamer-threads.com/lists/lucene/java-dev/47895
716             // and http://lucene.apache.org/java/docs/index.html#27+November+2011+-+Lucene+Core+3.5.0
717             // advise against calling optimize at all. So let's not do it anymore.
718             //writer.get().optimize();
719             //writer.get().commit();
720
721             mon.subTask("Flushing");
722
723             if (IndexPolicy.PERF_INDEX_INIT)
724                 System.out.println(getDescriptor() + "Wrote index at " + indexPath + " in " + (1e-9 * (System.nanoTime()-start)) + " seconds.");
725
726         } catch (DatabaseException e) {
727             getLogger().error("Could not initialize index due to db {}", this, e);
728         } finally {
729             try {
730                 closeWriter(writer.getAndSet(null));
731             } finally {
732                 FileUtils.uncheckedClose(directory.getAndSet(null));
733             }
734         }
735     }
736
737     
738     public List<Object[]> debugDocs(IProgressMonitor monitor) throws ParseException, IOException, DatabaseException {
739     
740             Query query = new MatchAllDocsQuery(); 
741         
742             TopDocs td = searcher.search(query, Integer.MAX_VALUE);
743     
744             ScoreDoc[ ] scoreDocs = td.scoreDocs; 
745             List<Object[]> result = new ArrayList<Object[]>(scoreDocs.length);
746         
747             for(ScoreDoc scoreDoc:scoreDocs) {
748         
749                 try {
750         
751                     Document doc = reader.document(scoreDoc.doc);
752                     List<IndexableField> fs = doc.getFields();
753                     Object[] o = new Object[fs.size()];
754                     int index = 0; 
755                     for (IndexableField f : fs) {
756                     o[index++] = f.stringValue();
757                     }
758                     result.add(o);
759         
760             } catch (CorruptIndexException e) {
761                 getLogger().error("Index is corrupted! {}", this, e);
762                 throw new DatabaseException(e);
763             } catch (IOException e) {
764                 getLogger().error("Index is in problematic state! {}", this, e);
765                 throw new DatabaseException(e);
766             }
767
768             }
769             
770             return result;
771             
772     }
773
774     
775     List<Map<String, Object>> doSearch(IProgressMonitor monitor, RequestProcessor processor, String search, int maxResultCount) throws ParseException, IOException,
776     DatabaseException {
777
778         // An empty search string will crash QueryParser
779         // Just return no results for empty queries.
780         //System.out.println("search: '" + search + "'");
781         if (search.isEmpty())
782             return Collections.emptyList();
783
784         assertAccessOpen(false);
785
786         Query query = Queries.parse(search, schema);
787
788         long start = System.nanoTime();
789
790         maxResultCount = Math.min(maxResultCount, searcher.getIndexReader().numDocs());
791         if (maxResultCount == 0)
792             return Collections.emptyList();
793         
794         final TopDocs docs = searcher.search(query, null, maxResultCount);
795         
796 //        for(Object[] o : debugDocs(monitor)) {
797 //            System.err.println("-" + Arrays.toString(o));
798 //        }
799         
800         if (IndexPolicy.PERF_INDEX_QUERY) {
801             long end = System.nanoTime();
802             System.out.println(getDescriptor() + "search(" + search + ", " + maxResultCount + ") into index at " + indexPath + " took " + (1e-9 * (end-start)) + " seconds.");
803         }
804
805         if (docs.totalHits == 0) {
806             return Collections.emptyList();
807         }
808
809         return processor.syncRequest(new Read<List<Map<String, Object>>>() {
810
811             @Override
812             public List<Map<String, Object>> perform(ReadGraph graph) throws DatabaseException {
813
814                 GenericRelation r = graph.adapt(relation, GenericRelation.class);
815                 if (r == null)
816                     throw new DatabaseException("Given resource " + graph.syncRequest(new SafeName(relation))
817                             + "could not be adapted to GenericRelation.");
818
819                 SerialisationSupport support = graph.getService(SerialisationSupport.class);
820
821                 List<Map<String, Object>> result = new ArrayList<Map<String, Object>>(docs.scoreDocs.length);
822                 
823                 final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
824                 
825                 for (ScoreDoc scoreDoc : docs.scoreDocs) {
826
827                     try {
828
829                         reader.document(scoreDoc.doc, visitor);
830                         
831                         Document doc = visitor.getDocument();
832
833                         List<IndexableField> fs = doc.getFields();
834                         Map<String, Object> entry = new THashMap<String, Object>(fs.size());
835                         for (IndexableField f : fs) {
836                             IndexSchema.Type type = schema.typeMap.get(f.name());
837                             if (type == IndexSchema.Type.LONG) {
838                                 entry.put(f.name(), support.getResource(f.numericValue().longValue()));
839                             } else {
840                                 entry.put(f.name(), f.stringValue());
841                             }
842                         }
843                         
844                         result.add(entry);
845
846                     } catch (CorruptIndexException e) {
847                         getLogger().error("Index is corrupted! {}", this, e);
848                         throw new DatabaseException(e);
849                     } catch (IOException e) {
850                         getLogger().error("Index is in problematic state! {}", this, e);
851                         throw new DatabaseException(e);
852                     }
853                 }
854                 return result;
855             }
856         });
857     }
858
859     static class ResourceVisitor extends StoredFieldVisitor {
860         
861         public long id;
862
863                 @Override
864                 public Status needsField(FieldInfo fieldInfo) throws IOException {
865                         if("Resource".equals(fieldInfo.name)) return Status.YES;
866                         return Status.NO;
867                 }
868                 
869                 @Override
870                 public void longField(FieldInfo fieldInfo, long value) throws IOException {
871                         id = value;
872                 }
873         
874     };
875     
876     static class DumpVisitor extends StoredFieldVisitor {
877
878         public List<Object> values;
879         
880         DumpVisitor(List<Object> values) {
881                 this.values = values;
882         }
883
884                 @Override
885                 public Status needsField(FieldInfo fieldInfo) throws IOException {
886                         return Status.YES;
887                 }
888                 
889                 @Override
890                 public void longField(FieldInfo fieldInfo, long value) throws IOException {
891                         values.add(value);
892                 }
893                 
894                 @Override
895                 public void stringField(FieldInfo fieldInfo, String value) throws IOException {
896                         values.add(value);
897                 }
898
899     }
900
901     List<Resource> doSearchResources(IProgressMonitor monitor, RequestProcessor processor, String search, int maxResultCount) throws ParseException, IOException,
902     DatabaseException {
903
904         // An empty search string will crash QueryParser
905         // Just return no results for empty queries.
906         //System.out.println("search: '" + search + "'");
907         if (search.isEmpty())
908             return Collections.emptyList();
909
910         assertAccessOpen(false);
911
912         Query query = Queries.parse(search, schema);
913
914         long start = System.nanoTime();
915
916         maxResultCount = Math.min(maxResultCount, searcher.getIndexReader().numDocs());
917         if (maxResultCount == 0)
918             return Collections.emptyList();
919         
920         final TopDocs docs = searcher.search(query, null, maxResultCount);
921         
922 //        for(Object[] o : debugDocs(monitor)) {
923 //            System.err.println("-" + Arrays.toString(o));
924 //        }
925         
926         if (IndexPolicy.PERF_INDEX_QUERY) {
927             long end = System.nanoTime();
928             System.out.println(getDescriptor() + "search(" + search + ", " + maxResultCount + ") into index at " + indexPath + " took " + (1e-9 * (end-start)) + " seconds.");
929         }
930
931         if (docs.totalHits == 0) {
932             return Collections.emptyList();
933         }
934         
935         return processor.syncRequest(new Read<List<Resource>>() {
936
937             @Override
938             public List<Resource> perform(ReadGraph graph) throws DatabaseException {
939
940                 CollectionSupport cs = graph.getService(CollectionSupport.class);
941                 SerialisationSupport support = graph.getService(SerialisationSupport.class);
942                 
943                 List<Resource> result = cs.createList();
944                 
945                 ResourceVisitor visitor = new ResourceVisitor();
946                 
947                 for (ScoreDoc scoreDoc : docs.scoreDocs) {
948                     try {
949                         reader.document(scoreDoc.doc, visitor);
950                         result.add(support.getResource(visitor.id));
951                     } catch (CorruptIndexException e) {
952                         getLogger().error("Index is corrupted! {}", this, e);
953                         throw new DatabaseException(e);
954                     } catch (IOException e) {
955                         getLogger().error("Index is in problematic state! {}", this, e);
956                         throw new DatabaseException(e);
957                     }
958                 }
959                 return result;
960             }
961         });
962     }
963
964     List<Object> doList(IProgressMonitor monitor, RequestProcessor processor) throws ParseException, IOException,
965     DatabaseException {
966
967         assertAccessOpen(false);
968
969         Query query = new MatchAllDocsQuery(); 
970
971         final TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
972         
973         ArrayList<Object> result = new ArrayList<Object>();
974         
975         DumpVisitor visitor = new DumpVisitor(result);
976                 
977         for (ScoreDoc scoreDoc : docs.scoreDocs) {
978
979                 try {
980
981                         reader.document(scoreDoc.doc, visitor);
982
983                 } catch (CorruptIndexException e) {
984                     getLogger().error("Index is corrupted! {}", this, e);
985                         throw new DatabaseException(e);
986                 } catch (IOException e) {
987                     getLogger().error("Index is in problematic state! {}", this, e);
988                         throw new DatabaseException(e);
989                 }
990
991         }
992
993         return result;
994
995     }
996     
997     protected static Path getIndexDirectory(Session session, Resource relation, Resource input) {
998         Path path = DatabaseIndexing.getIndexLocation(session, relation, input);
999 //        System.out.println("getIndexDirectory = " + path);
1000         return path;
1001     }
1002
1003     private static void createDirectory(Path path) throws IOException {
1004         if (Files.exists(path) && !Files.isDirectory(path))
1005             throw new IOException("Could not create index directory " + path + ", a file by that name already exists");
1006         Files.createDirectories(path);
1007     }
1008
1009     Path getIndexPath() {
1010         return indexPath;
1011     }
1012
1013     boolean isIndexAvailable() {
1014         return (Files.exists(indexPath) && Files.isDirectory(indexPath));
1015     }
1016     
    /**
     * Subclass hook for clearing this index on a best-effort basis.
     * <p>
     * NOTE(review): the return convention is presumably the same as in
     * {@code clearDirectory} ({@code null} on success, otherwise the
     * failure) - confirm against the implementations.
     *
     * @param monitor progress monitor handed to the implementation
     * @param session database session handed to the implementation
     * @return a {@link Throwable} describing the outcome; see the note above
     */
    abstract Throwable bestEffortClear(IProgressMonitor monitor, Session session);
1018
1019     /*
1020      * Start from scratch. Clear all caches and rebuild the index. 
1021      */
1022     Throwable clearDirectory(IProgressMonitor monitor, Session session) {
1023         
1024                 Path file = getIndexPath();
1025
1026         try {
1027                         FileUtils.delete(file);
1028         } catch (Throwable t) {
1029                 getLogger().error("Could not delete directory {}", file.toAbsolutePath(), t);
1030                 return t;
1031         }
1032         if (Files.exists(file))
1033             return new IllegalStateException("Failed to delete directory " + file.toAbsolutePath());
1034         return null;
1035     }
1036
1037     private Field[] setFields(Field[] fs, Object[] result) {
1038         for (int i = 0; i < result.length; i++) {
1039             Object value = result[i];
1040             if (value instanceof String) {
1041                 if (IndexPolicy.DEBUG_INDEX_INIT)
1042                     System.out.println(getDescriptor() + "index " + fs[i].name() + " = " + value + " : String");
1043                 fs[i].setStringValue((String) value);
1044             } else if (value instanceof Long) {
1045                 if (IndexPolicy.DEBUG_INDEX_INIT)
1046                     System.out.println(getDescriptor() + "index " + fs[i].name() + " = " + value + " : Long");
1047                 fs[i].setLongValue((Long) value);
1048             } else {
1049                 getLogger().error("Can only index Long and String fields, encountered " + value);
1050                 return null;
1051             }
1052         }
1053         return fs;
1054     }
1055
    /**
     * Supplies the logger this class reports its errors to, for example
     * index read failures and failed directory deletion.
     *
     * @return the logger to use for this searcher
     */
    protected abstract Logger getLogger();
1057     
1058     @Override
1059     public String toString() {
1060         return getClass().getSimpleName() + " [" + String.valueOf(schema) + ", " + String.valueOf(relation) + ", " + String.valueOf(input) + ", " + String.valueOf(indexPath) + ", " + String.valueOf(directory) + ", " + String.valueOf(state) + "]";
1061     }
1062 }