Improvements to Lucene indexing
bundles/org.simantics.db.indexing/src/org/simantics/db/indexing/Queries.java
index 1ea64d0f2bf159418c942d4b3c3cdd01fc130924..21a25d7d63a79caba3742c5457005222d3845e3b 100644
@@ -16,12 +16,16 @@ import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
+import org.apache.lucene.analysis.pattern.PatternTokenizer;
 import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
@@ -90,6 +94,18 @@ public class Queries {
         }
     }
 
+    static final class TypeStringAnalyzer extends Analyzer {
+
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            Tokenizer tokenizer = new PatternTokenizer(reader, Pattern.compile("(([^\\\\ ]|\\\\\\\\|\\\\ )+)( *)"), 1);
+            TokenFilter filter = new PatternReplaceFilter(tokenizer, Pattern.compile("(\\\\(\\\\| ))"), "$2", true);
+
+            return new TokenStreamComponents(tokenizer, filter);
+        }
+
+    }
+
     private static AtomicReference<Pair<Query, String>> queryCache = new AtomicReference<>();
 
     final static PerFieldAnalyzerWrapper analyzer = createAnalyzer();
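
Note on the hunk above: TypeStringAnalyzer treats the indexed type string as a list of type names separated by unescaped spaces; the PatternTokenizer keeps backslash-escaped spaces and backslashes inside a single token, and the PatternReplaceFilter then unescapes them. A minimal sketch of the intended behaviour, not part of the patch: the demo class name and input string are made up, and it assumes it is compiled in the org.simantics.db.indexing package, since the nested class is package-private.

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class TypeStringAnalyzerDemo {
        public static void main(String[] args) throws IOException {
            Analyzer analyzer = new Queries.TypeStringAnalyzer();
            // Two type names; the second contains backslash-escaped spaces.
            TokenStream ts = analyzer.tokenStream("Types",
                    new StringReader("http://example/MyType Escaped\\ Type\\ Name"));
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken())
                System.out.println(term.toString());
            ts.end();
            ts.close();
            // Expected output:
            //   http://example/MyType
            //   Escaped Type Name
        }
    }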
@@ -101,6 +117,8 @@ public class Queries {
        analyzerPerField.put("Parent", new KeywordAnalyzer());
        analyzerPerField.put("Resource", new KeywordAnalyzer());
        analyzerPerField.put("GUID", new KeywordAnalyzer());
+       analyzerPerField.put("Name", new KeywordAnalyzer());
+       analyzerPerField.put("Types", new TypeStringAnalyzer());
        
         PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9), analyzerPerField);
         return analyzer;
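
With the mapping above, the Name field is now analyzed as a single keyword token (no whitespace splitting), while unmapped fields still go through the default analyzer. A small stand-alone sketch of how PerFieldAnalyzerWrapper routes fields; it uses Lucene's stock WhitespaceAnalyzer in place of the platform's LowerCaseWhitespaceAnalyzer, and the field "Label" and the values are made up.

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class PerFieldRoutingDemo {
        public static void main(String[] args) throws IOException {
            Map<String, Analyzer> perField = new HashMap<>();
            perField.put("Name", new KeywordAnalyzer());
            // WhitespaceAnalyzer stands in for LowerCaseWhitespaceAnalyzer here.
            Analyzer analyzer = new PerFieldAnalyzerWrapper(
                    new WhitespaceAnalyzer(Version.LUCENE_4_9), perField);

            printTokens(analyzer, "Name",  "My Model Name");   // one token:    [My Model Name]
            printTokens(analyzer, "Label", "My Model Name");   // three tokens: [My] [Model] [Name]
        }

        static void printTokens(Analyzer analyzer, String field, String text) throws IOException {
            TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken())
                System.out.println(field + ": [" + term + "]");
            ts.end();
            ts.close();
        }
    }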
@@ -118,6 +136,7 @@ public class Queries {
 
         //System.err.println("parse " + search + " (cached=" + (cachedQuery != null ? cachedQuery.second : "null") + ")" );
         CustomQueryParser parser = new CustomQueryParser(Version.LUCENE_4_9, "Name", getAnalyzer(), schema);
+        parser.setLowercaseExpandedTerms(false);
         Query query = parser.parse(search);
 
         queryCache.set(Pair.make(query, search));
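
Disabling lowercasing of expanded terms matters because the classic QueryParser does not run wildcard, prefix, or range terms through the analyzer; by default it lowercases them, which would no longer match the case-preserving keyword fields (Name, Parent, Resource, GUID). The patch applies this to the project's CustomQueryParser; the sketch below only illustrates the effect with the stock classic QueryParser and a made-up query string.

    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.util.Version;

    public class LowercaseExpandedTermsDemo {
        public static void main(String[] args) throws Exception {
            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "Name", new KeywordAnalyzer());
            // Default behaviour: the prefix term is lowercased and misses case-preserved terms.
            System.out.println(parser.parse("Name:MyModel*"));   // Name:mymodel*
            parser.setLowercaseExpandedTerms(false);
            // With lowercasing disabled the term keeps the case the user typed.
            System.out.println(parser.parse("Name:MyModel*"));   // Name:MyModel*
        }
    }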