1 /*******************************************************************************
2 * Copyright (c) 2014, 2015 Association for Decentralized Information Management
4 * All rights reserved. This program and the accompanying materials
5 * are made available under the terms of the Eclipse Public License v1.0
6 * which accompanies this distribution, and is available at
7 * http://www.eclipse.org/legal/epl-v10.html
10 * Semantum Oy - initial API and implementation
11 * Semantum Oy - improvements for Simantics issue #6053
12 *******************************************************************************/
13 package org.simantics.db.indexing;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
import org.simantics.databoard.util.ObjectUtils;
import org.simantics.db.layer0.genericrelation.Dependencies;
import org.simantics.utils.datastructures.Pair;
43 public class Queries {
45 static final class LowerCaseWhitespaceTokenizer extends CharTokenizer {
47 * Construct a new WhitespaceTokenizer. * @param matchVersion Lucene version
48 * to match See {@link <a href="#version">above</a>}
51 * the input to split up into tokens
53 public LowerCaseWhitespaceTokenizer(Version matchVersion, Reader in) {
54 super(matchVersion, in);
58 * Construct a new WhitespaceTokenizer using a given
59 * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
62 * matchVersion Lucene version to match See
63 * {@link <a href="#version">above</a>}
65 * the attribute factory to use for this {@link Tokenizer}
67 * the input to split up into tokens
69 public LowerCaseWhitespaceTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
70 super(matchVersion, factory, in);
74 protected int normalize(int c) {
75 return Character.toLowerCase(c);
78 protected boolean isTokenChar(int c) {
79 return !Character.isWhitespace(c);
83 static final class LowerCaseWhitespaceAnalyzer extends Analyzer {
85 private final Version matchVersion;
88 * Creates a new {@link WhitespaceAnalyzer}
89 * @param matchVersion Lucene version to match See {@link <a href="#version">above</a>}
91 public LowerCaseWhitespaceAnalyzer(Version matchVersion) {
92 this.matchVersion = matchVersion;
96 protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
97 return new TokenStreamComponents(new LowerCaseWhitespaceTokenizer(matchVersion, reader));
101 final static class LowercaseFilter extends TokenFilter {
102 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
104 public LowercaseFilter(TokenStream in) {
109 public boolean incrementToken() throws IOException {
110 if (!input.incrementToken()) return false;
111 String lowercase = termAtt.toString().toLowerCase();
112 termAtt.setEmpty().append(lowercase);
117 static final class TypeStringAnalyzer extends Analyzer {
118 private boolean lowercase;
120 public TypeStringAnalyzer(Boolean lowercase) {
121 this.lowercase = lowercase;
125 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
126 Tokenizer tokenizer = new PatternTokenizer(reader, Pattern.compile("(([^\\\\ ]|\\\\\\\\|\\\\ )+)( *)"), 1);
127 TokenFilter filter = new PatternReplaceFilter(tokenizer, Pattern.compile("(\\\\(\\\\| ))"), "$2", true);
129 return new TokenStreamComponents(tokenizer, lowercase ? new LowercaseFilter(filter) : filter);
    // Single-entry cache holding the most recently parsed query together with
    // the search string it was parsed from; see parse().
    private static AtomicReference<Pair<Query, String>> queryCache = new AtomicReference<>();

    // Shared per-field analyzer used for all index queries; built once by createAnalyzer().
    final static PerFieldAnalyzerWrapper analyzer = createAnalyzer();
138 static PerFieldAnalyzerWrapper createAnalyzer() {
140 Map<String,Analyzer> analyzerPerField = new HashMap<>();
141 analyzerPerField.put(Dependencies.FIELD_MODEL, new KeywordAnalyzer());
142 analyzerPerField.put(Dependencies.FIELD_PARENT, new KeywordAnalyzer());
143 analyzerPerField.put(Dependencies.FIELD_RESOURCE, new KeywordAnalyzer());
144 analyzerPerField.put(Dependencies.FIELD_GUID, new KeywordAnalyzer());
145 analyzerPerField.put(Dependencies.FIELD_NAME, new KeywordAnalyzer());
146 analyzerPerField.put(Dependencies.FIELD_TYPES, new TypeStringAnalyzer(false));
147 analyzerPerField.put(Dependencies.FIELD_NAME_SEARCH, new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9));
148 analyzerPerField.put(Dependencies.FIELD_TYPES_SEARCH, new TypeStringAnalyzer(true));
149 analyzerPerField.put(Dependencies.FIELD_TYPE_RESOURCE, new WhitespaceAnalyzer(Version.LUCENE_4_9));
151 PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9), analyzerPerField);
156 static PerFieldAnalyzerWrapper getAnalyzer() {
160 static Query parse(String search, IndexSchema schema) throws ParseException {
161 Pair<Query, String> cachedQuery = queryCache.get();
162 if (cachedQuery != null && search.equals(cachedQuery.second))
163 return cachedQuery.first;
165 //System.err.println("parse " + search + " (cached=" + (cachedQuery != null ? cachedQuery.second : "null") + ")" );
166 CustomQueryParser parser = new CustomQueryParser(Version.LUCENE_4_9, "Name", getAnalyzer(), schema);
167 parser.setLowercaseExpandedTerms(false);
168 Query query = parser.parse(search);
170 queryCache.set(Pair.make(query, search));
175 public static class CustomQueryParser extends QueryParser {
177 protected final IndexSchema schema;
179 public CustomQueryParser(Version version, String field, Analyzer analyzer, IndexSchema schema) {
180 super(version, field, analyzer);
181 this.schema = schema;
182 setAllowLeadingWildcard(true);
186 protected Query getRangeQuery(
190 boolean startInclusive,
191 boolean endInclusive) throws ParseException
193 IndexSchema.Type type = schema.typeMap.get(field);
194 if (IndexSchema.NUMERIC_TYPES.contains(type)) {
195 boolean equalParts = ObjectUtils.objectEquals(part1, part2);
199 Integer min = part1 != null ? ( Integer.valueOf(part1)) : null;
200 Integer max = part2 != null ? (equalParts ? min : Integer.valueOf(part2)) : null;
201 return NumericRangeQuery.newIntRange(field, min, max, startInclusive, endInclusive);
204 Long min = part1 != null ? ( Long.valueOf(part1)) : null;
205 Long max = part2 != null ? (equalParts ? min : Long.valueOf(part2)) : null;
206 return NumericRangeQuery.newLongRange(field, min, max, startInclusive, endInclusive);
209 Float min = part1 != null ? ( Float.valueOf(part1)) : null;
210 Float max = part2 != null ? (equalParts ? min : Float.valueOf(part2)) : null;
211 return NumericRangeQuery.newFloatRange(field, min, max, startInclusive, endInclusive);
214 Double min = part1 != null ? ( Double.valueOf(part1)) : null;
215 Double max = part2 != null ? (equalParts ? min : Double.valueOf(part2)) : null;
216 return NumericRangeQuery.newDoubleRange(field, min, max, startInclusive, endInclusive);
219 throw new ParseException("Unrecognized numeric field type '" + type + "' for field '" + field + "'");
221 } catch (NumberFormatException e) {
222 throw new ParseException(e.getMessage());
225 return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);