/*******************************************************************************
 * Copyright (c) 2014, 2015 Association for Decentralized Information Management
 * in Industry THTH ry.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Semantum Oy - initial API and implementation
 *     Semantum Oy - improvements for Simantics issue #6053
 *******************************************************************************/
package org.simantics.db.indexing;

import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
import org.simantics.databoard.util.ObjectUtils;
import org.simantics.db.layer0.genericrelation.Dependencies;

/**
 * Analyzers and query parsing used by the indexing bundle.
 *
 * <p>The class exposes a shared per-field analyzer ({@link #getAnalyzer()}) and a
 * query parser entry point ({@link #parse(String, IndexSchema)}) that builds
 * numeric range queries for fields the given {@link IndexSchema} declares as
 * numeric.
 */
public class Queries {

    /**
     * Tokenizer that splits its input at whitespace and lower-cases every
     * character of the produced tokens.
     */
    static final class LowerCaseWhitespaceTokenizer extends CharTokenizer {

        /**
         * Constructs a tokenizer reading from {@code in}.
         *
         * @param matchVersion Lucene version to match
         * @param in           the input to split up into tokens
         */
        public LowerCaseWhitespaceTokenizer(Version matchVersion, Reader in) {
            super(matchVersion, in);
        }

        /**
         * Constructs a tokenizer reading from {@code in} using the given
         * attribute factory.
         *
         * @param matchVersion Lucene version to match
         * @param factory      the attribute factory to use for this {@link Tokenizer}
         * @param in           the input to split up into tokens
         */
        public LowerCaseWhitespaceTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
            super(matchVersion, factory, in);
        }

        /** Lower-cases each code point that becomes part of a token. */
        @Override
        protected int normalize(int c) {
            return Character.toLowerCase(c);
        }

        /** Every non-whitespace code point belongs to a token. */
        @Override
        protected boolean isTokenChar(int c) {
            return !Character.isWhitespace(c);
        }
    }

    /**
     * Analyzer built on {@link LowerCaseWhitespaceTokenizer}: behaves like a
     * {@link WhitespaceAnalyzer} whose tokens are additionally lower-cased.
     */
    static final class LowerCaseWhitespaceAnalyzer extends Analyzer {

        private final Version matchVersion;

        /**
         * @param matchVersion Lucene version handed to the tokenizer
         */
        public LowerCaseWhitespaceAnalyzer(Version matchVersion) {
            this.matchVersion = matchVersion;
        }

        @Override
        protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
            return new TokenStreamComponents(new LowerCaseWhitespaceTokenizer(matchVersion, reader));
        }
    }

    /**
     * Token filter that replaces each term with its lower-case form.
     */
    static final class LowercaseFilter extends TokenFilter {

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        /**
         * @param in the token stream whose terms are lower-cased
         */
        public LowercaseFilter(TokenStream in) {
            super(in);
        }

        @Override
        public boolean incrementToken() throws IOException {
            if (!input.incrementToken()) {
                return false;
            }
            // Locale.ROOT keeps index terms independent of the JVM default locale
            // (e.g. the Turkish dotless i), consistent with the locale-independent
            // Character.toLowerCase used by LowerCaseWhitespaceTokenizer.
            String lowercase = termAtt.toString().toLowerCase(Locale.ROOT);
            termAtt.setEmpty().append(lowercase);
            return true;
        }
    }

    /**
     * Analyzer for space-separated strings in which a backslash escapes a
     * following space or backslash.
     *
     * <p>The tokenizer keeps escaped spaces and escaped backslashes inside a
     * token; a replace filter then removes the escaping backslash. Optionally
     * the resulting terms are lower-cased.
     */
    static final class TypeStringAnalyzer extends Analyzer {

        /**
         * Group 1 is one token: a run of characters that are neither space nor
         * backslash, escaped backslashes, or escaped spaces. Trailing spaces
         * are consumed by group 3.
         */
        private static final Pattern TOKEN_PATTERN = Pattern.compile("(([^\\\\ ]|\\\\\\\\|\\\\ )+)( *)");

        /** Matches an escape sequence; group 2 is the escaped character. */
        private static final Pattern ESCAPE_PATTERN = Pattern.compile("(\\\\(\\\\| ))");

        private final boolean lowercase;

        /**
         * @param lowercase {@code true} to lower-case the produced terms
         */
        public TypeStringAnalyzer(boolean lowercase) {
            this.lowercase = lowercase;
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new PatternTokenizer(reader, TOKEN_PATTERN, 1);
            TokenFilter filter = new PatternReplaceFilter(tokenizer, ESCAPE_PATTERN, "$2", true);

            return new TokenStreamComponents(tokenizer, lowercase ? new LowercaseFilter(filter) : filter);
        }

    }

    /**
     * Immutable entry of the single-slot query cache: the parsed query together
     * with everything it was derived from.
     */
    private static final class CachedQuery {

        final String search;
        final IndexSchema schema;
        final Query query;

        CachedQuery(String search, IndexSchema schema, Query query) {
            this.search = search;
            this.schema = schema;
            this.query = query;
        }

        /**
         * The parsed query depends on the schema as well as on the search string
         * (numeric fields produce numeric range queries), so both must match.
         */
        boolean matches(String search, IndexSchema schema) {
            return search.equals(this.search) && ObjectUtils.objectEquals(schema, this.schema);
        }
    }

    /** Holds the most recently parsed query; {@code null} until the first parse. */
    private static final AtomicReference<CachedQuery> queryCache = new AtomicReference<>();

    static final PerFieldAnalyzerWrapper analyzer = createAnalyzer();

    /**
     * Builds the per-field analyzer. Fields without an explicit mapping fall
     * back to a {@link LowerCaseWhitespaceAnalyzer}.
     *
     * @return a new analyzer wrapper
     */
    static PerFieldAnalyzerWrapper createAnalyzer() {

        Map<String, Analyzer> analyzerPerField = new HashMap<>();
        analyzerPerField.put("Model", new KeywordAnalyzer());
        analyzerPerField.put("Parent", new KeywordAnalyzer());
        analyzerPerField.put("Resource", new KeywordAnalyzer());
        analyzerPerField.put("GUID", new KeywordAnalyzer());
        analyzerPerField.put("Name", new KeywordAnalyzer());
        analyzerPerField.put("Types", new TypeStringAnalyzer(false));
        analyzerPerField.put(Dependencies.FIELD_NAME_SEARCH, new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9));
        analyzerPerField.put(Dependencies.FIELD_TYPES_SEARCH, new TypeStringAnalyzer(true));

        return new PerFieldAnalyzerWrapper(new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9), analyzerPerField);

    }

    /**
     * @return the shared analyzer instance created by {@link #createAnalyzer()}
     */
    static PerFieldAnalyzerWrapper getAnalyzer() {
        return analyzer;
    }

    /**
     * Parses {@code search} into a Lucene query with {@code "Name"} as the
     * default field.
     *
     * <p>The most recent result is cached and returned again when the same
     * search string is parsed against an equal schema.
     *
     * @param search the query string to parse
     * @param schema the schema used to recognize numeric fields in range queries
     * @return the parsed query, possibly a cached instance
     * @throws ParseException if the query string cannot be parsed
     */
    static Query parse(String search, IndexSchema schema) throws ParseException {
        CachedQuery cached = queryCache.get();
        if (cached != null && cached.matches(search, schema)) {
            return cached.query;
        }

        CustomQueryParser parser = new CustomQueryParser(Version.LUCENE_4_9, "Name", getAnalyzer(), schema);
        // Do not let the parser lower-case wildcard/prefix/range terms itself.
        parser.setLowercaseExpandedTerms(false);
        Query query = parser.parse(search);

        queryCache.set(new CachedQuery(search, schema, query));
        return query;
    }


    /**
     * Query parser that allows leading wildcards and creates numeric range
     * queries for fields the schema declares as numeric.
     */
    public static class CustomQueryParser extends QueryParser {

        protected final IndexSchema schema;

        /**
         * @param version  Lucene version to match
         * @param field    default field for query terms
         * @param analyzer analyzer used for query terms
         * @param schema   schema consulted for the types of range-query fields
         */
        public CustomQueryParser(Version version, String field, Analyzer analyzer, IndexSchema schema) {
            super(version, field, analyzer);
            this.schema = schema;
            setAllowLeadingWildcard(true);
        }

        /**
         * Creates a numeric range query when {@code field} is of a numeric type
         * according to the schema; otherwise defers to the default behavior.
         * A {@code null} bound leaves that end of the range open.
         *
         * @throws ParseException if a bound of a numeric field is not a valid
         *         number (the {@link NumberFormatException} is attached as the
         *         cause) or the numeric type is not handled
         */
        @Override
        protected Query getRangeQuery(
                String field,
                String part1,
                String part2,
                boolean startInclusive,
                boolean endInclusive) throws ParseException
        {
            IndexSchema.Type type = schema.typeMap.get(field);
            if (IndexSchema.NUMERIC_TYPES.contains(type)) {
                // When both bounds are the same text, parse it only once.
                boolean equalParts = ObjectUtils.objectEquals(part1, part2);
                try {
                    switch (type) {
                    case INT: {
                        Integer min = part1 != null ? (                   Integer.valueOf(part1)) : null;
                        Integer max = part2 != null ? (equalParts ? min : Integer.valueOf(part2)) : null;
                        return NumericRangeQuery.newIntRange(field, min, max, startInclusive, endInclusive);
                    }
                    case LONG: {
                        Long min = part1 != null ? (                   Long.valueOf(part1)) : null;
                        Long max = part2 != null ? (equalParts ? min : Long.valueOf(part2)) : null;
                        return NumericRangeQuery.newLongRange(field, min, max, startInclusive, endInclusive);
                    }
                    case FLOAT: {
                        Float min = part1 != null ? (                   Float.valueOf(part1)) : null;
                        Float max = part2 != null ? (equalParts ? min : Float.valueOf(part2)) : null;
                        return NumericRangeQuery.newFloatRange(field, min, max, startInclusive, endInclusive);
                    }
                    case DOUBLE: {
                        Double min = part1 != null ? (                   Double.valueOf(part1)) : null;
                        Double max = part2 != null ? (equalParts ? min : Double.valueOf(part2)) : null;
                        return NumericRangeQuery.newDoubleRange(field, min, max, startInclusive, endInclusive);
                    }
                    default:
                        throw new ParseException("Unrecognized numeric field type '" + type + "' for field '" + field + "'");
                    }
                } catch (NumberFormatException e) {
                    // ParseException has no cause-taking constructor; attach it explicitly.
                    ParseException parseException = new ParseException(e.getMessage());
                    parseException.initCause(e);
                    throw parseException;
                }
            }
            return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
        }

    }

}