/*******************************************************************************
 * Copyright (c) 2014, 2015 Association for Decentralized Information Management
 * in Industry THTH ry.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Semantum Oy - initial API and implementation
 *     Semantum Oy - improvements for Simantics issue #6053
 *******************************************************************************/
package org.simantics.db.indexing;

import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
import org.simantics.databoard.util.ObjectUtils;
import org.simantics.db.layer0.genericrelation.Dependencies;
import org.simantics.utils.datastructures.Pair;

/**
 * Lucene query utilities for the Simantics database index.
 * <p>
 * Provides the per-field {@link PerFieldAnalyzerWrapper} used for both
 * indexing and searching, and {@link #parse(String, IndexSchema)}, which
 * turns a search string into a Lucene {@link Query}, mapping range syntax
 * on numeric schema fields to {@link NumericRangeQuery} instances.
 */
public class Queries {

    /**
     * A {@link CharTokenizer} that splits the input at whitespace (like
     * {@code WhitespaceTokenizer}) and additionally normalizes every
     * code point to lower case via {@link Character#toLowerCase(int)}.
     */
    static final class LowerCaseWhitespaceTokenizer extends CharTokenizer {

        /**
         * Construct a new lower-casing whitespace tokenizer.
         *
         * @param matchVersion Lucene version compatibility to match
         * @param in the input to split up into tokens
         */
        public LowerCaseWhitespaceTokenizer(Version matchVersion, Reader in) {
            super(matchVersion, in);
        }

        /**
         * Construct a new lower-casing whitespace tokenizer using a given
         * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
         *
         * @param matchVersion Lucene version compatibility to match
         * @param factory the attribute factory to use for this {@link Tokenizer}
         * @param in the input to split up into tokens
         */
        public LowerCaseWhitespaceTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
            super(matchVersion, factory, in);
        }

        @Override
        protected int normalize(int c) {
            // Code-point based lower-casing; locale-independent by construction.
            return Character.toLowerCase(c);
        }

        @Override
        protected boolean isTokenChar(int c) {
            // Token characters are everything except whitespace.
            return !Character.isWhitespace(c);
        }
    }

    /**
     * Analyzer counterpart of {@link LowerCaseWhitespaceTokenizer}: behaves
     * like {@link WhitespaceAnalyzer} but lower-cases each token.
     */
    static final class LowerCaseWhitespaceAnalyzer extends Analyzer {

        private final Version matchVersion;

        /**
         * @param matchVersion Lucene version compatibility to match
         */
        public LowerCaseWhitespaceAnalyzer(Version matchVersion) {
            this.matchVersion = matchVersion;
        }

        @Override
        protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
            return new TokenStreamComponents(new LowerCaseWhitespaceTokenizer(matchVersion, reader));
        }
    }

    /**
     * Token filter that lower-cases each token's text in its entirety.
     */
    final static class LowercaseFilter extends TokenFilter {

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        public LowercaseFilter(TokenStream in) {
            super(in);
        }

        @Override
        public boolean incrementToken() throws IOException {
            if (!input.incrementToken())
                return false;
            // NOTE(review): uses the default-locale String.toLowerCase(), which is
            // locale-sensitive (e.g. Turkish dotless i). Changing this to
            // toLowerCase(Locale.ROOT) would be deterministic but could mismatch
            // tokens in already-built indexes - confirm before changing.
            String lowercase = termAtt.toString().toLowerCase();
            termAtt.setEmpty().append(lowercase);
            return true;
        }
    }

    /**
     * Analyzer for type-string fields: tokenizes at unescaped spaces,
     * unescapes {@code \\} and {@code \ } sequences within each token, and
     * optionally lower-cases the result (for the case-insensitive search
     * variants of the type fields).
     */
    static final class TypeStringAnalyzer extends Analyzer {

        private boolean lowercase;

        public TypeStringAnalyzer(Boolean lowercase) {
            this.lowercase = lowercase;
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            // Group 1 matches a run of characters that are not bare spaces,
            // allowing the escapes "\\" and "\ "; trailing spaces are consumed.
            Tokenizer tokenizer = new PatternTokenizer(reader, Pattern.compile("(([^\\\\ ]|\\\\\\\\|\\\\ )+)( *)"), 1);
            // Unescape: replace "\\" or "\ " with the escaped character itself.
            TokenFilter filter = new PatternReplaceFilter(tokenizer, Pattern.compile("(\\\\(\\\\| ))"), "$2", true);
            return new TokenStreamComponents(tokenizer, lowercase ? new LowercaseFilter(filter) : filter);
        }
    }

    // Single-entry cache of the most recently parsed query, keyed by the
    // search string (Pair.first = parsed query, Pair.second = search string).
    private static AtomicReference<Pair<Query, String>> queryCache = new AtomicReference<>();

    final static PerFieldAnalyzerWrapper analyzer = createAnalyzer();

    /**
     * Builds the shared per-field analyzer: exact-match (keyword) analysis
     * for identifier-like fields, type-string analysis for type fields, and
     * lower-casing whitespace analysis for free-text search fields and as
     * the default.
     */
    static PerFieldAnalyzerWrapper createAnalyzer() {
        Map<String, Analyzer> analyzerPerField = new HashMap<>();
        analyzerPerField.put("Model", new KeywordAnalyzer());
        analyzerPerField.put("Parent", new KeywordAnalyzer());
        analyzerPerField.put("Resource", new KeywordAnalyzer());
        analyzerPerField.put("GUID", new KeywordAnalyzer());
        analyzerPerField.put("Name", new KeywordAnalyzer());
        analyzerPerField.put("Types", new TypeStringAnalyzer(false));
        analyzerPerField.put(Dependencies.FIELD_NAME_SEARCH, new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9));
        analyzerPerField.put(Dependencies.FIELD_TYPES_SEARCH, new TypeStringAnalyzer(true));

        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
                new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9), analyzerPerField);
        return analyzer;
    }

    static PerFieldAnalyzerWrapper getAnalyzer() {
        return analyzer;
    }

    /**
     * Parses a search string into a Lucene {@link Query} using
     * {@link CustomQueryParser}, consulting a one-entry cache of the most
     * recent parse first.
     *
     * @param search the raw search expression
     * @param schema index schema used to resolve numeric range queries
     * @return the parsed query
     * @throws ParseException if the search expression is malformed
     */
    static Query parse(String search, IndexSchema schema) throws ParseException {
        Pair<Query, String> cachedQuery = queryCache.get();
        if (cachedQuery != null && search.equals(cachedQuery.second))
            return cachedQuery.first;
        //System.err.println("parse " + search + " (cached=" + (cachedQuery != null ? cachedQuery.second : "null") + ")" );
        CustomQueryParser parser = new CustomQueryParser(Version.LUCENE_4_9, "Name", getAnalyzer(), schema);
        // Wildcard/range terms must not be lower-cased here; field analyzers
        // already define the intended case handling.
        parser.setLowercaseExpandedTerms(false);
        Query query = parser.parse(search);
        queryCache.set(Pair.make(query, search));
        return query;
    }

    /**
     * Query parser that is aware of the index schema: range expressions on
     * numeric fields are converted to typed {@link NumericRangeQuery}
     * instances instead of textual range queries. Leading wildcards are
     * permitted.
     */
    public static class CustomQueryParser extends QueryParser {

        protected final IndexSchema schema;

        public CustomQueryParser(Version version, String field, Analyzer analyzer, IndexSchema schema) {
            super(version, field, analyzer);
            this.schema = schema;
            setAllowLeadingWildcard(true);
        }

        @Override
        protected Query getRangeQuery(
                String field,
                String part1,
                String part2,
                boolean startInclusive,
                boolean endInclusive)
                throws ParseException
        {
            IndexSchema.Type type = schema.typeMap.get(field);
            if (IndexSchema.NUMERIC_TYPES.contains(type)) {
                // When both bounds are textually equal, parse the bound only once
                // and reuse the same boxed value for min and max.
                boolean equalParts = ObjectUtils.objectEquals(part1, part2);
                try {
                    switch (type) {
                    case INT: {
                        Integer min = part1 != null ? (Integer.valueOf(part1)) : null;
                        Integer max = part2 != null ? (equalParts ? min : Integer.valueOf(part2)) : null;
                        return NumericRangeQuery.newIntRange(field, min, max, startInclusive, endInclusive);
                    }
                    case LONG: {
                        Long min = part1 != null ? (Long.valueOf(part1)) : null;
                        Long max = part2 != null ? (equalParts ? min : Long.valueOf(part2)) : null;
                        return NumericRangeQuery.newLongRange(field, min, max, startInclusive, endInclusive);
                    }
                    case FLOAT: {
                        Float min = part1 != null ? (Float.valueOf(part1)) : null;
                        Float max = part2 != null ? (equalParts ? min : Float.valueOf(part2)) : null;
                        return NumericRangeQuery.newFloatRange(field, min, max, startInclusive, endInclusive);
                    }
                    case DOUBLE: {
                        Double min = part1 != null ? (Double.valueOf(part1)) : null;
                        Double max = part2 != null ? (equalParts ? min : Double.valueOf(part2)) : null;
                        return NumericRangeQuery.newDoubleRange(field, min, max, startInclusive, endInclusive);
                    }
                    default:
                        throw new ParseException("Unrecognized numeric field type '" + type + "' for field '" + field + "'");
                    }
                } catch (NumberFormatException e) {
                    throw new ParseException(e.getMessage());
                }
            }
            return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
        }
    }
}