/*******************************************************************************
* Copyright (c) 2014, 2015 Association for Decentralized Information Management
* in Industry THTH ry.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Semantum Oy - initial API and implementation
* Semantum Oy - improvements for Simantics issue #6053
*******************************************************************************/
package org.simantics.db.indexing;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.Version;
import org.simantics.databoard.util.ObjectUtils;
import org.simantics.utils.datastructures.Pair;
public class Queries {
/**
 * A whitespace tokenizer that additionally lower-cases every token
 * character while tokenizing, so indexed terms are case-insensitive.
 */
static final class LowerCaseWhitespaceTokenizer extends CharTokenizer {

    /**
     * Constructs a new lower-casing whitespace tokenizer.
     *
     * @param matchVersion Lucene version to match
     * @param in the input to split up into tokens
     */
    public LowerCaseWhitespaceTokenizer(Version matchVersion, Reader in) {
        super(matchVersion, in);
    }

    /**
     * Constructs a new lower-casing whitespace tokenizer using a given
     * {@link AttributeFactory}.
     *
     * @param matchVersion Lucene version to match
     * @param factory the attribute factory to use for this {@link Tokenizer}
     * @param in the input to split up into tokens
     */
    public LowerCaseWhitespaceTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
        super(matchVersion, factory, in);
    }

    /** Lower-cases each token code point as it is consumed. */
    @Override
    protected int normalize(int c) {
        return Character.toLowerCase(c);
    }

    /** A token continues for as long as the character is not whitespace. */
    @Override
    protected boolean isTokenChar(int c) {
        return !Character.isWhitespace(c);
    }
}
/**
 * Analyzer that splits text on whitespace and lower-cases the resulting
 * tokens via {@link LowerCaseWhitespaceTokenizer}.
 */
static final class LowerCaseWhitespaceAnalyzer extends Analyzer {

    /** Lucene compatibility version forwarded to the tokenizer. */
    private final Version luceneVersion;

    /**
     * Creates a new lower-casing whitespace analyzer.
     *
     * @param matchVersion Lucene version to match
     */
    public LowerCaseWhitespaceAnalyzer(Version matchVersion) {
        this.luceneVersion = matchVersion;
    }

    @Override
    protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        LowerCaseWhitespaceTokenizer source = new LowerCaseWhitespaceTokenizer(luceneVersion, reader);
        return new TokenStreamComponents(source);
    }
}
/**
 * Analyzer for type-list strings: splits the input into one token per type
 * name on unescaped spaces, then unescapes backslash escapes inside each
 * token. Input type names may contain "\\" (literal backslash) and "\ "
 * (literal space) escapes.
 */
static final class TypeStringAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // A token (group 1) is a run of characters that are either not a bare
        // backslash/space, or one of the escape pairs "\\" and "\ ". Trailing
        // spaces (group 3) act as the token separator and are discarded.
        Tokenizer tokenizer = new PatternTokenizer(reader, Pattern.compile("(([^\\\\ ]|\\\\\\\\|\\\\ )+)( *)"), 1);
        // Unescape within each token: "\\" -> "\" and "\ " -> " " ($2 is the
        // escaped character itself); replaceAll=true handles every occurrence.
        TokenFilter filter = new PatternReplaceFilter(tokenizer, Pattern.compile("(\\\\(\\\\| ))"), "$2", true);
        return new TokenStreamComponents(tokenizer, filter);
    }
}
private static AtomicReference> queryCache = new AtomicReference<>();
final static PerFieldAnalyzerWrapper analyzer = createAnalyzer();
/**
 * Builds the shared analyzer: exact-match ({@link KeywordAnalyzer}) analysis
 * for the identifier-like fields, custom type-string analysis for "Types",
 * and lower-casing whitespace analysis for all other fields.
 *
 * @return the per-field analyzer wrapper
 */
static PerFieldAnalyzerWrapper createAnalyzer() {
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("Model", new KeywordAnalyzer());
    analyzerPerField.put("Parent", new KeywordAnalyzer());
    analyzerPerField.put("Resource", new KeywordAnalyzer());
    analyzerPerField.put("GUID", new KeywordAnalyzer());
    analyzerPerField.put("Name", new KeywordAnalyzer());
    analyzerPerField.put("Types", new TypeStringAnalyzer());
    // Fields not listed above fall back to the whitespace/lower-case analyzer.
    return new PerFieldAnalyzerWrapper(new LowerCaseWhitespaceAnalyzer(Version.LUCENE_4_9), analyzerPerField);
}
/** Returns the shared per-field analyzer instance created at class load. */
static PerFieldAnalyzerWrapper getAnalyzer() {
    return analyzer;
}
/**
 * Parses the given search string into a Lucene {@link Query} using the
 * shared analyzer and the field types of the given schema.
 * <p>
 * The most recently parsed query is cached, so repeated parses of the same
 * string are free. The check-then-set on {@link #queryCache} is not atomic;
 * a concurrent lost update only costs an extra re-parse, never correctness.
 *
 * @param search the query string to parse
 * @param schema index schema used to resolve numeric field types in range queries
 * @return the parsed query
 * @throws ParseException if the search string is not a valid query
 */
static Query parse(String search, IndexSchema schema) throws ParseException {
    Pair<Query, String> cachedQuery = queryCache.get();
    if (cachedQuery != null && search.equals(cachedQuery.second))
        return cachedQuery.first;
    CustomQueryParser parser = new CustomQueryParser(Version.LUCENE_4_9, "Name", getAnalyzer(), schema);
    // Wildcard/range terms bypass the analyzer; keep their case as typed so
    // they match the keyword-analyzed fields exactly.
    parser.setLowercaseExpandedTerms(false);
    Query query = parser.parse(search);
    queryCache.set(Pair.make(query, search));
    return query;
}
/**
 * Query parser that builds proper {@link NumericRangeQuery} instances for
 * fields whose schema type is numeric, instead of Lucene's default textual
 * (lexicographic) range queries. Leading wildcards are allowed.
 */
public static class CustomQueryParser extends QueryParser {

    /** Schema consulted for the declared type of each queried field. */
    protected final IndexSchema schema;

    public CustomQueryParser(Version version, String field, Analyzer analyzer, IndexSchema schema) {
        super(version, field, analyzer);
        this.schema = schema;
        setAllowLeadingWildcard(true);
    }

    /**
     * Builds a numeric range query for numerically-typed fields; all other
     * fields fall through to the default textual range handling.
     *
     * @throws ParseException if a range bound is not a valid number for the
     *         field's type, or the numeric type is unrecognized
     */
    @Override
    protected Query getRangeQuery(
            String field,
            String part1,
            String part2,
            boolean startInclusive,
            boolean endInclusive) throws ParseException
    {
        IndexSchema.Type type = schema.typeMap.get(field);
        if (IndexSchema.NUMERIC_TYPES.contains(type)) {
            // When both bounds are textually equal, parse the value only once
            // and reuse it for the upper bound.
            boolean equalParts = ObjectUtils.objectEquals(part1, part2);
            try {
                switch (type) {
                case INT: {
                    Integer min = part1 != null ? Integer.valueOf(part1) : null;
                    Integer max = part2 != null ? (equalParts ? min : Integer.valueOf(part2)) : null;
                    return NumericRangeQuery.newIntRange(field, min, max, startInclusive, endInclusive);
                }
                case LONG: {
                    Long min = part1 != null ? Long.valueOf(part1) : null;
                    Long max = part2 != null ? (equalParts ? min : Long.valueOf(part2)) : null;
                    return NumericRangeQuery.newLongRange(field, min, max, startInclusive, endInclusive);
                }
                case FLOAT: {
                    Float min = part1 != null ? Float.valueOf(part1) : null;
                    Float max = part2 != null ? (equalParts ? min : Float.valueOf(part2)) : null;
                    return NumericRangeQuery.newFloatRange(field, min, max, startInclusive, endInclusive);
                }
                case DOUBLE: {
                    Double min = part1 != null ? Double.valueOf(part1) : null;
                    Double max = part2 != null ? (equalParts ? min : Double.valueOf(part2)) : null;
                    return NumericRangeQuery.newDoubleRange(field, min, max, startInclusive, endInclusive);
                }
                default:
                    throw new ParseException("Unrecognized numeric field type '" + type + "' for field '" + field + "'");
                }
            } catch (NumberFormatException e) {
                // Lucene's ParseException has no cause constructor; attach the
                // NumberFormatException via initCause so it isn't lost.
                ParseException pe = new ParseException(e.getMessage());
                pe.initCause(e);
                throw pe;
            }
        }
        return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
    }
}
}