package org.simantics.db.layer0.genericrelation;
/**
* This class contains utilities related to queries made into Lucene indexes,
* such as escaping search terms.
*
* @author Tuukka Lehtonen
*/
public class IndexQueries {

    /**
     * Reserved words of the Lucene query syntax, stored as consecutive pairs:
     * the reserved word at an even index followed by its escaped replacement
     * at the next odd index.
     */
    private static final String[] RESERVED_WORDS = {
        "AND", "\\AND",
        "OR", "\\OR",
        "NOT", "\\NOT",
    };

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding backslash.
     *
     * <p>
     * Same as calling {@link #escape(String, boolean)} with
     * {@code escapeWildcards} set to {@code false}, i.e. wildcard characters
     * are left untouched and reserved words are escaped.
     *
     * @param s
     *            lucene query to escape, must not be {@code null}
     * @return escaped string
     */
    public static String escape(String s) {
        return escape(s, false);
    }

    /**
     * Same as calling {@link #escape(String, boolean, boolean)} with
     * {@code escapeKeywords} set to {@code true}.
     *
     * @param s
     *            lucene query to escape, must not be {@code null}
     * @param escapeWildcards
     *            {@code true} to escape also wildcard characters
     * @return escaped string
     */
    public static String escape(String s, boolean escapeWildcards) {
        return escape(s, escapeWildcards, true);
    }

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding backslash.
     *
     * <p>
     * Copied from
     * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
     * escaping of the wildcard characters '*' and '?' is optional. When
     * {@code escapeWildcards} is {@code false}, clients must escape wildcards
     * themselves where needed, which allows the use of wildcards in queries.
     * The space character is escaped as well.
     *
     * <p>
     * Reserved words are only looked for at the beginning of the input and
     * directly after an unescaped whitespace character such as a tab or a
     * line break. A space is always escaped by this method and therefore
     * does not start a new word. Examples with both flags set to
     * {@code true}: {@code AND01} is returned as is, {@code AND 01} becomes
     * {@code \AND\ 01} and {@code " AND 01"} becomes {@code "\ AND\ 01"}.
     *
     * @param s
     *            lucene query to escape, must not be {@code null}
     * @param escapeWildcards
     *            {@code true} to escape also wildcard characters
     * @param escapeKeywords
     *            {@code true} to escape keywords like AND, OR, etc.
     * @return escaped string, or {@code s} itself if nothing needs escaping
     */
    public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
        // Fast path: avoid allocating anything for input that is already safe.
        if (!needsEscaping(s, escapeWildcards, escapeKeywords)) {
            return s;
        }

        int len = s.length();
        StringBuilder sb = new StringBuilder(len + 8);
        // The beginning of the line is the same as the last character being
        // whitespace.
        boolean lastWhitespace = true;
        for (int i = 0; i < len;) {
            char c = s.charAt(i);
            if (isSyntaxCharacter(c, escapeWildcards)) {
                sb.append('\\').append(c);
                lastWhitespace = false;
            } else if (Character.isWhitespace(c)) {
                // Only reached for whitespace other than ' ', which is
                // handled as a syntax character above.
                sb.append(c);
                lastWhitespace = true;
            } else {
                if (escapeKeywords && lastWhitespace) {
                    int reslen = processReservedWords(s, i, sb);
                    if (reslen > 0) {
                        // The escaped word was already appended, skip over it.
                        i += reslen;
                        lastWhitespace = false;
                        continue;
                    }
                }
                sb.append(c);
                lastWhitespace = false;
            }
            ++i;
        }
        return sb.toString();
    }

    /**
     * Same logic as in {@link #escape(String, boolean, boolean)} but this one
     * simply checks whether the input string needs escaping at all or not.
     *
     * @param s
     *            lucene query to check, must not be {@code null}
     * @param escapeWildcards
     *            {@code true} to treat wildcard characters as needing escaping
     * @param escapeKeywords
     *            {@code true} to treat reserved words as needing escaping
     * @return {@code true} if {@link #escape(String, boolean, boolean)} would
     *         change the input, {@code false} otherwise
     */
    private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
        int len = s.length();
        // The beginning of the line is the same as the last character being
        // whitespace.
        boolean lastWhitespace = true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if (isSyntaxCharacter(c, escapeWildcards)) {
                return true;
            }
            if (Character.isWhitespace(c)) {
                lastWhitespace = true;
                continue;
            }
            if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0) {
                return true;
            }
            lastWhitespace = false;
        }
        return false;
    }

    /**
     * Tells whether the specified character is escaped by
     * {@link #escape(String, boolean, boolean)}. Shared by the escaping and
     * the checking loops so that the two cannot drift apart.
     *
     * @param c
     *            the character to test
     * @param escapeWildcards
     *            {@code true} to count the wildcard characters '*' and '?' in
     * @return {@code true} if the character must be preceded by a backslash
     */
    private static boolean isSyntaxCharacter(char c, boolean escapeWildcards) {
        switch (c) {
            case '\\': case '+': case '-': case '!': case '(': case ')':
            case ':': case '^': case '[': case ']': case '"': case '{':
            case '}': case '~': case '|': case '&': case '/': case ' ':
                return true;
            case '*': case '?':
                return escapeWildcards;
            default:
                return false;
        }
    }

    /**
     * Looks for a reserved word starting at the specified index and
     * optionally appends its escaped form to the specified builder.
     *
     * <p>
     * Lucene reserved words are case-sensitive for its query parser. Therefore
     * only case-sensitive hits need to be looked for. A hit that is directly
     * followed by a letter or a digit is only the beginning of a longer word,
     * such as {@code ORANGE}, {@code NOTE} or {@code AND01}, and is not
     * reported. Any other following character, or the end of the input, is
     * conservatively treated as the end of the word.
     *
     * @param s
     *            the input being escaped
     * @param fromIndex
     *            index in {@code s} where a word begins
     * @param sb
     *            receives the escaped reserved word on a hit, or {@code null}
     *            to only test for a hit
     * @return length of the reserved word in the input or 0 if no reserved word
     *         in the input
     */
    private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
        for (int w = 0; w < RESERVED_WORDS.length; w += 2) {
            String word = RESERVED_WORDS[w];
            if (!s.startsWith(word, fromIndex)) {
                continue;
            }
            int end = fromIndex + word.length();
            if (end < s.length() && Character.isLetterOrDigit(s.charAt(end))) {
                // Merely a prefix of a longer word, not a reserved word.
                continue;
            }
            if (sb != null) {
                sb.append(RESERVED_WORDS[w + 1]);
            }
            return word.length();
        }
        return 0;
    }

    /**
     * Appends an optionally field-qualified, escaped term to the specified
     * builder. The field name is appended as is, followed by ':', and the
     * term is escaped with {@link #escape(String, boolean)}.
     *
     * @param field
     *            name of the field to prefix the term with, or {@code null}
     *            to append the term only
     * @param term
     *            the term to escape, must not be {@code null}
     * @param escapeWildcards
     *            {@code true} to escape also wildcard characters in the term
     * @param result
     *            the builder to append to, must not be {@code null}
     * @return the {@code result} builder
     */
    public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
        if (field != null) {
            result.append(field).append(':');
        }
        result.append(escape(term, escapeWildcards));
        return result;
    }

    /**
     * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
     * returns the result as a new String.
     *
     * @param field
     *            name of the field to prefix the term with, or {@code null}
     *            to produce the term only
     * @param term
     *            the term to escape, must not be {@code null}
     * @param escapeWildcards
     *            {@code true} to escape also wildcard characters in the term
     * @return the escaped, optionally field-qualified term
     */
    public static String escapeTerm(String field, String term, boolean escapeWildcards) {
        return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
    }

    /**
     * Surrounds the specified term with double quotes. Every double quote and
     * backslash inside the term is preceded by a backslash; all other
     * characters are copied as is.
     *
     * @param term
     *            the term to quote, must not be {@code null}
     * @return the quoted term
     */
    public static String quoteTerm(String term) {
        int len = term.length();
        StringBuilder sb = new StringBuilder(len + 2);
        sb.append('"');
        for (int i = 0; i < len; ++i) {
            char c = term.charAt(i);
            if (c == '"' || c == '\\') {
                sb.append('\\');
            }
            sb.append(c);
        }
        sb.append('"');
        return sb.toString();
    }

}