package org.simantics.db.layer0.genericrelation;
import java.util.Collection;
import org.simantics.datatypes.literal.GUID;
import org.simantics.db.Resource;
/**
* This class contains utilities related to queries made into Lucene indexes,
* such as escaping search terms.
*
* @author Tuukka Lehtonen
*/
public class IndexQueries {
/**
 * Escapes a query string with reserved keywords always escaped.
 *
 * Shorthand for {@link #escape(String, boolean, boolean)} with
 * {@code escapeKeywords} fixed to {@code true}.
 *
 * @param s
 *            query text to escape
 * @param escapeWildcards
 *            {@code true} to also escape the wildcard characters '*' and '?'
 * @return escaped string
 */
public static String escape(String s, boolean escapeWildcards) {
    final boolean escapeKeywords = true;
    return escape(s, escapeWildcards, escapeKeywords);
}
/**
 * Returns a String where the characters that QueryParser expects to be
 * escaped are preceded by a backslash.
 *
 * Copied from
 * {@link org.apache.lucene.queryParser.QueryParser#escape(String)}, with
 * these differences visible in this implementation:
 * <ul>
 * <li>the wildcard characters '*' and '?' are escaped only when
 * {@code escapeWildcards} is {@code true}, so callers can keep wildcards
 * usable in queries;</li>
 * <li>the space character and '/' are escaped as well;</li>
 * <li>when {@code escapeKeywords} is {@code true}, the reserved words AND,
 * OR and NOT are prefixed with a backslash if they begin at the start of
 * the input or directly after a whitespace character that was not itself
 * escaped (a plain space is escaped, so it does not count).</li>
 * </ul>
 *
 * If nothing in the input needs escaping, the input instance itself is
 * returned.
 *
 * @param s
 *            lucene query to escape
 * @param escapeWildcards
 *            {@code true} to escape also wildcard characters
 * @param escapeKeywords
 *            {@code true} to escape keywords like AND, OR, etc.
 * @return escaped string
 */
public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
    if (!needsEscaping(s, escapeWildcards, escapeKeywords)) {
        return s;
    }

    final int length = s.length();
    final StringBuilder out = new StringBuilder(length + 8);

    // The start of the input is treated as if whitespace preceded it.
    boolean afterWhitespace = true;
    int pos = 0;
    while (pos < length) {
        final char ch = s.charAt(pos);

        // Characters that are part of the query syntax.
        final boolean special;
        switch (ch) {
        case '\\': case '+': case '-': case '!': case '(': case ')':
        case ':': case '^': case '[': case ']': case '"': case '{':
        case '}': case '~': case '|': case '&': case '/': case ' ':
            special = true;
            break;
        case '*': case '?':
            special = escapeWildcards;
            break;
        default:
            special = false;
            break;
        }

        if (special) {
            out.append('\\').append(ch);
            afterWhitespace = false;
        } else if (Character.isWhitespace(ch)) {
            out.append(ch);
            afterWhitespace = true;
        } else {
            // A reserved word is only looked for right after whitespace.
            int consumed = 0;
            if (escapeKeywords && afterWhitespace) {
                consumed = processReservedWords(s, pos, out);
            }
            afterWhitespace = false;
            if (consumed > 0) {
                // The escaped word was already appended; skip over it.
                pos += consumed;
                continue;
            }
            out.append(ch);
        }
        ++pos;
    }
    return out.toString();
}
/**
 * Applies the same rules as {@link #escape(String, boolean, boolean)} but
 * only reports whether the input contains anything that would be escaped,
 * without building a result.
 *
 * @param s
 *            query text to inspect
 * @param escapeWildcards
 *            {@code true} to treat '*' and '?' as characters needing escaping
 * @param escapeKeywords
 *            {@code true} to treat reserved words as needing escaping
 * @return {@code true} if at least one character or reserved word in the
 *         input would be escaped, {@code false} otherwise
 */
private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
    // The start of the input is treated as if whitespace preceded it.
    boolean afterWhitespace = true;
    for (int pos = 0, end = s.length(); pos < end; ++pos) {
        final char ch = s.charAt(pos);

        // Characters that are part of the query syntax.
        switch (ch) {
        case '\\': case '+': case '-': case '!': case '(': case ')':
        case ':': case '^': case '[': case ']': case '"': case '{':
        case '}': case '~': case '|': case '&': case '/': case ' ':
            return true;
        case '*': case '?':
            if (escapeWildcards) {
                return true;
            }
            break;
        default:
            break;
        }

        if (Character.isWhitespace(ch)) {
            afterWhitespace = true;
            continue;
        }

        // Passing null as the builder makes this a pure lookup.
        if (escapeKeywords && afterWhitespace && processReservedWords(s, pos, null) > 0) {
            return true;
        }
        afterWhitespace = false;
    }
    return false;
}
/**
 * Reserved query words and their escaped forms, stored as consecutive
 * pairs: each even index holds a reserved word and the following odd index
 * holds its replacement, which is the same word prefixed with a backslash.
 */
private static final String[] RESERVED_WORDS = {
"AND", "\\AND",
"OR", "\\OR",
"NOT", "\\NOT",
};
/**
 * Checks whether a reserved word starts at the given position of the input
 * and, if requested, appends its escaped form to a builder.
 *
 * Lucene reserved words are case-sensitive for its query parser. Therefore
 * only case-sensitive hits are looked for. The check is a plain prefix
 * match at {@code fromIndex}: the character following the word is not
 * examined.
 *
 * @param s
 *            input text
 * @param fromIndex
 *            index in {@code s} at which a reserved word is looked for
 * @param sb
 *            builder that receives the escaped replacement of a matched
 *            word, or {@code null} to only detect a match
 * @return length of the reserved word in the input or 0 if no reserved word
 *         in the input
 */
private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
    // Entries come in (word, replacement) pairs, hence the step of two.
    for (int pair = 0; pair < RESERVED_WORDS.length; pair += 2) {
        final String reserved = RESERVED_WORDS[pair];
        if (!s.startsWith(reserved, fromIndex)) {
            continue;
        }
        if (sb != null) {
            sb.append(RESERVED_WORDS[pair + 1]);
        }
        return reserved.length();
    }
    return 0;
}
/**
 * Returns a String where those characters that QueryParser expects to be
 * escaped are escaped by a preceding backslash.
 *
 * Delegates to {@link #escape(String, boolean)} with wildcard escaping
 * turned off, so '*' and '?' are left as they are.
 *
 * @param s
 *            query text to escape
 * @return escaped string
 */
public static String escape(String s) {
    final boolean escapeWildcards = false;
    return escape(s, escapeWildcards);
}
/**
 * Appends an escaped term, optionally qualified with a field name, to the
 * given builder.
 *
 * @param field
 *            field name written before the term as {@code field:}, or
 *            {@code null} to write the term alone
 * @param term
 *            term text, escaped with {@link #escape(String, boolean)}
 * @param escapeWildcards
 *            {@code true} to also escape '*' and '?' in the term
 * @param result
 *            builder to append to
 * @return the {@code result} builder
 */
public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
    if (field != null) {
        result.append(field);
        result.append(':');
    }
    final String escapedTerm = escape(term, escapeWildcards);
    return result.append(escapedTerm);
}
/**
 * Builds an escaped term, optionally qualified with a field name, as a new
 * string.
 *
 * @param field
 *            field name written before the term as {@code field:}, or
 *            {@code null} to write the term alone
 * @param term
 *            term text to escape
 * @param escapeWildcards
 *            {@code true} to also escape '*' and '?' in the term
 * @return the resulting query fragment
 */
public static String escapeTerm(String field, String term, boolean escapeWildcards) {
    final StringBuilder buffer = new StringBuilder();
    escapeTerm(field, term, escapeWildcards, buffer);
    return buffer.toString();
}
/**
 * Appends a double-quoted term, optionally qualified with a field name, to
 * the given builder. Every double quote and backslash inside the term is
 * preceded by a backslash; all other characters are copied unchanged.
 *
 * @param field
 *            field name written before the term as {@code field:}, or
 *            {@code null} to write the quoted term alone
 * @param term
 *            term text to quote
 * @param result
 *            builder to append to
 * @return the {@code result} builder
 */
public static StringBuilder quoteTerm(String field, String term, StringBuilder result) {
    if (field != null) {
        result.append(field).append(':');
    }
    result.append('"');
    for (int i = 0, n = term.length(); i < n; ++i) {
        final char ch = term.charAt(i);
        if (ch == '"' || ch == '\\') {
            result.append('\\');
        }
        result.append(ch);
    }
    return result.append('"');
}
/**
 * Builds a double-quoted term without a field qualifier.
 *
 * @param term
 *            term text to quote
 * @return the quoted term
 */
public static String quoteTerm(String term) {
    // Twice the term length is the initial capacity hint for the buffer.
    final StringBuilder buffer = new StringBuilder(term.length() * 2);
    return quoteTerm(null, term, buffer).toString();
}
/**
 * Builds a double-quoted term, optionally qualified with a field name.
 *
 * @param field
 *            field name written before the term as {@code field:}, or
 *            {@code null} to write the quoted term alone
 * @param term
 *            term text to quote
 * @return the resulting query fragment
 */
public static String quoteTerm(String field, String term) {
    // Initial capacity hint: twice the term length, plus "field:" if present.
    int capacity = term.length() * 2;
    if (field != null) {
        capacity += field.length() + 1;
    }
    return quoteTerm(field, term, new StringBuilder(capacity)).toString();
}
/**
 * Appends {@code field:term} to the given builder, with the term written
 * as a decimal number.
 *
 * @param sb
 *            builder to append to
 * @param field
 *            field name
 * @param term
 *            numeric term value
 * @return the {@code sb} builder
 */
public static StringBuilder appendLongTerm(StringBuilder sb, String field, long term) {
    sb.append(field);
    sb.append(':');
    sb.append(term);
    return sb;
}
/**
 * Builds the query fragment {@code field:term} for a numeric term.
 *
 * @param field
 *            field name
 * @param term
 *            numeric term value
 * @return the resulting query fragment
 */
public static String longTerm(String field, long term) {
    final StringBuilder buffer = new StringBuilder();
    appendLongTerm(buffer, field, term);
    return buffer.toString();
}
/**
 * Appends {@code field:id} to the given builder, where the id is the value
 * returned by the resource's {@code getResourceId()}.
 *
 * @param sb
 *            builder to append to
 * @param field
 *            field name
 * @param term
 *            resource whose id is used as the term
 * @return the builder returned by {@link #appendLongTerm(StringBuilder, String, long)}
 */
public static StringBuilder appendResourceIdTerm(StringBuilder sb, String field, Resource term) {
    final long resourceId = term.getResourceId();
    return appendLongTerm(sb, field, resourceId);
}
/**
 * Builds the query fragment {@code field:id}, where the id is the value
 * returned by the resource's {@code getResourceId()}.
 *
 * @param field
 *            field name
 * @param term
 *            resource whose id is used as the term
 * @return the resulting query fragment
 */
public static String resourceIdTerm(String field, Resource term) {
    final StringBuilder buffer = new StringBuilder();
    appendLongTerm(buffer, field, term.getResourceId());
    return buffer.toString();
}
/**
 * Concatenates the given expressions, placing the separator between each
 * pair of neighbours.
 *
 * @param withString
 *            separator inserted between expressions
 * @param exps
 *            expressions to concatenate
 * @return the joined string, or an empty string when no expressions are given
 */
private static String join(String withString, String... exps) {
    if (exps.length == 0) {
        return "";
    }
    final StringBuilder joined = new StringBuilder(128);
    joined.append(exps[0]);
    // Every later expression is written with the separator in front of it.
    for (int i = 1; i < exps.length; ++i) {
        joined.append(withString).append(exps[i]);
    }
    return joined.toString();
}
/**
 * Combines two query expressions with the AND operator.
 *
 * @param exp1
 *            left-hand expression
 * @param exp2
 *            right-hand expression
 * @return {@code exp1 AND exp2}
 */
public static String and(String exp1, String exp2) {
    return new StringBuilder()
            .append(exp1)
            .append(" AND ")
            .append(exp2)
            .toString();
}
/**
 * Combines any number of query expressions with the AND operator.
 *
 * @param exps
 *            expressions to combine
 * @return the expressions separated by {@code " AND "}, or an empty string
 *         when none are given
 */
public static String and(String... exps) {
    final String operator = " AND ";
    return join(operator, exps);
}
/**
 * Combines two query expressions with the OR operator.
 *
 * @param exp1
 *            left-hand expression
 * @param exp2
 *            right-hand expression
 * @return {@code exp1 OR exp2}
 */
public static String or(String exp1, String exp2) {
    return new StringBuilder()
            .append(exp1)
            .append(" OR ")
            .append(exp2)
            .toString();
}
/**
 * Combines any number of query expressions with the OR operator.
 *
 * @param exps
 *            expressions to combine
 * @return the expressions separated by {@code " OR "}, or an empty string
 *         when none are given
 */
public static String or(String... exps) {
    final String operator = " OR ";
    return join(operator, exps);
}
/**
 * Returns the index string of the given GUID.
 *
 * @param guid
 *            GUID to convert, may be {@code null}
 * @return the value of {@code guid.indexString()}, or an empty string when
 *         {@code guid} is {@code null}
 */
public static String idFromGUID(GUID guid) {
    if (guid == null) {
        return "";
    }
    return guid.indexString();
}
/**
 * Builds a space-separated list of resource ids, starting with the id of
 * {@code r} and followed by the ids of the resources in {@code rs} in
 * iteration order.
 *
 * The collection parameter carries a type argument because iterating a raw
 * {@code Collection} with a {@code Resource} loop variable does not
 * compile.
 *
 * @param r
 *            resource whose id is written first
 * @param rs
 *            further resources whose ids are appended
 * @return the ids separated by single spaces
 */
public static String toResourceIdString(Resource r, Collection<? extends Resource> rs) {
    StringBuilder sb = new StringBuilder();
    sb.append(r.getResourceId());
    for (Resource rr : rs) {
        sb.append(' ').append(rr.getResourceId());
    }
    return sb.toString();
}
/**
 * Builds a space-separated list of the ids of the given resources, in
 * iteration order.
 *
 * The collection parameter carries a type argument because iterating a raw
 * {@code Collection} with a {@code Resource} loop variable does not
 * compile.
 *
 * @param rs
 *            resources whose ids are listed
 * @return the ids separated by single spaces, or an empty string when the
 *         collection is empty
 */
public static String toResourceIdString(Collection<? extends Resource> rs) {
    if (rs.isEmpty()) {
        return "";
    }
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (Resource rr : rs) {
        // A separator goes before every id except the first.
        if (!first) {
            sb.append(' ');
        }
        first = false;
        sb.append(rr.getResourceId());
    }
    return sb.toString();
}
// public static void main(String[] args) {
// System.out.println("esc: " + escape("AND01", true, true));
// System.out.println("esc: " + escape("AND 01", true, true));
// System.out.println("esc: " + escape(" AND 01", true, true));
// }
}