package org.simantics.db.layer0.genericrelation; /** * This class contains utilities related to queries made into Lucene indexes, * such as escaping search terms. * * @author Tuukka Lehtonen */ public class IndexQueries { /** * Same as calling {@link #escape(String, boolean, boolean)} with * escapeKeywords set to true. * * @param s * @param escapeWildcards * @return escaped string */ public static String escape(String s, boolean escapeWildcards) { return escape(s, escapeWildcards, true); } /** * Returns a String where those characters that QueryParser expects to be * escaped are escaped by a preceding \. * * Copied from * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but * disabled escaping of wildcard characters '*' and '?'. Clients must escape * wildcards themselves to allow use of wildcards in queries. * * @param s * lucene query to escape * @param escapeWildcards * true to escape also wildcard characters * @param escapeKeywords * true to escape keywords like AND, OR, etc. * @return escaped string */ public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) { if (!needsEscaping(s, escapeWildcards, escapeKeywords)) return s; StringBuilder sb = new StringBuilder(s.length() + 8); int len = s.length(); // The beginning of the line is the same as the last character being // whitespace. boolean lastWhitespace = true; for (int i = 0; i < len;) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) { sb.append('\\'); sb.append(c); lastWhitespace = false; } else if (Character.isWhitespace(c)) { sb.append(c); lastWhitespace = true; } else { if (escapeKeywords && lastWhitespace) { int reslen = processReservedWords(s, i, sb); if (reslen > 0) { i += reslen; lastWhitespace = false; continue; } } sb.append(c); lastWhitespace = false; } ++i; } return sb.toString(); } /** * Same logic as in {@link #escape(String, boolean, boolean)} but this one * simply checks whether the input string needs escaping at all or not. * * @param s * @param escapeWildcards * @param escapeKeywords * @return */ private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) { int len = s.length(); // The beginning of the line is the same as the last character being // whitespace. boolean lastWhitespace = true; for (int i = 0; i < len;) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) { return true; } else if (Character.isWhitespace(c)) { lastWhitespace = true; } else { if (escapeKeywords && lastWhitespace) { int reslen = processReservedWords(s, i, null); if (reslen > 0) return true; } lastWhitespace = false; } ++i; } return false; } private static final String[] RESERVED_WORDS = { "AND", "\\AND", "OR", "\\OR", "NOT", "\\NOT", }; /** * Lucene reserved words are case-sensitive for its query parser. Therefore * only case-sensitive hits need to be looked for. * * @param s * @param fromIndex * @return length of the reserved word in the input or 0 if no reserved word * in the input */ private static int processReservedWords(String s, int fromIndex, StringBuilder sb) { final int total = RESERVED_WORDS.length; for (int w = 0; w < total; w += 2) { String word = RESERVED_WORDS[w]; int len = word.length(); if (s.regionMatches(false, fromIndex, word, 0, len)) { if (sb != null) { String replacement = RESERVED_WORDS[w+1]; sb.append(replacement); } return len; } } return 0; } /** * Returns a String where those characters that QueryParser expects to be * escaped are escaped by a preceding \. */ public static String escape(String s) { return escape(s, false); } public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) { if (field != null) result.append(field).append(':'); result.append( escape(term, escapeWildcards) ); return result; } public static String escapeTerm(String field, String term, boolean escapeWildcards) { return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString(); } public static StringBuilder quoteTerm(String field, String term, StringBuilder result) { if (field != null) result.append(field).append(':'); result.append("\""); result.append(term.replaceAll("(\"|\\\\)", "\\\\$0")); result.append("\""); return result; } public static String quoteTerm(String term) { return quoteTerm(null, term, new StringBuilder(term.length()*2)).toString(); } public static String quoteTerm(String field, String term) { return quoteTerm(field, term, new StringBuilder( term.length()*2 + (field != null ? field.length() + 1 : 0)) ).toString(); } private static String join(String withString, String... exps) { if (exps.length == 0) return ""; StringBuilder sb = new StringBuilder(128); for (int i = 0; i < exps.length - 1; ++i) { sb.append(exps[i]).append(withString); } sb.append(exps[exps.length - 1]); return sb.toString(); } public static String and(String exp1, String exp2) { return exp1 + " AND " + exp2; } public static String and(String... exps) { return join(" AND ", exps); } public static String or(String exp1, String exp2) { return exp1 + " OR " + exp2; } public static String or(String... exps) { return join(" OR ", exps); } // public static void main(String[] args) { // System.out.println("esc: " + escape("AND01", true, true)); // System.out.println("esc: " + escape("AND 01", true, true)); // System.out.println("esc: " + escape(" AND 01", true, true)); // } }