package org.simantics.db.layer0.genericrelation; /** * This class contains utilities related to queries made into Lucene indexes, * such as escaping search terms. * * @author Tuukka Lehtonen */ public class IndexQueries { /** * Same as calling {@link #escape(String, boolean, boolean)} with * escapeKeywords set to true. * * @param s * @param escapeWildcards * @return escaped string */ public static String escape(String s, boolean escapeWildcards) { return escape(s, escapeWildcards, true); } /** * Returns a String where those characters that QueryParser expects to be * escaped are escaped by a preceding \. * * Copied from * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but * disabled escaping of wildcard characters '*' and '?'. Clients must escape * wildcards themselves to allow use of wildcards in queries. * * @param s * lucene query to escape * @param escapeWildcards * true to escape also wildcard characters * @param escapeKeywords * true to escape keywords like AND, OR, etc. * @return escaped string */ public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) { if (!needsEscaping(s, escapeWildcards, escapeKeywords)) return s; StringBuilder sb = new StringBuilder(s.length() + 8); int len = s.length(); // The beginning of the line is the same as the last character being // whitespace. boolean lastWhitespace = true; for (int i = 0; i < len;) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '|' || c == '&' || c == '/' || (escapeWildcards && (c == '*' || c == '?'))) { sb.append('\\'); sb.append(c); lastWhitespace = false; } else if (Character.isWhitespace(c)) { sb.append(c); lastWhitespace = true; } else { if (escapeKeywords && lastWhitespace) { int reslen = processReservedWords(s, i, sb); if (reslen > 0) { i += reslen; lastWhitespace = false; continue; } } sb.append(c); lastWhitespace = false; } ++i; } return sb.toString(); } /** * Same logic as in {@link #escape(String, boolean, boolean)} but this one * simply checks whether the input string needs escaping at all or not. * * @param s * @param escapeWildcards * @param escapeKeywords * @return */ private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) { int len = s.length(); // The beginning of the line is the same as the last character being // whitespace. boolean lastWhitespace = true; for (int i = 0; i < len;) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '|' || c == '&' || c == '/' || (escapeWildcards && (c == '*' || c == '?'))) { return true; } else if (Character.isWhitespace(c)) { lastWhitespace = true; } else { if (escapeKeywords && lastWhitespace) { int reslen = processReservedWords(s, i, null); if (reslen > 0) return true; } lastWhitespace = false; } ++i; } return false; } private static final String[] RESERVED_WORDS = { "AND", "and", "OR", "or" }; /** * Lucene reserved words are case-sensitive for its query parser. Therefore * only case-sensitive hits need to be looked for. * * @param s * @param fromIndex * @return length of the reserved word in the input or 0 if no reserved word * in the input */ private static int processReservedWords(String s, int fromIndex, StringBuilder sb) { final int total = RESERVED_WORDS.length; for (int w = 0; w < total; w += 2) { String word = RESERVED_WORDS[w]; int len = word.length(); if (s.regionMatches(false, fromIndex, word, 0, len)) { if (sb != null) { String replacement = RESERVED_WORDS[w+1]; sb.append(replacement); } return len; } } return 0; } /** * Returns a String where those characters that QueryParser expects to be * escaped are escaped by a preceding \. */ public static String escape(String s) { return escape(s, false); } public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) { if (field != null) result.append(field).append(':'); result.append( escape(term, escapeWildcards) ); return result; } public static String escapeTerm(String field, String term, boolean escapeWildcards) { StringBuilder sb = new StringBuilder(); return escapeTerm(field, term, escapeWildcards, sb).toString(); } // public static void main(String[] args) { // System.out.println("esc: " + escape("AND01", true, true)); // System.out.println("esc: " + escape("AND 01", true, true)); // System.out.println("esc: " + escape(" AND 01", true, true)); // } }