--- /dev/null
+package org.simantics.db.layer0.genericrelation;\r
+\r
+\r
+/**\r
+ * This class contains utilities related to queries made into Lucene indexes,\r
+ * such as escaping search terms.\r
+ * \r
+ * @author Tuukka Lehtonen\r
+ */\r
public class IndexQueries {

    /**
     * Reserved words of the Lucene query parser paired with the replacement
     * written in their place. Even indexes hold the reserved word, the
     * following odd index holds its replacement. The parser only recognises
     * the upper-case spelling, so the lower-case form is a plain term.
     */
    private static final String[] RESERVED_WORDS = {
        "AND", "and",
        "OR", "or",
        "NOT", "not"
    };

    /**
     * Escapes the query syntax characters of the input and neutralises
     * reserved words, leaving wildcard characters untouched.
     * <p>
     * Same as calling {@link #escape(String, boolean)} with escapeWildcards
     * set to <code>false</code>.
     *
     * @param s
     *            lucene query to escape, must not be <code>null</code>
     * @return escaped string
     * @throws NullPointerException
     *             if <code>s</code> is <code>null</code>
     */
    public static String escape(String s) {
        return escape(s, false);
    }

    /**
     * Same as calling {@link #escape(String, boolean, boolean)} with
     * escapeKeywords set to <code>true</code>.
     *
     * @param s
     *            lucene query to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters '*' and
     *            '?'
     * @return escaped string
     * @throws NullPointerException
     *             if <code>s</code> is <code>null</code>
     */
    public static String escape(String s, boolean escapeWildcards) {
        return escape(s, escapeWildcards, true);
    }

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding <code>\</code>.
     * <p>
     * Derived from
     * {@link org.apache.lucene.queryParser.QueryParser#escape(String)}, with
     * two differences: escaping of the wildcard characters '*' and '?' is
     * optional so that clients can still use wildcards in queries, and
     * reserved words can optionally be replaced by their lower-case spelling.
     * <p>
     * A reserved word is only replaced when it starts at the beginning of the
     * input or directly after whitespace and is followed by whitespace or the
     * end of the input. For example <code>"AND 01"</code> becomes
     * <code>"and 01"</code> while <code>"AND01"</code> is returned unchanged.
     * <p>
     * If nothing needs escaping the input instance itself is returned.
     *
     * @param s
     *            lucene query to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @param escapeKeywords
     *            <code>true</code> to replace the reserved words AND, OR and
     *            NOT
     * @return escaped string
     * @throws NullPointerException
     *             if <code>s</code> is <code>null</code>
     */
    public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
        // Avoid allocating a builder for the common case of a clean input.
        if (!needsEscaping(s, escapeWildcards, escapeKeywords)) {
            return s;
        }

        int len = s.length();
        StringBuilder sb = new StringBuilder(len + 8);
        // The beginning of the input is treated the same as the previous
        // character being whitespace: a reserved word may start there.
        boolean lastWhitespace = true;
        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (isSpecial(c, escapeWildcards)) {
                sb.append('\\').append(c);
                lastWhitespace = false;
            } else if (Character.isWhitespace(c)) {
                sb.append(c);
                lastWhitespace = true;
            } else {
                if (escapeKeywords && lastWhitespace) {
                    int reslen = processReservedWords(s, i, sb);
                    if (reslen > 0) {
                        // The replacement was already appended; skip the
                        // whole reserved word in the input.
                        i += reslen;
                        lastWhitespace = false;
                        continue;
                    }
                }
                sb.append(c);
                lastWhitespace = false;
            }
            ++i;
        }
        return sb.toString();
    }

    /**
     * Appends <code>term</code> in escaped form to <code>result</code>,
     * prefixed by <code>field</code> and a ':' if a field is given. The field
     * name is appended verbatim, only the term is escaped using
     * {@link #escape(String, boolean)}.
     *
     * @param field
     *            field name or <code>null</code> to write the term only
     * @param term
     *            term to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @param result
     *            builder to append to
     * @return the <code>result</code> builder that was passed in
     */
    public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
        if (field != null) {
            result.append(field).append(':');
        }
        result.append(escape(term, escapeWildcards));
        return result;
    }

    /**
     * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
     * returns the outcome as a new String.
     *
     * @param field
     *            field name or <code>null</code> to write the term only
     * @param term
     *            term to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @return <code>field:escapedTerm</code>, or just the escaped term if
     *         <code>field</code> is <code>null</code>
     */
    public static String escapeTerm(String field, String term, boolean escapeWildcards) {
        return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
    }

    /**
     * Same scan as in {@link #escape(String, boolean, boolean)} but this one
     * only checks whether the input would be changed by escaping.
     *
     * @param s
     *            lucene query to check
     * @param escapeWildcards
     *            <code>true</code> if wildcard characters count as needing
     *            escaping
     * @param escapeKeywords
     *            <code>true</code> if reserved words count as needing escaping
     * @return <code>true</code> if escaping would alter the input
     */
    private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
        int len = s.length();
        // The beginning of the input is treated the same as the previous
        // character being whitespace.
        boolean lastWhitespace = true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if (isSpecial(c, escapeWildcards)) {
                return true;
            }
            if (Character.isWhitespace(c)) {
                lastWhitespace = true;
                continue;
            }
            if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0) {
                return true;
            }
            lastWhitespace = false;
        }
        return false;
    }

    /**
     * Tells whether the character is part of the query syntax and must be
     * escaped. Shared by the escaping and the checking pass so that both
     * always agree on the character set.
     *
     * @param c
     *            character to test
     * @param escapeWildcards
     *            <code>true</code> to treat '*' and '?' as special as well
     * @return <code>true</code> if the character must be preceded by a
     *         backslash
     */
    private static boolean isSpecial(char c, boolean escapeWildcards) {
        switch (c) {
        case '\\':
        case '+':
        case '-':
        case '!':
        case '(':
        case ')':
        case ':':
        case '^':
        case '[':
        case ']':
        case '"':
        case '{':
        case '}':
        case '~':
        case '|':
        case '&':
        case '/':
            return true;
        case '*':
        case '?':
            return escapeWildcards;
        default:
            return false;
        }
    }

    /**
     * Checks whether a reserved word starts at the given index and, if so,
     * optionally writes its replacement.
     * <p>
     * Lucene reserved words are case-sensitive for its query parser, so only
     * case-sensitive hits are looked for. A hit additionally requires that the
     * word is followed by whitespace or the end of the input; otherwise it is
     * just the prefix of a longer term (e.g. <code>AND01</code>,
     * <code>ORANGE</code>) and must be left alone.
     *
     * @param s
     *            input being scanned
     * @param fromIndex
     *            index in <code>s</code> where a reserved word might start
     * @param sb
     *            builder that receives the replacement of a found reserved
     *            word, or <code>null</code> to only detect
     * @return length of the reserved word in the input or 0 if no reserved word
     *         starts at <code>fromIndex</code>
     */
    private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
        final int total = RESERVED_WORDS.length;
        for (int w = 0; w < total; w += 2) {
            String word = RESERVED_WORDS[w];
            if (!s.startsWith(word, fromIndex)) {
                continue;
            }
            int end = fromIndex + word.length();
            if (end < s.length() && !Character.isWhitespace(s.charAt(end))) {
                // Only a prefix of a longer term, not a reserved word.
                continue;
            }
            if (sb != null) {
                sb.append(RESERVED_WORDS[w + 1]);
            }
            return word.length();
        }
        return 0;
    }

}