]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.db.layer0/src/org/simantics/db/layer0/genericrelation/IndexQueries.java
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.db.layer0 / src / org / simantics / db / layer0 / genericrelation / IndexQueries.java
diff --git a/bundles/org.simantics.db.layer0/src/org/simantics/db/layer0/genericrelation/IndexQueries.java b/bundles/org.simantics.db.layer0/src/org/simantics/db/layer0/genericrelation/IndexQueries.java
new file mode 100644 (file)
index 0000000..851497d
--- /dev/null
@@ -0,0 +1,171 @@
+package org.simantics.db.layer0.genericrelation;\r
+\r
+\r
/**
 * Utilities related to queries made into Lucene indexes, most importantly
 * escaping of search terms so that they are not interpreted as query syntax.
 *
 * @author Tuukka Lehtonen
 */
public class IndexQueries {

    /**
     * Reserved words of the query syntax paired with their replacements: every
     * even index holds a keyword (matched case-sensitively) and the following
     * odd index holds the text written to the output in its place.
     */
    private static final String[] RESERVED_WORDS = {
        "AND", "and",
        "OR", "or",
        "NOT", "not"
    };

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding <code>\</code>. Wildcard characters
     * are left untouched and keywords are escaped.
     *
     * @param s
     *            lucene query to escape
     * @return escaped string
     */
    public static String escape(String s) {
        return escape(s, false);
    }

    /**
     * Same as calling {@link #escape(String, boolean, boolean)} with
     * escapeKeywords set to <code>true</code>.
     *
     * @param s
     *            lucene query to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @return escaped string
     */
    public static String escape(String s, boolean escapeWildcards) {
        return escape(s, escapeWildcards, true);
    }

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding <code>\</code>.
     *
     * Copied from
     * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
     * disabled escaping of wildcard characters '*' and '?'. Clients must escape
     * wildcards themselves to allow use of wildcards in queries.
     *
     * A keyword is neutralized by replacing it with its lower case form, and
     * only when it stands alone, i.e. it is delimited by whitespace or by the
     * beginning/end of the input. Examples with keywords escaped:
     * <code>"a AND b"</code> becomes <code>"a and b"</code> while
     * <code>"AND01"</code> and <code>"ORANGE"</code> are returned unchanged.
     *
     * @param s
     *            lucene query to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @param escapeKeywords
     *            <code>true</code> to escape keywords like AND, OR, etc.
     * @return escaped string; the input instance itself when nothing needed
     *         escaping
     */
    public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
        if (!needsEscaping(s, escapeWildcards, escapeKeywords))
            return s;

        int len = s.length();
        StringBuilder sb = new StringBuilder(len + 8);
        // The beginning of the line is the same as the last character being
        // whitespace.
        boolean lastWhitespace = true;
        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards)) {
                sb.append('\\').append(c);
                lastWhitespace = false;
            } else if (Character.isWhitespace(c)) {
                sb.append(c);
                lastWhitespace = true;
            } else {
                if (escapeKeywords && lastWhitespace) {
                    int reslen = processReservedWords(s, i, sb);
                    if (reslen > 0) {
                        // The replacement was already written to sb, skip
                        // over the whole keyword in the input.
                        i += reslen;
                        lastWhitespace = false;
                        continue;
                    }
                }
                sb.append(c);
                lastWhitespace = false;
            }
            ++i;
        }
        return sb.toString();
    }

    /**
     * Same logic as in {@link #escape(String, boolean, boolean)} but this one
     * simply checks whether the input string needs escaping at all or not.
     *
     * @param s
     *            lucene query to check
     * @param escapeWildcards
     *            <code>true</code> to consider wildcard characters as needing
     *            escaping
     * @param escapeKeywords
     *            <code>true</code> to consider stand-alone keywords as needing
     *            escaping
     * @return <code>true</code> if escaping would change the input
     */
    private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
        int len = s.length();
        // The beginning of the line is the same as the last character being
        // whitespace.
        boolean lastWhitespace = true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards))
                return true;
            if (Character.isWhitespace(c)) {
                lastWhitespace = true;
                continue;
            }
            if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0)
                return true;
            lastWhitespace = false;
        }
        return false;
    }

    /**
     * Tells whether the character is part of the query syntax and must
     * therefore be escaped.
     *
     * @param c
     *            character to check
     * @param escapeWildcards
     *            <code>true</code> to treat '*' and '?' as special also
     * @return <code>true</code> if the character must be escaped
     */
    private static boolean isSpecialCharacter(char c, boolean escapeWildcards) {
        switch (c) {
            case '\\': case '+': case '-': case '!': case '(': case ')':
            case ':': case '^': case '[': case ']': case '\"': case '{':
            case '}': case '~': case '|': case '&': case '/':
                return true;
            case '*': case '?':
                return escapeWildcards;
            default:
                return false;
        }
    }

    /**
     * Lucene reserved words are case-sensitive for its query parser. Therefore
     * only case-sensitive hits need to be looked for.
     *
     * A hit is reported only when the reserved word is immediately followed by
     * whitespace or the end of the input. Otherwise the word is just the
     * beginning of a longer term (e.g. "ORANGE") which must not be modified.
     * The caller is responsible for ensuring that <code>fromIndex</code> is at
     * the beginning of the input or preceded by whitespace.
     *
     * @param s
     *            the input being escaped
     * @param fromIndex
     *            index in the input at which a reserved word is looked for
     * @param sb
     *            buffer to append the replacement to, or <code>null</code> to
     *            only detect the reserved word
     * @return length of the reserved word in the input or 0 if no reserved word
     *         in the input
     */
    private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
        final int inputLength = s.length();
        for (int w = 0; w < RESERVED_WORDS.length; w += 2) {
            String word = RESERVED_WORDS[w];
            if (!s.startsWith(word, fromIndex))
                continue;
            int end = fromIndex + word.length();
            if (end < inputLength && !Character.isWhitespace(s.charAt(end)))
                continue;
            if (sb != null)
                sb.append(RESERVED_WORDS[w + 1]);
            return word.length();
        }
        return 0;
    }

    /**
     * Appends an escaped term, optionally qualified with a field name, into
     * the specified buffer. The field name is appended as is, followed by ':'.
     * The term is escaped with {@link #escape(String, boolean)}.
     *
     * @param field
     *            name of the field or <code>null</code> to append only the term
     * @param term
     *            the term to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @param result
     *            the buffer to append to
     * @return the <code>result</code> buffer
     */
    public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
        if (field != null)
            result.append(field).append(':');
        result.append(escape(term, escapeWildcards));
        return result;
    }

    /**
     * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
     * returns the result as a new String.
     *
     * @param field
     *            name of the field or <code>null</code> to produce only the term
     * @param term
     *            the term to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @return the escaped term, prefixed with <code>field:</code> if a field
     *         was given
     */
    public static String escapeTerm(String field, String term, boolean escapeWildcards) {
        return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
    }

}