]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.db.layer0/src/org/simantics/db/layer0/genericrelation/IndexQueries.java
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.db.layer0 / src / org / simantics / db / layer0 / genericrelation / IndexQueries.java
1 package org.simantics.db.layer0.genericrelation;\r
2 \r
3 \r
4 /**\r
5  * This class contains utilities related to queries made into Lucene indexes,\r
6  * such as escaping search terms.\r
7  * \r
8  * @author Tuukka Lehtonen\r
9  */\r
public class IndexQueries {

    /**
     * Reserved query keywords paired with their replacements: each even index
     * holds a keyword and the following odd index holds the text that is
     * emitted in its place. The keywords are matched case-sensitively, so
     * writing them in lower case is enough to neutralize them.
     */
    private static final String[] RESERVED_WORDS = {
        "AND", "and",
        "OR", "or",
        "NOT", "not"
    };

    /**
     * Escapes query syntax characters and reserved keywords in the given
     * string but leaves the wildcard characters <code>*</code> and
     * <code>?</code> untouched.
     *
     * Same as calling {@link #escape(String, boolean)} with escapeWildcards
     * set to <code>false</code>.
     *
     * @param s
     *            text to escape, must not be <code>null</code>
     * @return escaped string, or <code>s</code> itself if nothing needed
     *         escaping
     */
    public static String escape(String s) {
        return escape(s, false);
    }

    /**
     * Same as calling {@link #escape(String, boolean, boolean)} with
     * escapeKeywords set to <code>true</code>.
     *
     * @param s
     *            text to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @return escaped string, or <code>s</code> itself if nothing needed
     *         escaping
     */
    public static String escape(String s, boolean escapeWildcards) {
        return escape(s, escapeWildcards, true);
    }

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding <code>\</code>.
     *
     * Modelled on Lucene's <code>QueryParser.escape(String)</code>, except
     * that escaping of the wildcard characters '*' and '?' is optional so that
     * clients can allow wildcards in queries, and that reserved keywords can
     * optionally be neutralized as well.
     *
     * A keyword is only rewritten (to lower case) when it stands alone, i.e.
     * it starts at the beginning of the input or right after whitespace and is
     * followed by whitespace or the end of the input. Text that merely starts
     * with a keyword, such as <code>ORANGE</code> or <code>AND01</code>, is an
     * ordinary term and is left as it is.
     *
     * @param s
     *            lucene query to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @param escapeKeywords
     *            <code>true</code> to neutralize the keywords AND, OR and NOT
     * @return escaped string, or <code>s</code> itself if nothing needed
     *         escaping
     */
    public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
        // Fast path: avoid allocating a builder when the input is already safe.
        if (!needsEscaping(s, escapeWildcards, escapeKeywords)) {
            return s;
        }

        int len = s.length();
        StringBuilder sb = new StringBuilder(len + 8);
        // The beginning of the input counts as "preceded by whitespace" so
        // that a keyword at index 0 is recognized.
        boolean lastWhitespace = true;
        for (int i = 0; i < len;) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards)) {
                sb.append('\\').append(c);
                lastWhitespace = false;
            } else if (Character.isWhitespace(c)) {
                sb.append(c);
                lastWhitespace = true;
            } else {
                if (escapeKeywords && lastWhitespace) {
                    int reslen = processReservedWords(s, i, sb);
                    if (reslen > 0) {
                        // The replacement has already been appended; skip
                        // over the whole keyword in the input.
                        i += reslen;
                        lastWhitespace = false;
                        continue;
                    }
                }
                sb.append(c);
                lastWhitespace = false;
            }
            ++i;
        }
        return sb.toString();
    }

    /**
     * Appends <code>field:term</code> to the given builder, or just the term
     * if no field is given. The term is escaped with
     * {@link #escape(String, boolean)}; the field name is appended as it is.
     *
     * @param field
     *            field name to prefix the term with or <code>null</code> for
     *            no prefix
     * @param term
     *            term to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @param result
     *            builder to append to, must not be <code>null</code>
     * @return the <code>result</code> builder
     */
    public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
        if (field != null) {
            result.append(field).append(':');
        }
        result.append(escape(term, escapeWildcards));
        return result;
    }

    /**
     * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
     * returns the result as a new String.
     *
     * @param field
     *            field name to prefix the term with or <code>null</code> for
     *            no prefix
     * @param term
     *            term to escape, must not be <code>null</code>
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @return <code>field:escapedTerm</code> or just the escaped term if
     *         <code>field</code> is <code>null</code>
     */
    public static String escapeTerm(String field, String term, boolean escapeWildcards) {
        StringBuilder sb = new StringBuilder();
        return escapeTerm(field, term, escapeWildcards, sb).toString();
    }

    /**
     * Same logic as in {@link #escape(String, boolean, boolean)} but this one
     * simply checks whether the input string needs escaping at all or not.
     *
     * @param s
     *            text to inspect
     * @param escapeWildcards
     *            <code>true</code> if wildcard characters count as needing
     *            escaping
     * @param escapeKeywords
     *            <code>true</code> if standalone reserved keywords count as
     *            needing escaping
     * @return <code>true</code> if escaping would change the input
     */
    private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
        int len = s.length();
        // The beginning of the input is treated like preceding whitespace.
        boolean lastWhitespace = true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards)) {
                return true;
            }
            if (Character.isWhitespace(c)) {
                lastWhitespace = true;
                continue;
            }
            if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0) {
                return true;
            }
            lastWhitespace = false;
        }
        return false;
    }

    /**
     * Tells whether the given character is part of the query syntax and must
     * therefore be preceded by a backslash. Shared by
     * {@link #escape(String, boolean, boolean)} and
     * {@link #needsEscaping(String, boolean, boolean)} so that both always
     * agree on the character set.
     *
     * @param c
     *            character to test
     * @param escapeWildcards
     *            <code>true</code> to treat '*' and '?' as special as well
     * @return <code>true</code> if the character must be escaped
     */
    private static boolean isSpecialCharacter(char c, boolean escapeWildcards) {
        switch (c) {
        case '\\':
        case '+':
        case '-':
        case '!':
        case '(':
        case ')':
        case ':':
        case '^':
        case '[':
        case ']':
        case '"':
        case '{':
        case '}':
        case '~':
        case '|':
        case '&':
        case '/':
            return true;
        case '*':
        case '?':
            return escapeWildcards;
        default:
            return false;
        }
    }

    /**
     * Looks for a standalone reserved word starting at the given index.
     * Reserved words are case-sensitive for the query parser, so only
     * case-sensitive hits are looked for. A hit additionally requires that the
     * word is followed by whitespace or the end of the input; otherwise the
     * text is a longer ordinary term that only happens to begin with a
     * keyword.
     *
     * The caller is responsible for checking that the word is preceded by
     * whitespace or the beginning of the input.
     *
     * @param s
     *            input text
     * @param fromIndex
     *            index in <code>s</code> where a reserved word may start
     * @param sb
     *            builder that receives the replacement for a found reserved
     *            word, or <code>null</code> to only detect the word
     * @return length of the reserved word in the input or 0 if no reserved word
     *         in the input
     */
    private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
        for (int w = 0; w < RESERVED_WORDS.length; w += 2) {
            String word = RESERVED_WORDS[w];
            if (!s.startsWith(word, fromIndex)) {
                continue;
            }
            int end = fromIndex + word.length();
            if (end < s.length() && !Character.isWhitespace(s.charAt(end))) {
                // Only a prefix of a longer term, e.g. "ORANGE": not a keyword.
                continue;
            }
            if (sb != null) {
                sb.append(RESERVED_WORDS[w + 1]);
            }
            return word.length();
        }
        return 0;
    }

}