Index query fixes after commit 5e340942
[simantics/platform.git] / bundles / org.simantics.db.layer0 / src / org / simantics / db / layer0 / genericrelation / IndexQueries.java
1 package org.simantics.db.layer0.genericrelation;
2
3
/**
 * This class contains utilities related to queries made into Lucene indexes,
 * such as escaping search terms, quoting terms and combining query
 * expressions with boolean operators.
 *
 * All methods are static and keep no state.
 *
 * @author Tuukka Lehtonen
 */
public class IndexQueries {

        /**
         * Same as calling {@link #escape(String, boolean, boolean)} with
         * escapeKeywords set to <code>true</code>.
         *
         * @param s
         *            lucene query to escape
         * @param escapeWildcards
         *            <code>true</code> to escape also wildcard characters
         * @return escaped string
         */
        public static String escape(String s, boolean escapeWildcards) {
                return escape(s, escapeWildcards, true);
        }

        /**
         * Returns a String where those characters that QueryParser expects to be
         * escaped are escaped by a preceding <code>\</code>.
         *
         * Copied from
         * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
         * escaping of the wildcard characters '*' and '?' is optional. When it is
         * disabled, clients must escape wildcards themselves to allow use of
         * wildcards in queries. The space character <code>' '</code> is always
         * escaped.
         *
         * Reserved words are only looked for at the beginning of the input and
         * directly after an unescaped whitespace character. A space is escaped
         * and therefore does not count as such whitespace.
         *
         * @param s
         *            lucene query to escape
         * @param escapeWildcards
         *            <code>true</code> to escape also wildcard characters
         * @param escapeKeywords
         *            <code>true</code> to escape keywords like AND, OR, etc.
         * @return escaped string, or <code>s</code> itself if nothing in it needs
         *         escaping
         */
        public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
                if (!needsEscaping(s, escapeWildcards, escapeKeywords))
                        return s;

                int len = s.length();
                StringBuilder sb = new StringBuilder(len + 8);
                // The beginning of the line is the same as the last character being
                // whitespace.
                boolean lastWhitespace = true;
                for (int i = 0; i < len;) {
                        char c = s.charAt(i);
                        if (isSpecialCharacter(c, escapeWildcards)) {
                                sb.append('\\').append(c);
                                lastWhitespace = false;
                        } else if (Character.isWhitespace(c)) {
                                sb.append(c);
                                lastWhitespace = true;
                        } else {
                                if (escapeKeywords && lastWhitespace) {
                                        int reslen = processReservedWords(s, i, sb);
                                        if (reslen > 0) {
                                                // The escaped keyword was already appended to sb,
                                                // skip over it in the input.
                                                i += reslen;
                                                lastWhitespace = false;
                                                continue;
                                        }
                                }
                                sb.append(c);
                                lastWhitespace = false;
                        }
                        ++i;
                }
                return sb.toString();
        }

        /**
         * Same logic as in {@link #escape(String, boolean, boolean)} but this one
         * simply checks whether the input string needs escaping at all or not.
         *
         * @param s
         *            lucene query to check
         * @param escapeWildcards
         *            <code>true</code> to treat wildcard characters as needing
         *            escaping
         * @param escapeKeywords
         *            <code>true</code> to treat keywords like AND, OR, etc. as
         *            needing escaping
         * @return <code>true</code> if {@link #escape(String, boolean, boolean)}
         *         would change the input, <code>false</code> otherwise
         */
        private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
                int len = s.length();
                // The beginning of the line is the same as the last character being
                // whitespace.
                boolean lastWhitespace = true;
                for (int i = 0; i < len; ++i) {
                        char c = s.charAt(i);
                        if (isSpecialCharacter(c, escapeWildcards))
                                return true;
                        if (Character.isWhitespace(c)) {
                                lastWhitespace = true;
                        } else {
                                if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0)
                                        return true;
                                lastWhitespace = false;
                        }
                }
                return false;
        }

        /**
         * Tells whether the specified character must be preceded by
         * <code>\</code> in an escaped query. This is the single definition shared
         * by {@link #escape(String, boolean, boolean)} and
         * {@link #needsEscaping(String, boolean, boolean)}.
         *
         * @param c
         *            the character to check
         * @param escapeWildcards
         *            <code>true</code> to include the wildcard characters '*' and
         *            '?'
         * @return <code>true</code> if the character must be escaped
         */
        private static boolean isSpecialCharacter(char c, boolean escapeWildcards) {
                switch (c) {
                case '\\': case '+': case '-': case '!': case '(': case ')': case ':':
                case '^': case '[': case ']': case '"': case '{': case '}': case '~':
                case '|': case '&': case '/': case ' ':
                        return true;
                case '*': case '?':
                        return escapeWildcards;
                default:
                        return false;
                }
        }

        /**
         * Pairs of strings: a reserved word at every even index, followed by its
         * escaped replacement at the next odd index.
         */
        private static final String[] RESERVED_WORDS = {
                "AND", "\\AND",
                "OR", "\\OR",
                "NOT", "\\NOT",
        };

        /**
         * Lucene reserved words are case-sensitive for its query parser. Therefore
         * only case-sensitive hits need to be looked for.
         *
         * Note that this only checks that the input contains a reserved word
         * starting at <code>fromIndex</code>. It does not look at what follows the
         * word, so e.g. <code>AND01</code> counts as a hit of length 3.
         *
         * @param s
         *            the input to look for reserved words in
         * @param fromIndex
         *            index in the input at which a reserved word must start
         * @param sb
         *            builder to append the escaped replacement of the found
         *            reserved word into, or <code>null</code> to only detect the
         *            word
         * @return length of the reserved word in the input or 0 if no reserved word
         *         in the input
         */
        private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
                for (int w = 0; w < RESERVED_WORDS.length; w += 2) {
                        String word = RESERVED_WORDS[w];
                        if (s.startsWith(word, fromIndex)) {
                                if (sb != null)
                                        sb.append(RESERVED_WORDS[w + 1]);
                                return word.length();
                        }
                }
                return 0;
        }

        /**
         * Returns a String where those characters that QueryParser expects to be
         * escaped are escaped by a preceding <code>\</code>.
         *
         * Same as calling {@link #escape(String, boolean)} with escapeWildcards
         * set to <code>false</code>: the wildcard characters '*' and '?' are left
         * as they are, reserved words are escaped.
         *
         * @param s
         *            lucene query to escape
         * @return escaped string
         */
        public static String escape(String s) {
                return escape(s, false);
        }

        /**
         * Appends <code>field:escapedTerm</code> into the specified builder. The
         * <code>field:</code> prefix is left out if the field is
         * <code>null</code>. The field name is appended as is, only the term is
         * escaped using {@link #escape(String, boolean)}.
         *
         * @param field
         *            name of the field or <code>null</code>
         * @param term
         *            the term to escape
         * @param escapeWildcards
         *            <code>true</code> to escape also wildcard characters in the
         *            term
         * @param result
         *            the builder to append into
         * @return the <code>result</code> builder
         */
        public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
                if (field != null)
                        result.append(field).append(':');
                result.append(escape(term, escapeWildcards));
                return result;
        }

        /**
         * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
         * returns the result as a new string.
         *
         * @param field
         *            name of the field or <code>null</code>
         * @param term
         *            the term to escape
         * @param escapeWildcards
         *            <code>true</code> to escape also wildcard characters in the
         *            term
         * @return <code>field:escapedTerm</code> or just the escaped term if field
         *         is <code>null</code>
         */
        public static String escapeTerm(String field, String term, boolean escapeWildcards) {
                return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
        }

        /**
         * Appends <code>field:"term"</code> into the specified builder. The
         * <code>field:</code> prefix is left out if the field is
         * <code>null</code>. Inside the quotes every <code>"</code> and
         * <code>\</code> character of the term is preceded by <code>\</code>, all
         * other characters are appended as they are.
         *
         * @param field
         *            name of the field or <code>null</code>
         * @param term
         *            the term to quote
         * @param result
         *            the builder to append into
         * @return the <code>result</code> builder
         */
        public static StringBuilder quoteTerm(String field, String term, StringBuilder result) {
                if (field != null)
                        result.append(field).append(':');
                result.append('"');
                // Single pass over the term instead of a regular expression
                // replacement: only two characters need a preceding backslash.
                for (int i = 0, len = term.length(); i < len; ++i) {
                        char c = term.charAt(i);
                        if (c == '"' || c == '\\')
                                result.append('\\');
                        result.append(c);
                }
                result.append('"');
                return result;
        }

        /**
         * Same as {@link #quoteTerm(String, String)} without a field.
         *
         * @param term
         *            the term to quote
         * @return the quoted term
         */
        public static String quoteTerm(String term) {
                // Worst case every character is escaped, plus the two quotes.
                return quoteTerm(null, term, new StringBuilder(term.length() * 2 + 2)).toString();
        }

        /**
         * Same as {@link #quoteTerm(String, String, StringBuilder)} but returns
         * the result as a new string.
         *
         * @param field
         *            name of the field or <code>null</code>
         * @param term
         *            the term to quote
         * @return <code>field:"term"</code> or just the quoted term if field is
         *         <code>null</code>
         */
        public static String quoteTerm(String field, String term) {
                // Worst case every character is escaped, plus the two quotes and
                // the "field:" prefix.
                int capacity = term.length() * 2 + 2;
                if (field != null)
                        capacity += field.length() + 1;
                return quoteTerm(field, term, new StringBuilder(capacity)).toString();
        }

        /**
         * Concatenates the specified expressions, putting the specified separator
         * between each two consecutive expressions.
         *
         * @param withString
         *            the separator
         * @param exps
         *            the expressions to join
         * @return the joined expressions, an empty string if there are none
         */
        private static String join(String withString, String... exps) {
                if (exps.length == 0)
                        return "";
                StringBuilder sb = new StringBuilder(128);
                sb.append(exps[0]);
                for (int i = 1; i < exps.length; ++i)
                        sb.append(withString).append(exps[i]);
                return sb.toString();
        }

        /**
         * @param exp1
         *            first query expression
         * @param exp2
         *            second query expression
         * @return the expressions with <code> AND </code> in between. The
         *         expressions are not escaped nor parenthesized.
         */
        public static String and(String exp1, String exp2) {
                return exp1 + " AND " + exp2;
        }

        /**
         * @param exps
         *            query expressions
         * @return the expressions with <code> AND </code> between each two
         *         consecutive ones, an empty string if there are no expressions.
         *         The expressions are not escaped nor parenthesized.
         */
        public static String and(String... exps) {
                return join(" AND ", exps);
        }

        /**
         * @param exp1
         *            first query expression
         * @param exp2
         *            second query expression
         * @return the expressions with <code> OR </code> in between. The
         *         expressions are not escaped nor parenthesized.
         */
        public static String or(String exp1, String exp2) {
                return exp1 + " OR " + exp2;
        }

        /**
         * @param exps
         *            query expressions
         * @return the expressions with <code> OR </code> between each two
         *         consecutive ones, an empty string if there are no expressions.
         *         The expressions are not escaped nor parenthesized.
         */
        public static String or(String... exps) {
                return join(" OR ", exps);
        }

}