]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.db.layer0/src/org/simantics/db/layer0/genericrelation/IndexQueries.java
Fail safe import fixes made by Antti
[simantics/platform.git] / bundles / org.simantics.db.layer0 / src / org / simantics / db / layer0 / genericrelation / IndexQueries.java
1 package org.simantics.db.layer0.genericrelation;
2
3
/**
 * Utilities related to queries made into Lucene indexes, such as escaping
 * search terms so that they can be embedded safely into a query string that
 * is later handed to the Lucene QueryParser.
 *
 * @author Tuukka Lehtonen
 */
public class IndexQueries {

    /**
     * Reserved words of the Lucene query syntax paired with their
     * replacements: every even index holds a keyword and the following odd
     * index holds the text written to the output in its place.
     *
     * Lucene reserved words are case-sensitive for its query parser, therefore
     * only the upper-case spellings need to be detected and writing them in
     * lower case is enough to stop the parser from treating them as operators.
     */
    private static final String[] RESERVED_WORDS = {
        "AND", "and",
        "OR", "or",
        "NOT", "not"
    };

    /**
     * Same as calling {@link #escape(String, boolean)} with escapeWildcards
     * set to <code>false</code>: query syntax characters are escaped, reserved
     * keywords are neutralized and the wildcards '*' and '?' are left as is.
     *
     * @param s
     *            lucene query to escape
     * @return escaped string
     */
    public static String escape(String s) {
        return escape(s, false);
    }

    /**
     * Same as calling {@link #escape(String, boolean, boolean)} with
     * escapeKeywords set to <code>true</code>.
     *
     * @param s
     *            lucene query to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @return escaped string
     */
    public static String escape(String s, boolean escapeWildcards) {
        return escape(s, escapeWildcards, true);
    }

    /**
     * Returns a String where those characters that QueryParser expects to be
     * escaped are escaped by a preceding <code>\</code>.
     *
     * Modelled after
     * {@code org.apache.lucene.queryParser.QueryParser#escape(String)} but
     * escaping of the wildcard characters '*' and '?' is optional so that
     * clients can allow the use of wildcards in queries.
     *
     * When escapeKeywords is set, a reserved word (AND, OR, NOT) that forms a
     * complete whitespace-delimited word is replaced with its lower-case
     * spelling. Words that merely begin with a reserved word, e.g.
     * <code>AND01</code> or <code>ORDER</code>, are left untouched.
     *
     * @param s
     *            lucene query to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters
     * @param escapeKeywords
     *            <code>true</code> to escape keywords like AND, OR, etc.
     * @return escaped string, or <code>s</code> itself if nothing in it needs
     *         escaping
     */
    public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
        // Fast path: avoid allocating a builder for the common clean input.
        if (!needsEscaping(s, escapeWildcards, escapeKeywords))
            return s;

        final int len = s.length();
        StringBuilder sb = new StringBuilder(len + 8);
        // The beginning of the input is treated the same as the previous
        // character being whitespace, i.e. a word may start there.
        boolean lastWhitespace = true;
        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards)) {
                sb.append('\\').append(c);
                lastWhitespace = false;
            } else if (Character.isWhitespace(c)) {
                sb.append(c);
                lastWhitespace = true;
            } else {
                if (escapeKeywords && lastWhitespace) {
                    int reslen = processReservedWords(s, i, sb);
                    if (reslen > 0) {
                        // The replacement has already been appended, skip
                        // over the whole keyword in the input.
                        i += reslen;
                        lastWhitespace = false;
                        continue;
                    }
                }
                sb.append(c);
                lastWhitespace = false;
            }
            ++i;
        }
        return sb.toString();
    }

    /**
     * Same logic as in {@link #escape(String, boolean, boolean)} but this one
     * simply checks whether the input string needs escaping at all or not.
     *
     * @param s
     *            lucene query to check
     * @param escapeWildcards
     *            <code>true</code> to consider wildcard characters as needing
     *            escaping
     * @param escapeKeywords
     *            <code>true</code> to consider reserved words as needing
     *            escaping
     * @return <code>true</code> if escaping would change the input
     */
    private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
        final int len = s.length();
        boolean lastWhitespace = true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if (isSpecialCharacter(c, escapeWildcards))
                return true;
            if (Character.isWhitespace(c)) {
                lastWhitespace = true;
                continue;
            }
            if (escapeKeywords && lastWhitespace && processReservedWords(s, i, null) > 0)
                return true;
            lastWhitespace = false;
        }
        return false;
    }

    /**
     * Tells whether the character is part of the query syntax and must
     * therefore be escaped with a preceding backslash.
     *
     * @param c
     *            character to examine
     * @param escapeWildcards
     *            <code>true</code> to treat '*' and '?' as special as well
     * @return <code>true</code> if the character must be escaped
     */
    private static boolean isSpecialCharacter(char c, boolean escapeWildcards) {
        switch (c) {
            case '\\': case '+': case '-': case '!': case '(': case ')':
            case ':': case '^': case '[': case ']': case '"': case '{':
            case '}': case '~': case '|': case '&': case '/':
                return true;
            case '*': case '?':
                return escapeWildcards;
            default:
                return false;
        }
    }

    /**
     * Checks whether a complete reserved word starts at the specified index
     * and optionally writes its replacement to the specified builder.
     *
     * A reserved word is only recognized if it is immediately followed by
     * whitespace or the end of the input. The caller is responsible for
     * making sure that <code>fromIndex</code> is at the beginning of a word.
     *
     * @param s
     *            the input string
     * @param fromIndex
     *            index in the input from which to look for a reserved word
     * @param sb
     *            receives the replacement of the found reserved word, or
     *            <code>null</code> to only detect the reserved word
     * @return length of the reserved word in the input or 0 if no reserved word
     *         in the input
     */
    private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
        final int inputLength = s.length();
        for (int w = 0; w < RESERVED_WORDS.length; w += 2) {
            String word = RESERVED_WORDS[w];
            if (!s.startsWith(word, fromIndex))
                continue;
            int end = fromIndex + word.length();
            // Longer terms such as "AND01" or "ORDER" are ordinary terms
            // for the query parser and must not be modified.
            if (end < inputLength && !Character.isWhitespace(s.charAt(end)))
                continue;
            if (sb != null)
                sb.append(RESERVED_WORDS[w + 1]);
            return word.length();
        }
        return 0;
    }

    /**
     * Appends an escaped query term, optionally qualified with a field name,
     * to the specified builder. The field name is written as is, followed by
     * <code>:</code>, and the term is escaped with
     * {@link #escape(String, boolean)}.
     *
     * @param field
     *            name of the field to search from or <code>null</code> to
     *            write the term without a field qualifier
     * @param term
     *            the term to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @param result
     *            the builder to append to
     * @return the <code>result</code> builder
     */
    public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
        if (field != null)
            result.append(field).append(':');
        result.append(escape(term, escapeWildcards));
        return result;
    }

    /**
     * Same as {@link #escapeTerm(String, String, boolean, StringBuilder)} but
     * returns the result as a new String.
     *
     * @param field
     *            name of the field to search from or <code>null</code> to
     *            write the term without a field qualifier
     * @param term
     *            the term to escape
     * @param escapeWildcards
     *            <code>true</code> to escape also wildcard characters in the
     *            term
     * @return the escaped, optionally field-qualified term
     */
    public static String escapeTerm(String field, String term, boolean escapeWildcards) {
        return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
    }

}