1 package org.simantics.db.layer0.genericrelation;
3 import java.util.Collection;
5 import org.simantics.datatypes.literal.GUID;
6 import org.simantics.db.Resource;
9 * This class contains utilities related to queries made into Lucene indexes,
10 * such as escaping search terms.
12 * @author Tuukka Lehtonen
14 public class IndexQueries {
17 * Same as calling {@link #escape(String, boolean, boolean)} with
18 * escapeKeywords set to <code>true</code>.
21 * @param escapeWildcards
22 * @return escaped string
24 public static String escape(String s, boolean escapeWildcards) {
25 return escape(s, escapeWildcards, true);
29 * Returns a String where those characters that QueryParser expects to be
30 * escaped are escaped by a preceding <code>\</code>.
33 * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
34 * disabled escaping of wildcard characters '*' and '?'. Clients must escape
35 * wildcards themselves to allow use of wildcards in queries.
38 * lucene query to escape
39 * @param escapeWildcards
40 * <code>true</code> to escape also wildcard characters
41 * @param escapeKeywords
42 * <code>true</code> to escape keywords like AND, OR, etc.
43 * @return escaped string
45 public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
46 if (!needsEscaping(s, escapeWildcards, escapeKeywords))
49 StringBuilder sb = new StringBuilder(s.length() + 8);
51 // The beginning of the line is the same as the last character being
53 boolean lastWhitespace = true;
54 for (int i = 0; i < len;) {
56 // These characters are part of the query syntax and must be escaped
57 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
58 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
59 || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) {
62 lastWhitespace = false;
63 } else if (Character.isWhitespace(c)) {
65 lastWhitespace = true;
67 if (escapeKeywords && lastWhitespace) {
68 int reslen = processReservedWords(s, i, sb);
71 lastWhitespace = false;
76 lastWhitespace = false;
84 * Same logic as in {@link #escape(String, boolean, boolean)} but this one
85 * simply checks whether the input string needs escaping at all or not.
88 * @param escapeWildcards
89 * @param escapeKeywords
92 private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
94 // The beginning of the line is the same as the last character being
96 boolean lastWhitespace = true;
97 for (int i = 0; i < len;) {
99 // These characters are part of the query syntax and must be escaped
100 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
101 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
102 || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) {
104 } else if (Character.isWhitespace(c)) {
105 lastWhitespace = true;
107 if (escapeKeywords && lastWhitespace) {
108 int reslen = processReservedWords(s, i, null);
112 lastWhitespace = false;
// Lookup table read by processReservedWords in steps of two: the entry at an
// even index is the reserved word to look for, and the entry right after it
// is the text that gets appended in place of that word.
119 private static final String[] RESERVED_WORDS = {
126 * Lucene reserved words are case-sensitive for its query parser. Therefore
127 * only case-sensitive hits need to be looked for.
131 * @return length of the reserved word in the input or 0 if no reserved word
134 private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
135 final int total = RESERVED_WORDS.length;
136 for (int w = 0; w < total; w += 2) {
137 String word = RESERVED_WORDS[w];
138 int len = word.length();
139 if (s.regionMatches(false, fromIndex, word, 0, len)) {
141 String replacement = RESERVED_WORDS[w+1];
142 sb.append(replacement);
151 * Returns a String where those characters that QueryParser expects to be
152 * escaped are escaped by a preceding <code>\</code>.
154 public static String escape(String s) {
155 return escape(s, false);
158 public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
160 result.append(field).append(':');
161 result.append( escape(term, escapeWildcards) );
165 public static String escapeTerm(String field, String term, boolean escapeWildcards) {
166 return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
169 public static StringBuilder quoteTerm(String field, String term, StringBuilder result) {
171 result.append(field).append(':');
173 result.append(term.replaceAll("(\"|\\\\)", "\\\\$0"));
178 public static String quoteTerm(String term) {
179 return quoteTerm(null, term, new StringBuilder(term.length()*2)).toString();
182 public static String quoteTerm(String field, String term) {
183 return quoteTerm(field, term,
186 + (field != null ? field.length() + 1 : 0))
190 public static StringBuilder appendLongTerm(StringBuilder sb, String field, long term) {
191 return sb.append(field).append(':').append(term);
194 public static String longTerm(String field, long term) {
195 return appendLongTerm(new StringBuilder(), field, term).toString();
198 public static StringBuilder appendResourceIdTerm(StringBuilder sb, String field, Resource term) {
199 return appendLongTerm(sb, field, term.getResourceId());
202 public static String resourceIdTerm(String field, Resource term) {
203 return appendLongTerm(new StringBuilder(), field, term.getResourceId()).toString();
206 private static String join(String withString, String... exps) {
207 if (exps.length == 0)
209 StringBuilder sb = new StringBuilder(128);
210 for (int i = 0; i < exps.length - 1; ++i) {
211 sb.append(exps[i]).append(withString);
213 sb.append(exps[exps.length - 1]);
214 return sb.toString();
217 public static String and(String exp1, String exp2) {
218 return exp1 + " AND " + exp2;
221 public static String and(String... exps) {
222 return join(" AND ", exps);
225 public static String or(String exp1, String exp2) {
226 return exp1 + " OR " + exp2;
229 public static String or(String... exps) {
230 return join(" OR ", exps);
233 public static String idFromGUID(GUID guid) {
234 return guid != null ? guid.indexString() : "";
237 public static String toResourceIdString(Resource r, Collection<Resource> rs) {
238 StringBuilder sb = new StringBuilder();
239 sb.append(r.getResourceId());
240 for (Resource rr : rs)
241 sb.append(' ').append(rr.getResourceId());
242 return sb.toString();
245 public static String toResourceIdString(Collection<Resource> rs) {
248 StringBuilder sb = new StringBuilder();
249 boolean first = true;
250 for (Resource rr : rs) {
254 sb.append(rr.getResourceId());
256 return sb.toString();
259 // public static void main(String[] args) {
260 // System.out.println("esc: " + escape("AND01", true, true));
261 // System.out.println("esc: " + escape("AND 01", true, true));
262 // System.out.println("esc: " + escape(" AND 01", true, true));