1 package org.simantics.db.layer0.genericrelation;
/**
 * This class contains utilities related to queries made into Lucene indexes,
 * such as escaping search terms.
 *
 * @author Tuukka Lehtonen
 */
10 public class IndexQueries {
13 * Same as calling {@link #escape(String, boolean, boolean)} with
14 * escapeKeywords set to <code>true</code>.
17 * @param escapeWildcards
18 * @return escaped string
20 public static String escape(String s, boolean escapeWildcards) {
21 return escape(s, escapeWildcards, true);
/**
 * Returns a String where those characters that QueryParser expects to be
 * escaped are escaped by a preceding <code>\</code>.
 *
 * Like {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
 * with escaping of the wildcard characters '*' and '?' disabled unless
 * <code>escapeWildcards</code> is set. Clients must escape wildcards
 * themselves to allow use of wildcards in queries.
 *
 * @param s
 *            lucene query to escape
 * @param escapeWildcards
 *            <code>true</code> to escape also wildcard characters
 * @param escapeKeywords
 *            <code>true</code> to escape keywords like AND, OR, etc.
 * @return escaped string
 */
// Escapes query-syntax characters (and, optionally, wildcards and reserved
// words) in s, building the result in a StringBuilder.
// NOTE(review): the numbering embedded in these lines skips, so several
// statements of this method (declarations of len and c, the appends, the loop
// increment and the returns) are not visible here. The comments below describe
// only what is shown.
41 public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
// Cheap pre-check; the statement this guard controls is not visible
// (presumably returns s unchanged -- confirm).
42 if (!needsEscaping(s, escapeWildcards, escapeKeywords))
// Pre-sized with 8 characters of headroom for the added escape characters.
45 StringBuilder sb = new StringBuilder(s.length() + 8);
47 // The beginning of the line is the same as the last character being
// whitespace, hence the initial value of true below.
49 boolean lastWhitespace = true;
// The loop header has no increment expression; i is presumably advanced inside
// the body on lines not shown -- confirm.
50 for (int i = 0; i < len;) {
52 // These characters are part of the query syntax and must be escaped
// Note that a plain space (' ') is part of this list, so it is handled here
// and not by the Character.isWhitespace branch below. The wildcards '*' and
// '?' are included only when escapeWildcards is true.
53 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
54 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
55 || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) {
// After a syntax character (including an escaped space) the next position is
// not treated as the start of a word.
58 lastWhitespace = false;
59 } else if (Character.isWhitespace(c)) {
// Any other whitespace marks the next position as a possible word start.
61 lastWhitespace = true;
// Reserved words are only looked for at the start of the input or directly
// after whitespace, and only when keyword escaping was requested.
63 if (escapeKeywords && lastWhitespace) {
// sb is handed to the helper, which appends the replacement text; reslen is
// the length of the matched reserved word.
64 int reslen = processReservedWords(s, i, sb);
67 lastWhitespace = false;
72 lastWhitespace = false;
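/**
 * Same logic as in {@link #escape(String, boolean, boolean)} but this one
 * simply checks whether the input string needs escaping at all or not.
 *
 * @param s
 *            string to check
 * @param escapeWildcards
 *            <code>true</code> to treat wildcard characters as needing escaping
 * @param escapeKeywords
 *            <code>true</code> to treat reserved words as needing escaping
 * @return whether the input string needs escaping
 */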
// Scans s with the same character and reserved-word tests as
// escape(String, boolean, boolean), without building any output.
// NOTE(review): the numbering embedded in these lines skips, so the
// declarations of len and c, the loop increment and every return statement are
// not visible here. The comments below describe only what is shown.
88 private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
90 // The beginning of the line is the same as the last character being
// whitespace, hence the initial value of true below.
92 boolean lastWhitespace = true;
// The loop header has no increment expression; i is presumably advanced inside
// the body on lines not shown -- confirm.
93 for (int i = 0; i < len;) {
95 // These characters are part of the query syntax and must be escaped
// Same character list as in escape(...): it includes the plain space, and the
// wildcards '*' and '?' only when escapeWildcards is true.
96 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
97 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
98 || c == '|' || c == '&' || c == '/' || c == ' ' || (escapeWildcards && (c == '*' || c == '?'))) {
100 } else if (Character.isWhitespace(c)) {
// Whitespace other than ' ' marks the next position as a possible word start.
101 lastWhitespace = true;
// Reserved words are only looked for at the start of the input or directly
// after whitespace, and only when keyword escaping was requested.
103 if (escapeKeywords && lastWhitespace) {
// null is passed as the StringBuilder: presumably only the match length is of
// interest here and nothing is to be appended -- confirm the helper tolerates
// a null builder.
104 int reslen = processReservedWords(s, i, null);
108 lastWhitespace = false;
// Flat lookup table read in pairs by processReservedWords: each even index
// holds a reserved word and the following odd index holds the text appended
// in its place.
// NOTE(review): the initializer entries are not visible in this view.
115 private static final String[] RESERVED_WORDS = {
/**
 * Lucene reserved words are case-sensitive for its query parser. Therefore
 * only case-sensitive hits need to be looked for.
 *
 * @return length of the reserved word in the input or 0 if no reserved word
 *         was found
 */
// Checks whether a reserved word from RESERVED_WORDS occurs in s exactly at
// fromIndex and, on a match, appends that word's replacement to sb.
// NOTE(review): the numbering embedded in these lines skips; the return
// statements and at least one statement inside the match branch are not
// visible here.
130 private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
131 final int total = RESERVED_WORDS.length;
// The table is walked two entries at a time: [w] is the word, [w+1] its
// replacement.
132 for (int w = 0; w < total; w += 2) {
133 String word = RESERVED_WORDS[w];
134 int len = word.length();
// ignoreCase is false, so only an exact-case occurrence at fromIndex matches.
// The test is a prefix match: nothing here checks what follows the word.
135 if (s.regionMatches(false, fromIndex, word, 0, len)) {
// NOTE(review): needsEscaping calls this method with sb == null, so the
// append below is presumably guarded by a null check on a line not shown --
// confirm.
137 String replacement = RESERVED_WORDS[w+1];
138 sb.append(replacement);
147 * Returns a String where those characters that QueryParser expects to be
148 * escaped are escaped by a preceding <code>\</code>.
150 public static String escape(String s) {
151 return escape(s, false);
// Appends field, a ':' separator and the escaped form of term to result.
// NOTE(review): the numbering embedded in these lines skips; one statement
// between the signature and the first append (possibly a guard on field --
// confirm) and the return statement are not visible here. The three-argument
// overload calls toString() on the returned builder.
154 public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
156 result.append(field).append(':');
// Uses the two-argument escape, so reserved-word escaping is always enabled.
157 result.append( escape(term, escapeWildcards) );
161 public static String escapeTerm(String field, String term, boolean escapeWildcards) {
162 return escapeTerm(field, term, escapeWildcards, new StringBuilder()).toString();
// Appends field, a ':' separator and term to result, with every double quote
// and backslash in term prefixed by a backslash.
// NOTE(review): the numbering embedded in these lines skips; the statements
// around the two appends and the return statement are not visible here.
// Judging by the method name, the surrounding quote characters are presumably
// appended on those lines -- confirm.
165 public static StringBuilder quoteTerm(String field, String term, StringBuilder result) {
// NOTE(review): quoteTerm(String) passes null as field, so this append is
// presumably guarded by a null check on a line not shown -- confirm.
167 result.append(field).append(':');
// The regex matches a single '"' or '\'; the replacement is a literal
// backslash followed by the matched character ($0).
169 result.append(term.replaceAll("(\"|\\\\)", "\\\\$0"));
174 public static String quoteTerm(String term) {
175 return quoteTerm(null, term, new StringBuilder(term.length()*2)).toString();
// Delegates to quoteTerm(String, String, StringBuilder).
// NOTE(review): the numbering embedded in these lines skips; most of the third
// argument and the end of the statement are not visible here. The visible part
// is presumably a term of the StringBuilder's initial capacity -- confirm.
178 public static String quoteTerm(String field, String term) {
179 return quoteTerm(field, term,
// Adds room for the field name plus one character (the ':' separator the
// delegate appends) when a field is given, nothing otherwise.
182 + (field != null ? field.length() + 1 : 0))
// Concatenates exps in order, placing withString between consecutive elements.
// NOTE(review): the numbering embedded in these lines skips; the statement
// controlled by the empty-array check below is not visible here, so the result
// for zero expressions cannot be confirmed from this view.
186 private static String join(String withString, String... exps) {
// Handled separately because the final append below reads
// exps[exps.length - 1], which would be out of bounds for an empty array.
187 if (exps.length == 0)
189 StringBuilder sb = new StringBuilder(128);
// All elements except the last are followed by the separator.
190 for (int i = 0; i < exps.length - 1; ++i) {
191 sb.append(exps[i]).append(withString);
// The last element is appended without a trailing separator.
193 sb.append(exps[exps.length - 1]);
194 return sb.toString();
197 public static String and(String exp1, String exp2) {
198 return exp1 + " AND " + exp2;
201 public static String and(String... exps) {
202 return join(" AND ", exps);
205 public static String or(String exp1, String exp2) {
206 return exp1 + " OR " + exp2;
209 public static String or(String... exps) {
210 return join(" OR ", exps);
213 // public static void main(String[] args) {
214 // System.out.println("esc: " + escape("AND01", true, true));
215 // System.out.println("esc: " + escape("AND 01", true, true));
216 // System.out.println("esc: " + escape(" AND 01", true, true));