1 package org.simantics.db.layer0.genericrelation;
\r
/**
 * This class contains utilities related to queries made into Lucene indexes,
 * such as escaping search terms.
 *
 * @author Tuukka Lehtonen
 */
10 public class IndexQueries {
\r
13 * Same as calling {@link #escape(String, boolean, boolean)} with
\r
14 * escapeKeywords set to <code>true</code>.
\r
17 * @param escapeWildcards
\r
18 * @return escaped string
\r
20 public static String escape(String s, boolean escapeWildcards) {
\r
21 return escape(s, escapeWildcards, true);
\r
25 * Returns a String where those characters that QueryParser expects to be
\r
26 * escaped are escaped by a preceding <code>\</code>.
\r
29 * {@link org.apache.lucene.queryParser.QueryParser#escape(String)} but
\r
30 * disabled escaping of wildcard characters '*' and '?'. Clients must escape
\r
31 * wildcards themselves to allow use of wildcards in queries.
\r
34 * lucene query to escape
\r
35 * @param escapeWildcards
\r
36 * <code>true</code> to escape also wildcard characters
\r
37 * @param escapeKeywords
\r
38 * <code>true</code> to escape keywords like AND, OR, etc.
\r
39 * @return escaped string
\r
41 public static String escape(String s, boolean escapeWildcards, boolean escapeKeywords) {
\r
42 if (!needsEscaping(s, escapeWildcards, escapeKeywords))
\r
45 StringBuilder sb = new StringBuilder(s.length() + 8);
\r
46 int len = s.length();
\r
47 // The beginning of the line is the same as the last character being
\r
49 boolean lastWhitespace = true;
\r
50 for (int i = 0; i < len;) {
\r
51 char c = s.charAt(i);
\r
52 // These characters are part of the query syntax and must be escaped
\r
53 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
\r
54 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
\r
55 || c == '|' || c == '&' || c == '/' || (escapeWildcards && (c == '*' || c == '?'))) {
\r
58 lastWhitespace = false;
\r
59 } else if (Character.isWhitespace(c)) {
\r
61 lastWhitespace = true;
\r
63 if (escapeKeywords && lastWhitespace) {
\r
64 int reslen = processReservedWords(s, i, sb);
\r
67 lastWhitespace = false;
\r
72 lastWhitespace = false;
\r
76 return sb.toString();
\r
80 * Same logic as in {@link #escape(String, boolean, boolean)} but this one
\r
81 * simply checks whether the input string needs escaping at all or not.
\r
84 * @param escapeWildcards
\r
85 * @param escapeKeywords
\r
88 private static boolean needsEscaping(String s, boolean escapeWildcards, boolean escapeKeywords) {
\r
89 int len = s.length();
\r
90 // The beginning of the line is the same as the last character being
\r
92 boolean lastWhitespace = true;
\r
93 for (int i = 0; i < len;) {
\r
94 char c = s.charAt(i);
\r
95 // These characters are part of the query syntax and must be escaped
\r
96 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
\r
97 || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
\r
98 || c == '|' || c == '&' || c == '/' || (escapeWildcards && (c == '*' || c == '?'))) {
\r
100 } else if (Character.isWhitespace(c)) {
\r
101 lastWhitespace = true;
\r
103 if (escapeKeywords && lastWhitespace) {
\r
104 int reslen = processReservedWords(s, i, null);
\r
108 lastWhitespace = false;
\r
115 private static final String[] RESERVED_WORDS = {
\r
121 * Lucene reserved words are case-sensitive for its query parser. Therefore
\r
122 * only case-sensitive hits need to be looked for.
\r
126 * @return length of the reserved word in the input or 0 if no reserved word
\r
129 private static int processReservedWords(String s, int fromIndex, StringBuilder sb) {
\r
130 final int total = RESERVED_WORDS.length;
\r
131 for (int w = 0; w < total; w += 2) {
\r
132 String word = RESERVED_WORDS[w];
\r
133 int len = word.length();
\r
134 if (s.regionMatches(false, fromIndex, word, 0, len)) {
\r
136 String replacement = RESERVED_WORDS[w+1];
\r
137 sb.append(replacement);
\r
146 * Returns a String where those characters that QueryParser expects to be
\r
147 * escaped are escaped by a preceding <code>\</code>.
\r
149 public static String escape(String s) {
\r
150 return escape(s, false);
\r
153 public static StringBuilder escapeTerm(String field, String term, boolean escapeWildcards, StringBuilder result) {
\r
155 result.append(field).append(':');
\r
156 result.append( escape(term, escapeWildcards) );
\r
160 public static String escapeTerm(String field, String term, boolean escapeWildcards) {
\r
161 StringBuilder sb = new StringBuilder();
\r
162 return escapeTerm(field, term, escapeWildcards, sb).toString();
\r
165 // public static void main(String[] args) {
\r
166 // System.out.println("esc: " + escape("AND01", true, true));
\r
167 // System.out.println("esc: " + escape("AND 01", true, true));
\r
168 // System.out.println("esc: " + escape(" AND 01", true, true));
\r