Implementing Java camelCase breaking in SCL module editor
[simantics/platform.git] / bundles / org.simantics.scl.ui / src / org / simantics / scl / ui / editor2 / iterator / JavaBreakIterator.java
1 package org.simantics.scl.ui.editor2.iterator;
2
3 import java.text.CharacterIterator;
4
5 import com.ibm.icu.text.BreakIterator;
6
7 import org.eclipse.core.runtime.Assert;
8
9
10
11 /**
12  * A java break iterator. It returns all breaks, including before and after
13  * whitespace, and it returns all camel case breaks.
14  * <p>
15  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
16  * </p>
17  *
18  * @since 3.0
19  */
20 public class JavaBreakIterator extends BreakIterator {
21
22         /**
23          * A run of common characters.
24          */
25         protected static abstract class Run {
26                 /** The length of this run. */
27                 protected int length;
28
29                 public Run() {
30                         init();
31                 }
32
33                 /**
34                  * Returns <code>true</code> if this run consumes <code>ch</code>,
35                  * <code>false</code> otherwise. If <code>true</code> is returned,
36                  * the length of the receiver is adjusted accordingly.
37                  *
38                  * @param ch the character to test
39                  * @return <code>true</code> if <code>ch</code> was consumed
40                  */
41                 protected boolean consume(char ch) {
42                         if (isValid(ch)) {
43                                 length++;
44                                 return true;
45                         }
46                         return false;
47                 }
48
49                 /**
50                  * Whether this run accepts that character; does not update state. Called
51                  * from the default implementation of <code>consume</code>.
52                  *
53                  * @param ch the character to test
54                  * @return <code>true</code> if <code>ch</code> is accepted
55                  */
56                 protected abstract boolean isValid(char ch);
57
58                 /**
59                  * Resets this run to the initial state.
60                  */
61                 protected void init() {
62                         length= 0;
63                 }
64         }
65
66         static final class Whitespace extends Run {
67                 @Override
68                 protected boolean isValid(char ch) {
69                         return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
70                 }
71         }
72
73         static final class LineDelimiter extends Run {
74                 /** State: INIT -> delimiter -> EXIT. */
75                 private char fState;
76                 private static final char INIT= '\0';
77                 private static final char EXIT= '\1';
78
79                 /*
80                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
81                  */
82                 @Override
83                 protected void init() {
84                         super.init();
85                         fState= INIT;
86                 }
87
88                 /*
89                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
90                  */
91                 @Override
92                 protected boolean consume(char ch) {
93                         if (!isValid(ch) || fState == EXIT)
94                                 return false;
95
96                         if (fState == INIT) {
97                                 fState= ch;
98                                 length++;
99                                 return true;
100                         } else if (fState != ch) {
101                                 fState= EXIT;
102                                 length++;
103                                 return true;
104                         } else {
105                                 return false;
106                         }
107                 }
108
109                 @Override
110                 protected boolean isValid(char ch) {
111                         return ch == '\n' || ch == '\r';
112                 }
113         }
114
115         static final class Identifier extends Run {
116                 /*
117                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
118                  */
119                 @Override
120                 protected boolean isValid(char ch) {
121                         return Character.isJavaIdentifierPart(ch);
122                 }
123         }
124
125         static final class CamelCaseIdentifier extends Run {
126                 /* states */
127                 private static final int S_INIT= 0;
128                 private static final int S_LOWER= 1;
129                 private static final int S_ONE_CAP= 2;
130                 private static final int S_ALL_CAPS= 3;
131                 private static final int S_EXIT= 4;
132                 private static final int S_EXIT_MINUS_ONE= 5;
133
134                 /* character types */
135                 private static final int K_INVALID= 0;
136                 private static final int K_LOWER= 1;
137                 private static final int K_UPPER= 2;
138                 private static final int K_OTHER= 3;
139
140                 private int fState;
141
142                 private final static int[][] MATRIX= new int[][] {
143                                 // K_INVALID, K_LOWER,           K_UPPER,    K_OTHER
144                                 {  S_EXIT,    S_LOWER,           S_ONE_CAP,  S_LOWER }, // S_INIT
145                                 {  S_EXIT,    S_LOWER,           S_EXIT,     S_LOWER }, // S_LOWER
146                                 {  S_EXIT,    S_LOWER,           S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
147                                 {  S_EXIT,    S_EXIT_MINUS_ONE,  S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
148                 };
149
150                 /*
151                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
152                  */
153                 @Override
154                 protected void init() {
155                         super.init();
156                         fState= S_INIT;
157                 }
158
159                 /*
160                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consumes(char)
161                  */
162                 @Override
163                 protected boolean consume(char ch) {
164                         int kind= getKind(ch);
165                         fState= MATRIX[fState][kind];
166                         switch (fState) {
167                                 case S_LOWER:
168                                 case S_ONE_CAP:
169                                 case S_ALL_CAPS:
170                                         length++;
171                                         return true;
172                                 case S_EXIT:
173                                         return false;
174                                 case S_EXIT_MINUS_ONE:
175                                         length--;
176                                         return false;
177                                 default:
178                                         Assert.isTrue(false);
179                                         return false;
180                         }
181                 }
182
183                 /**
184                  * Determines the kind of a character.
185                  *
186                  * @param ch the character to test
187                  */
188                 private int getKind(char ch) {
189                         if (Character.isUpperCase(ch))
190                                 return K_UPPER;
191                         if (Character.isLowerCase(ch))
192                                 return K_LOWER;
193                         if (Character.isJavaIdentifierPart(ch)) // _, digits...
194                                 return K_OTHER;
195                         return K_INVALID;
196                 }
197
198                 /*
199                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
200                  */
201                 @Override
202                 protected boolean isValid(char ch) {
203                         return Character.isJavaIdentifierPart(ch);
204                 }
205         }
206
207         static final class Other extends Run {
208                 /*
209                  * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
210                  */
211                 @Override
212                 protected boolean isValid(char ch) {
213                         return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch);
214                 }
215         }
216
217         private static final Run WHITESPACE= new Whitespace();
218         private static final Run DELIMITER= new LineDelimiter();
219         private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier();
220         private static final Run OTHER= new Other();
221
222         /** The platform break iterator (word instance) used as a base. */
223         protected final BreakIterator fIterator;
224         /** The text we operate on. */
225         protected CharSequence fText;
226         /** our current position for the stateful methods. */
227         private int fIndex;
228
229
230         /**
231          * Creates a new break iterator.
232          */
233         public JavaBreakIterator() {
234                 fIterator= BreakIterator.getWordInstance();
235                 fIndex= fIterator.current();
236         }
237
238         /*
239          * @see java.text.BreakIterator#current()
240          */
241         @Override
242         public int current() {
243                 return fIndex;
244         }
245
246         /*
247          * @see java.text.BreakIterator#first()
248          */
249         @Override
250         public int first() {
251                 fIndex= fIterator.first();
252                 return fIndex;
253         }
254
255         /*
256          * @see java.text.BreakIterator#following(int)
257          */
258         @Override
259         public int following(int offset) {
260                 // work around too eager IAEs in standard implementation
261                 if (offset == getText().getEndIndex())
262                         return DONE;
263
264                 int next= fIterator.following(offset);
265                 if (next == DONE)
266                         return DONE;
267
268                 // TODO deal with complex script word boundaries
269                 // Math.min(offset + run.length, next) does not work
270                 // since BreakIterator.getWordInstance considers _ as boundaries
271                 // seems to work fine, however
272                 Run run= consumeRun(offset);
273                 return offset + run.length;
274
275         }
276
277         /**
278          * Consumes a run of characters at the limits of which we introduce a break.
279          * @param offset the offset to start at
280          * @return the run that was consumed
281          */
282         private Run consumeRun(int offset) {
283                 // assert offset < length
284
285                 char ch= fText.charAt(offset);
286                 int length= fText.length();
287                 Run run= getRun(ch);
288                 while (run.consume(ch) && offset < length - 1) {
289                         offset++;
290                         ch= fText.charAt(offset);
291                 }
292
293                 return run;
294         }
295
296         /**
297          * Returns a run based on a character.
298          *
299          * @param ch the character to test
300          * @return the correct character given <code>ch</code>
301          */
302         private Run getRun(char ch) {
303                 Run run;
304                 if (WHITESPACE.isValid(ch))
305                         run= WHITESPACE;
306                 else if (DELIMITER.isValid(ch))
307                         run= DELIMITER;
308                 else if (CAMELCASE.isValid(ch))
309                         run= CAMELCASE;
310                 else if (OTHER.isValid(ch))
311                         run= OTHER;
312                 else {
313                         Assert.isTrue(false);
314                         return null;
315                 }
316
317                 run.init();
318                 return run;
319         }
320
321         /*
322          * @see java.text.BreakIterator#getText()
323          */
324         @Override
325         public CharacterIterator getText() {
326                 return fIterator.getText();
327         }
328
329         /*
330          * @see java.text.BreakIterator#isBoundary(int)
331          */
332         @Override
333         public boolean isBoundary(int offset) {
334         if (offset == getText().getBeginIndex())
335             return true;
336         else
337             return following(offset - 1) == offset;
338         }
339
340         /*
341          * @see java.text.BreakIterator#last()
342          */
343         @Override
344         public int last() {
345                 fIndex= fIterator.last();
346                 return fIndex;
347         }
348
349         /*
350          * @see java.text.BreakIterator#next()
351          */
352         @Override
353         public int next() {
354                 fIndex= following(fIndex);
355                 return fIndex;
356         }
357
358         /*
359          * @see java.text.BreakIterator#next(int)
360          */
361         @Override
362         public int next(int n) {
363                 return fIterator.next(n);
364         }
365
366         /*
367          * @see java.text.BreakIterator#preceding(int)
368          */
369         @Override
370         public int preceding(int offset) {
371                 if (offset == getText().getBeginIndex())
372                         return DONE;
373
374                 if (isBoundary(offset - 1))
375                         return offset - 1;
376
377                 int previous= offset - 1;
378                 do {
379                         previous= fIterator.preceding(previous);
380                 } while (!isBoundary(previous));
381
382                 int last= DONE;
383                 while (previous < offset) {
384                         last= previous;
385                         previous= following(previous);
386                 }
387
388                 return last;
389         }
390
391         /*
392          * @see java.text.BreakIterator#previous()
393          */
394         @Override
395         public int previous() {
396                 fIndex= preceding(fIndex);
397                 return fIndex;
398         }
399
400         /*
401          * @see java.text.BreakIterator#setText(java.lang.String)
402          */
403         @Override
404         public void setText(String newText) {
405                 setText((CharSequence) newText);
406         }
407
408         /**
409          * Creates a break iterator given a char sequence.
410          * @param newText the new text
411          */
412         public void setText(CharSequence newText) {
413                 fText= newText;
414                 fIterator.setText(new SequenceCharacterIterator(newText));
415                 first();
416         }
417
418         /*
419          * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
420          */
421         @Override
422         public void setText(CharacterIterator newText) {
423                 if (newText instanceof CharSequence) {
424                         fText= (CharSequence) newText;
425                         fIterator.setText(newText);
426                         first();
427                 } else {
428                         throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$
429                 }
430         }
431 }