1 package org.simantics.scl.ui.editor2.iterator;
3 import java.text.CharacterIterator;
5 import com.ibm.icu.text.BreakIterator;
7 import org.eclipse.core.runtime.Assert;
12 * A Java break iterator. It returns all breaks, including before and after
13 * whitespace, and it returns all camel case breaks.
15 * A line break may be any of "\n", "\r", "\r\n", "\n\r".
20 public class JavaBreakIterator extends BreakIterator {
23 * A run of common characters.
25 protected static abstract class Run {
26 /** The length of this run. */
34 * Returns <code>true</code> if this run consumes <code>ch</code>,
35 * <code>false</code> otherwise. If <code>true</code> is returned,
36 * the length of the receiver is adjusted accordingly.
38 * @param ch the character to test
39 * @return <code>true</code> if <code>ch</code> was consumed
41 protected boolean consume(char ch) {
50 * Whether this run accepts that character; does not update state. Called
51 * from the default implementation of <code>consume</code>.
53 * @param ch the character to test
54 * @return <code>true</code> if <code>ch</code> is accepted
56 protected abstract boolean isValid(char ch);
59 * Resets this run to the initial state.
61 protected void init() {
66 static final class Whitespace extends Run {
68 protected boolean isValid(char ch) {
69 return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
73 static final class LineDelimiter extends Run {
74 /** State: INIT -> delimiter -> EXIT. */
76 private static final char INIT= '\0';
77 private static final char EXIT= '\1';
80 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#init()
83 protected void init() {
89 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#consume(char)
92 protected boolean consume(char ch) {
93 if (!isValid(ch) || fState == EXIT)
100 } else if (fState != ch) {
110 protected boolean isValid(char ch) {
111 return ch == '\n' || ch == '\r';
115 static final class Identifier extends Run {
117 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#isValid(char)
120 protected boolean isValid(char ch) {
121 return Character.isJavaIdentifierPart(ch);
125 static final class CamelCaseIdentifier extends Run {
127 private static final int S_INIT= 0;
128 private static final int S_LOWER= 1;
129 private static final int S_ONE_CAP= 2;
130 private static final int S_ALL_CAPS= 3;
131 private static final int S_EXIT= 4;
132 private static final int S_EXIT_MINUS_ONE= 5;
134 /* character types */
135 private static final int K_INVALID= 0;
136 private static final int K_LOWER= 1;
137 private static final int K_UPPER= 2;
138 private static final int K_OTHER= 3;
142 private final static int[][] MATRIX= new int[][] {
143 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
144 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
145 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
146 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
147 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
151 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#init()
154 protected void init() {
160 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#consume(char)
163 protected boolean consume(char ch) {
164 int kind= getKind(ch);
165 fState= MATRIX[fState][kind];
174 case S_EXIT_MINUS_ONE:
178 Assert.isTrue(false);
184 * Determines the kind of a character.
186 * @param ch the character to test
188 private int getKind(char ch) {
189 if (Character.isUpperCase(ch))
191 if (Character.isLowerCase(ch))
193 if (Character.isJavaIdentifierPart(ch)) // _, digits...
199 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#isValid(char)
202 protected boolean isValid(char ch) {
203 return Character.isJavaIdentifierPart(ch);
207 static final class Other extends Run {
209 * @see org.simantics.scl.ui.editor2.iterator.JavaBreakIterator.Run#isValid(char)
212 protected boolean isValid(char ch) {
213 return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch);
217 private static final Run WHITESPACE= new Whitespace();
218 private static final Run DELIMITER= new LineDelimiter();
219 private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier();
220 private static final Run OTHER= new Other();
222 /** The platform break iterator (word instance) used as a base. */
223 protected final BreakIterator fIterator;
224 /** The text we operate on. */
225 protected CharSequence fText;
226 /** Our current position for the stateful methods. */
231 * Creates a new break iterator.
233 public JavaBreakIterator() {
234 fIterator= BreakIterator.getWordInstance();
235 fIndex= fIterator.current();
239 * @see com.ibm.icu.text.BreakIterator#current()
242 public int current() {
247 * @see com.ibm.icu.text.BreakIterator#first()
251 fIndex= fIterator.first();
256 * @see com.ibm.icu.text.BreakIterator#following(int)
259 public int following(int offset) {
260 // work around too eager IAEs in standard implementation
261 if (offset == getText().getEndIndex())
264 int next= fIterator.following(offset);
268 // TODO deal with complex script word boundaries
269 // Math.min(offset + run.length, next) does not work
270 // since BreakIterator.getWordInstance considers _ as boundaries
271 // seems to work fine, however
272 Run run= consumeRun(offset);
273 return offset + run.length;
278 * Consumes a run of characters at the limits of which we introduce a break.
279 * @param offset the offset to start at
280 * @return the run that was consumed
282 private Run consumeRun(int offset) {
283 // assert offset < length
285 char ch= fText.charAt(offset);
286 int length= fText.length();
288 while (run.consume(ch) && offset < length - 1) {
290 ch= fText.charAt(offset);
297 * Returns a run based on a character.
299 * @param ch the character to test
300 * @return the correct run given <code>ch</code>
302 private Run getRun(char ch) {
304 if (WHITESPACE.isValid(ch))
306 else if (DELIMITER.isValid(ch))
308 else if (CAMELCASE.isValid(ch))
310 else if (OTHER.isValid(ch))
313 Assert.isTrue(false);
322 * @see com.ibm.icu.text.BreakIterator#getText()
325 public CharacterIterator getText() {
326 return fIterator.getText();
330 * @see com.ibm.icu.text.BreakIterator#isBoundary(int)
333 public boolean isBoundary(int offset) {
334 if (offset == getText().getBeginIndex())
337 return following(offset - 1) == offset;
341 * @see com.ibm.icu.text.BreakIterator#last()
345 fIndex= fIterator.last();
350 * @see com.ibm.icu.text.BreakIterator#next()
354 fIndex= following(fIndex);
359 * @see com.ibm.icu.text.BreakIterator#next(int)
362 public int next(int n) {
363 return fIterator.next(n);
367 * @see com.ibm.icu.text.BreakIterator#preceding(int)
370 public int preceding(int offset) {
371 if (offset == getText().getBeginIndex())
374 if (isBoundary(offset - 1))
377 int previous= offset - 1;
379 previous= fIterator.preceding(previous);
380 } while (!isBoundary(previous));
383 while (previous < offset) {
385 previous= following(previous);
392 * @see com.ibm.icu.text.BreakIterator#previous()
395 public int previous() {
396 fIndex= preceding(fIndex);
401 * @see com.ibm.icu.text.BreakIterator#setText(java.lang.String)
404 public void setText(String newText) {
405 setText((CharSequence) newText);
409 * Sets the text this break iterator operates on, given a char sequence.
410 * @param newText the new text
412 public void setText(CharSequence newText) {
414 fIterator.setText(new SequenceCharacterIterator(newText));
419 * @see com.ibm.icu.text.BreakIterator#setText(java.text.CharacterIterator)
422 public void setText(CharacterIterator newText) {
423 if (newText instanceof CharSequence) {
424 fText= (CharSequence) newText;
425 fIterator.setText(newText);
428 throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$