X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Forg.simantics.scl.ui%2Fsrc%2Forg%2Fsimantics%2Fscl%2Fui%2Feditor2%2Fiterator%2FJavaBreakIterator.java;fp=bundles%2Forg.simantics.scl.ui%2Fsrc%2Forg%2Fsimantics%2Fscl%2Fui%2Feditor2%2Fiterator%2FJavaBreakIterator.java;h=f02087896570398e2a20929feafef4ce0819b467;hp=0000000000000000000000000000000000000000;hb=3286205a29ade556a0ac3e19c68ac6ebb3c8a62d;hpb=e81aaf01a022b7f9fd54924f843a8f8594682126 diff --git a/bundles/org.simantics.scl.ui/src/org/simantics/scl/ui/editor2/iterator/JavaBreakIterator.java b/bundles/org.simantics.scl.ui/src/org/simantics/scl/ui/editor2/iterator/JavaBreakIterator.java new file mode 100644 index 000000000..f02087896 --- /dev/null +++ b/bundles/org.simantics.scl.ui/src/org/simantics/scl/ui/editor2/iterator/JavaBreakIterator.java @@ -0,0 +1,431 @@ +package org.simantics.scl.ui.editor2.iterator; + +import java.text.CharacterIterator; + +import com.ibm.icu.text.BreakIterator; + +import org.eclipse.core.runtime.Assert; + + + +/** + * A java break iterator. It returns all breaks, including before and after + * whitespace, and it returns all camel case breaks. + *

+ * A line break may be any of "\n", "\r", "\r\n", "\n\r". + *

+ * + * @since 3.0 + */ +public class JavaBreakIterator extends BreakIterator { + + /** + * A run of common characters. + */ + protected static abstract class Run { + /** The length of this run. */ + protected int length; + + public Run() { + init(); + } + + /** + * Returns true if this run consumes ch, + * false otherwise. If true is returned, + * the length of the receiver is adjusted accordingly. + * + * @param ch the character to test + * @return true if ch was consumed + */ + protected boolean consume(char ch) { + if (isValid(ch)) { + length++; + return true; + } + return false; + } + + /** + * Whether this run accepts that character; does not update state. Called + * from the default implementation of consume. + * + * @param ch the character to test + * @return true if ch is accepted + */ + protected abstract boolean isValid(char ch); + + /** + * Resets this run to the initial state. + */ + protected void init() { + length= 0; + } + } + + static final class Whitespace extends Run { + @Override + protected boolean isValid(char ch) { + return Character.isWhitespace(ch) && ch != '\n' && ch != '\r'; + } + } + + static final class LineDelimiter extends Run { + /** State: INIT -> delimiter -> EXIT. */ + private char fState; + private static final char INIT= '\0'; + private static final char EXIT= '\1'; + + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init() + */ + @Override + protected void init() { + super.init(); + fState= INIT; + } + + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consume(char) + */ + @Override + protected boolean consume(char ch) { + if (!isValid(ch) || fState == EXIT) + return false; + + if (fState == INIT) { + fState= ch; + length++; + return true; + } else if (fState != ch) { + fState= EXIT; + length++; + return true; + } else { + return false; + } + } + + @Override + protected boolean isValid(char ch) { + return ch == '\n' || ch == '\r'; + } + } + + static final class Identifier extends Run { + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + @Override + protected boolean isValid(char ch) { + return Character.isJavaIdentifierPart(ch); + } + } + + static final class CamelCaseIdentifier extends Run { + /* states */ + private static final int S_INIT= 0; + private static final int S_LOWER= 1; + private static final int S_ONE_CAP= 2; + private static final int S_ALL_CAPS= 3; + private static final int S_EXIT= 4; + private static final int S_EXIT_MINUS_ONE= 5; + + /* character types */ + private static final int K_INVALID= 0; + private static final int K_LOWER= 1; + private static final int K_UPPER= 2; + private static final int K_OTHER= 3; + + private int fState; + + private final static int[][] MATRIX= new int[][] { + // K_INVALID, K_LOWER, K_UPPER, K_OTHER + { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT + { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER + { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP + { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS + }; + + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init() + */ + @Override + protected void init() { + super.init(); + fState= S_INIT; + } + + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consumes(char) + */ + @Override + protected boolean consume(char ch) { + int kind= getKind(ch); + fState= MATRIX[fState][kind]; + switch (fState) { + case S_LOWER: + case S_ONE_CAP: + case S_ALL_CAPS: + length++; + return true; + case S_EXIT: + return false; + case S_EXIT_MINUS_ONE: + length--; + return false; + default: + Assert.isTrue(false); + return false; + } + } + + /** + * Determines the kind of a character. + * + * @param ch the character to test + */ + private int getKind(char ch) { + if (Character.isUpperCase(ch)) + return K_UPPER; + if (Character.isLowerCase(ch)) + return K_LOWER; + if (Character.isJavaIdentifierPart(ch)) // _, digits... + return K_OTHER; + return K_INVALID; + } + + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + @Override + protected boolean isValid(char ch) { + return Character.isJavaIdentifierPart(ch); + } + } + + static final class Other extends Run { + /* + * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char) + */ + @Override + protected boolean isValid(char ch) { + return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch); + } + } + + private static final Run WHITESPACE= new Whitespace(); + private static final Run DELIMITER= new LineDelimiter(); + private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier(); + private static final Run OTHER= new Other(); + + /** The platform break iterator (word instance) used as a base. */ + protected final BreakIterator fIterator; + /** The text we operate on. */ + protected CharSequence fText; + /** our current position for the stateful methods. */ + private int fIndex; + + + /** + * Creates a new break iterator. + */ + public JavaBreakIterator() { + fIterator= BreakIterator.getWordInstance(); + fIndex= fIterator.current(); + } + + /* + * @see java.text.BreakIterator#current() + */ + @Override + public int current() { + return fIndex; + } + + /* + * @see java.text.BreakIterator#first() + */ + @Override + public int first() { + fIndex= fIterator.first(); + return fIndex; + } + + /* + * @see java.text.BreakIterator#following(int) + */ + @Override + public int following(int offset) { + // work around too eager IAEs in standard implementation + if (offset == getText().getEndIndex()) + return DONE; + + int next= fIterator.following(offset); + if (next == DONE) + return DONE; + + // TODO deal with complex script word boundaries + // Math.min(offset + run.length, next) does not work + // since BreakIterator.getWordInstance considers _ as boundaries + // seems to work fine, however + Run run= consumeRun(offset); + return offset + run.length; + + } + + /** + * Consumes a run of characters at the limits of which we introduce a break. + * @param offset the offset to start at + * @return the run that was consumed + */ + private Run consumeRun(int offset) { + // assert offset < length + + char ch= fText.charAt(offset); + int length= fText.length(); + Run run= getRun(ch); + while (run.consume(ch) && offset < length - 1) { + offset++; + ch= fText.charAt(offset); + } + + return run; + } + + /** + * Returns a run based on a character. + * + * @param ch the character to test + * @return the correct character given ch + */ + private Run getRun(char ch) { + Run run; + if (WHITESPACE.isValid(ch)) + run= WHITESPACE; + else if (DELIMITER.isValid(ch)) + run= DELIMITER; + else if (CAMELCASE.isValid(ch)) + run= CAMELCASE; + else if (OTHER.isValid(ch)) + run= OTHER; + else { + Assert.isTrue(false); + return null; + } + + run.init(); + return run; + } + + /* + * @see java.text.BreakIterator#getText() + */ + @Override + public CharacterIterator getText() { + return fIterator.getText(); + } + + /* + * @see java.text.BreakIterator#isBoundary(int) + */ + @Override + public boolean isBoundary(int offset) { + if (offset == getText().getBeginIndex()) + return true; + else + return following(offset - 1) == offset; + } + + /* + * @see java.text.BreakIterator#last() + */ + @Override + public int last() { + fIndex= fIterator.last(); + return fIndex; + } + + /* + * @see java.text.BreakIterator#next() + */ + @Override + public int next() { + fIndex= following(fIndex); + return fIndex; + } + + /* + * @see java.text.BreakIterator#next(int) + */ + @Override + public int next(int n) { + return fIterator.next(n); + } + + /* + * @see java.text.BreakIterator#preceding(int) + */ + @Override + public int preceding(int offset) { + if (offset == getText().getBeginIndex()) + return DONE; + + if (isBoundary(offset - 1)) + return offset - 1; + + int previous= offset - 1; + do { + previous= fIterator.preceding(previous); + } while (!isBoundary(previous)); + + int last= DONE; + while (previous < offset) { + last= previous; + previous= following(previous); + } + + return last; + } + + /* + * @see java.text.BreakIterator#previous() + */ + @Override + public int previous() { + fIndex= preceding(fIndex); + return fIndex; + } + + /* + * @see java.text.BreakIterator#setText(java.lang.String) + */ + @Override + public void setText(String newText) { + setText((CharSequence) newText); + } + + /** + * Creates a break iterator given a char sequence. + * @param newText the new text + */ + public void setText(CharSequence newText) { + fText= newText; + fIterator.setText(new SequenceCharacterIterator(newText)); + first(); + } + + /* + * @see java.text.BreakIterator#setText(java.text.CharacterIterator) + */ + @Override + public void setText(CharacterIterator newText) { + if (newText instanceof CharSequence) { + fText= (CharSequence) newText; + fIterator.setText(newText); + first(); + } else { + throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$ + } + } +}