--- /dev/null
+package org.simantics.scl.compiler.completions.parsing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits the text of a module into top-level segments without requiring the
+ * text to be syntactically valid.
+ *
+ * <p>The text is scanned once with a small state machine that recognises
+ * short and long (triple quoted) strings, character literals, C-style block
+ * comments and line comments. A new segment starts on every non-empty line
+ * whose first character is not a space, provided that line start is reached
+ * outside of a string or a block comment. For each segment the scanner records
+ * the balance of opening and closing brackets and whether a lexical problem
+ * (for example an unclosed literal) was seen. In a second pass, segments whose
+ * brackets are closed only in a later segment are merged with the segments up
+ * to the one that closes them.
+ */
+public class RobustModuleSplitter {
+    // State ids of the scanner in split().
+    private static final int NORMAL_START_OF_LINE = 0;
+    private static final int NORMAL = 1;
+    private static final int NORMAL_1QUOTE = 2;
+    private static final int NORMAL_2QUOTE = 3;
+    private static final int SHORT_STRING = 4;
+    private static final int SHORT_STRING_BACKSLASH = 5;
+    private static final int LONG_STRING = 6;
+    private static final int LONG_STRING_1QUOTE = 7;
+    private static final int LONG_STRING_2QUOTE = 8;
+    private static final int CHAR_LITERAL = 9;
+    private static final int CHAR_LITERAL_BACKSLASH = 10;
+    private static final int NORMAL_1SLASH = 11;
+    private static final int C_COMMENT = 12;
+    private static final int C_COMMENT_STAR = 13;
+    private static final int CPP_COMMENT = 14;
+
+    private final String sourceText;
+    private ArrayList<ModuleSegment> segments = new ArrayList<ModuleSegment>();
+
+    private RobustModuleSplitter(String sourceText) {
+        this.sourceText = sourceText;
+    }
+
+    /**
+     * Tells whether the character ends a line for the scanner: either a line
+     * feed (carriage returns are mapped to line feeds before this is called)
+     * or the value 0, which the scanner uses as its end-of-input sentinel.
+     */
+    private static boolean isLineEnd(char c) {
+        return c == '\n' || c == 0;
+    }
+
+    /**
+     * Scans {@link #sourceText} and fills {@link #segments} with one segment
+     * per top-level chunk of text, in source order.
+     */
+    private void split() {
+        int state = NORMAL;
+        int begin = 0, pos = 0, curEntityBegin = 0, parenthesesBalance = 0;
+        boolean hasErrors = false;
+        int length = sourceText.length();
+        loop: while (true) {
+            // Past the end of the text the scanner keeps seeing the sentinel 0
+            // until some state decides to leave the loop.
+            char c = pos == length ? 0 : sourceText.charAt(pos++);
+            if (c == '\r') {
+                c = '\n';
+            }
+            switch (state) {
+            case NORMAL_START_OF_LINE:
+                if (c == '\n') { // Don't care about empty lines
+                    break;
+                }
+                if (c != ' ') {
+                    // A line that does not start with a space closes the
+                    // current segment; the new one begins at this character.
+                    int end = c == 0 ? pos : pos - 1;
+                    segments.add(new ModuleSegment(begin, end, parenthesesBalance, hasErrors));
+                    parenthesesBalance = 0;
+                    hasErrors = false;
+                    begin = end;
+                }
+                state = NORMAL;
+                // Intentional fall through: the character is handled as
+                // ordinary content of the line.
+            case NORMAL:
+                if (c == '"') {
+                    state = NORMAL_1QUOTE;
+                } else if (c == '/') {
+                    state = NORMAL_1SLASH;
+                } else if (c == '\'') {
+                    state = CHAR_LITERAL;
+                } else if (c == '(' || c == '[' || c == '{') {
+                    ++parenthesesBalance;
+                } else if (c == ')' || c == ']' || c == '}') {
+                    --parenthesesBalance;
+                } else if (c == '\n') {
+                    state = NORMAL_START_OF_LINE;
+                } else if (c == 0) {
+                    break loop;
+                }
+                break;
+            case NORMAL_1QUOTE:
+                if (c == '"') {
+                    state = NORMAL_2QUOTE;
+                } else if (c == '\\') {
+                    state = SHORT_STRING_BACKSLASH;
+                } else if (isLineEnd(c)) {
+                    // The line or the input ends right after the opening
+                    // quote: the string is unclosed. Without this branch a
+                    // line break would be taken as string content and the
+                    // literal would swallow the following line.
+                    hasErrors = true;
+                    if (c == 0) {
+                        break loop;
+                    }
+                    state = NORMAL_START_OF_LINE;
+                } else {
+                    state = SHORT_STRING;
+                }
+                break;
+            case NORMAL_2QUOTE:
+                if (c == '"') {
+                    state = LONG_STRING;
+                } else {
+                    // It was an empty short string; look at the current
+                    // character again in the NORMAL state.
+                    state = NORMAL;
+                    if (c != 0) {
+                        --pos;
+                    }
+                }
+                break;
+            case SHORT_STRING:
+                if (c == '\\') {
+                    state = SHORT_STRING_BACKSLASH;
+                } else if (c == '"' || isLineEnd(c) /* unclosed string */) {
+                    state = c == '\n' ? NORMAL_START_OF_LINE : NORMAL;
+                    if (c != '"') {
+                        // Only ever raise the flag: a properly closed string
+                        // must not erase an earlier error of this segment.
+                        hasErrors = true;
+                    }
+                }
+                break;
+            case SHORT_STRING_BACKSLASH:
+                if (isLineEnd(c) /* unclosed string */) {
+                    state = NORMAL_START_OF_LINE;
+                    hasErrors = true;
+                } else {
+                    state = SHORT_STRING;
+                }
+                break;
+            case LONG_STRING:
+                if (c == '"') {
+                    state = LONG_STRING_1QUOTE;
+                } else if (c == 0) {
+                    // Unclosed long string: everything scanned so far stays
+                    // part of the current segment, which is flagged.
+                    state = NORMAL;
+                    hasErrors = true;
+                }
+                break;
+            case LONG_STRING_1QUOTE:
+                if (c == '"') {
+                    state = LONG_STRING_2QUOTE;
+                } else {
+                    state = LONG_STRING;
+                }
+                break;
+            case LONG_STRING_2QUOTE:
+                if (c == '"') {
+                    state = NORMAL;
+                } else {
+                    state = LONG_STRING;
+                }
+                break;
+            case CHAR_LITERAL:
+                if (c == '\'' || isLineEnd(c) /* unclosed char literal */) {
+                    state = c == '\n' ? NORMAL_START_OF_LINE : NORMAL;
+                    if (c != '\'') {
+                        // Only ever raise the flag, see SHORT_STRING.
+                        hasErrors = true;
+                    }
+                } else if (c == '\\') {
+                    state = CHAR_LITERAL_BACKSLASH;
+                }
+                break;
+            case CHAR_LITERAL_BACKSLASH:
+                if (isLineEnd(c) /* unclosed char literal */) {
+                    state = NORMAL_START_OF_LINE;
+                    hasErrors = true;
+                } else {
+                    state = CHAR_LITERAL;
+                }
+                break;
+            case NORMAL_1SLASH:
+                if (c == '/') {
+                    state = CPP_COMMENT;
+                } else if (c == '*') {
+                    state = C_COMMENT;
+                    // Remember where the comment body starts so that scanning
+                    // can resume there if the comment is never closed.
+                    curEntityBegin = pos;
+                } else {
+                    // A lone slash; look at the current character again in
+                    // the NORMAL state.
+                    state = NORMAL;
+                    if (c != 0) {
+                        --pos;
+                    }
+                }
+                break;
+            case C_COMMENT:
+                if (c == '*') {
+                    state = C_COMMENT_STAR;
+                } else if (c == 0) {
+                    // Unclosed C comment: rewind to just after its opening
+                    // and scan the rest as ordinary text.
+                    pos = curEntityBegin;
+                    state = NORMAL;
+                    hasErrors = true;
+                }
+                break;
+            case C_COMMENT_STAR:
+                if (c == '/') {
+                    state = NORMAL;
+                } else if (c != '*') {
+                    state = C_COMMENT;
+                }
+                // On another '*' the state is kept, so that a run of
+                // asterisks directly followed by '/' still closes the comment.
+                break;
+            case CPP_COMMENT:
+                if (isLineEnd(c)) {
+                    state = NORMAL_START_OF_LINE;
+                }
+                break;
+            }
+        }
+        // Whatever follows the last segment boundary forms the final segment.
+        if (begin != length) {
+            segments.add(new ModuleSegment(begin, length, parenthesesBalance, hasErrors));
+        }
+    }
+
+    /**
+     * Merges runs of segments whose brackets are balanced only together.
+     *
+     * <p>First, opening and closing brackets of different segments are paired
+     * with a stack. Closing brackets without a partner and opening brackets
+     * that are never closed are taken out of the balance of their segment and
+     * the segment is flagged as erroneous. Then every segment with a positive
+     * balance is joined with the following segments up to the one that brings
+     * the running balance back to zero. If no segment has a positive balance,
+     * the segment list is left as it is.
+     */
+    private void combineByParenthesesBalance() {
+        // Holds one entry per still unmatched opening bracket; created lazily.
+        ArrayList<ModuleSegment> segmentStack = null;
+        for (ModuleSegment segment : segments) {
+            if (segment.parenthesesBalance > 0) {
+                if (segmentStack == null) {
+                    segmentStack = new ArrayList<ModuleSegment>();
+                }
+                for (int i = 0; i < segment.parenthesesBalance; ++i) {
+                    segmentStack.add(segment);
+                }
+            } else if (segment.parenthesesBalance < 0) {
+                if (segmentStack == null) {
+                    // Nothing has been opened so far: every closing bracket
+                    // of this segment is unmatched.
+                    segment.parenthesesBalance = 0;
+                    segment.hasErrors = true;
+                } else {
+                    int r = -segment.parenthesesBalance;
+                    while (r > 0 && !segmentStack.isEmpty()) {
+                        segmentStack.remove(segmentStack.size() - 1);
+                        --r;
+                    }
+                    if (r > 0) {
+                        // r closing brackets found no partner; drop them from
+                        // the balance.
+                        segment.parenthesesBalance += r;
+                        segment.hasErrors = true;
+                    }
+                }
+            }
+        }
+        if (segmentStack == null) {
+            return;
+        }
+        // Entries left on the stack are opening brackets that are never
+        // closed; drop them from the balance of their segments.
+        for (ModuleSegment segment : segmentStack) {
+            --segment.parenthesesBalance;
+            segment.hasErrors = true;
+        }
+
+        ArrayList<ModuleSegment> oldSegments = segments;
+        segments = new ArrayList<ModuleSegment>(oldSegments.size());
+
+        int currentBalance = 0;
+        int begin = 0;
+        boolean hasErrors = false;
+        for (ModuleSegment segment : oldSegments) {
+            if (currentBalance == 0) {
+                if (segment.parenthesesBalance == 0) {
+                    segments.add(segment);
+                } else {
+                    // Start of a run that has to be merged.
+                    begin = segment.begin;
+                    currentBalance = segment.parenthesesBalance;
+                    hasErrors = segment.hasErrors;
+                }
+            } else {
+                currentBalance += segment.parenthesesBalance;
+                hasErrors |= segment.hasErrors;
+                if (currentBalance == 0) {
+                    segments.add(new ModuleSegment(begin, segment.end, 0, hasErrors));
+                }
+            }
+        }
+    }
+
+    /**
+     * Splits the given module text into segments.
+     *
+     * @param sourceText the text to split; its length and characters are read
+     *        directly, so it must not be {@code null}
+     * @return the segments in source order, after segments that share
+     *         unbalanced brackets have been merged
+     */
+    public static List<ModuleSegment> split(String sourceText) {
+        RobustModuleSplitter splitter = new RobustModuleSplitter(sourceText);
+        splitter.split();
+        splitter.combineByParenthesesBalance();
+        return splitter.segments;
+    }
+}
+