package org.simantics.scl.compiler.completions.parsing; import java.util.ArrayList; import java.util.List; public class RobustModuleSplitter { // state ids private static final int NORMAL_START_OF_LINE = 0; private static final int NORMAL = 1; private static final int NORMAL_1QUOTE = 2; private static final int NORMAL_2QUOTE = 3; private static final int SHORT_STRING = 4; private static final int SHORT_STRING_BACKSLASH = 5; private static final int LONG_STRING = 6; private static final int LONG_STRING_1QUOTE = 7; private static final int LONG_STRING_2QUOTE = 8; private static final int CHAR_LITERAL = 9; private static final int CHAR_LITERAL_BACKSLASH = 10; private static final int NORMAL_1SLASH = 11; private static final int C_COMMENT = 12; private static final int C_COMMENT_STAR = 13; private static final int CPP_COMMENT = 14; private final String sourceText; private ArrayList segments = new ArrayList(); private RobustModuleSplitter(String sourceText) { this.sourceText = sourceText; } private static boolean isLineEnd(char c) { return c == '\n' || c == 0; } private void split() { int state = NORMAL; int begin = 0, pos = 0, curEntityBegin = 0, parenthesesBalance = 0; boolean hasErrors = false; int length = sourceText.length(); loop: while(true) { char c = pos == length ? 0 : sourceText.charAt(pos++); if(c == '\r') c = '\n'; switch(state) { case NORMAL_START_OF_LINE: if(c == '\n') // Don't care about empty lines break; if(c != ' ') { int end = c == 0 ? pos : pos-1; segments.add(new ModuleSegment(begin, end, parenthesesBalance, hasErrors)); parenthesesBalance = 0; hasErrors = false; begin = end; } state = NORMAL; case NORMAL: if(c == '"') state = NORMAL_1QUOTE; else if(c == '/') state = NORMAL_1SLASH; else if(c == '\'') state = CHAR_LITERAL; else if(c == '(' || c == '[' || c == '{') ++parenthesesBalance; else if(c == ')' || c == ']' || c == '}') --parenthesesBalance; else if(c == '\n') state = NORMAL_START_OF_LINE; else if(c == 0) break loop; break; case NORMAL_1QUOTE: if(c == '"') state = NORMAL_2QUOTE; else if(c == '\\') state = SHORT_STRING_BACKSLASH; else if(c == 0) break loop; else state = SHORT_STRING; break; case NORMAL_2QUOTE: if(c == '"') state = LONG_STRING; else { state = NORMAL; if(c != 0) --pos; } break; case SHORT_STRING: if(c == '\\') state = SHORT_STRING_BACKSLASH; else if(c == '"' || isLineEnd(c) /* unclosed string */) { if(c == '\n') state = NORMAL_START_OF_LINE; else state = NORMAL; hasErrors = c != '"'; } break; case SHORT_STRING_BACKSLASH: if(isLineEnd(c) /* unclosed string */) state = NORMAL_START_OF_LINE; else state = SHORT_STRING; break; case LONG_STRING: if(c == '"') state = LONG_STRING_1QUOTE; else if(c == 0) { // Unclosed long string curEntityBegin = pos; state = NORMAL; hasErrors = true; } break; case LONG_STRING_1QUOTE: if(c == '"') state = LONG_STRING_2QUOTE; else state = LONG_STRING; break; case LONG_STRING_2QUOTE: if(c == '"') state = NORMAL; else state = LONG_STRING; break; case CHAR_LITERAL: if(c == '\'' || isLineEnd(c) /* unclosed char literal */) { if(c == '\n') state = NORMAL_START_OF_LINE; else state = NORMAL; hasErrors = c != '\''; } else if(c == '\\') state = CHAR_LITERAL_BACKSLASH; break; case CHAR_LITERAL_BACKSLASH: if(isLineEnd(c) /* unclosed char literal */) { state = NORMAL_START_OF_LINE; hasErrors = true; } else state = CHAR_LITERAL; break; case NORMAL_1SLASH: if(c == '/') state = CPP_COMMENT; else if(c == '*') { state = C_COMMENT; curEntityBegin = pos; } else { state = NORMAL; if(c != 0) --pos; } break; case C_COMMENT: if(c == '*') state = C_COMMENT_STAR; else if(c == 0) { // Unclosed C comment pos = curEntityBegin; state = NORMAL; hasErrors = true; } break; case C_COMMENT_STAR: if(c == '/') { state = NORMAL; } else state = C_COMMENT; break; case CPP_COMMENT: if(isLineEnd(c)) state = NORMAL_START_OF_LINE; break; } } if(begin != length) segments.add(new ModuleSegment(begin, length, parenthesesBalance, hasErrors)); } private void combineByParenthesesBalance() { ArrayList segmentStack = null; for(ModuleSegment segment : segments) if(segment.parenthesesBalance > 0) { if(segmentStack == null) segmentStack = new ArrayList(); for(int i=0;i 0 && !segmentStack.isEmpty()) { segmentStack.remove(segmentStack.size()-1); --r; } if(r > 0) { segment.parenthesesBalance += r; segment.hasErrors = true; } } } if(segmentStack == null) return; for(ModuleSegment segment : segmentStack) { --segment.parenthesesBalance; segment.hasErrors = true; } ArrayList oldSegments = segments; segments = new ArrayList(oldSegments.size()); int currentBalance = 0; int begin = 0; boolean hasErrors = false; for(ModuleSegment segment : oldSegments) { if(currentBalance == 0) { if(segment.parenthesesBalance == 0) segments.add(segment); else { begin = segment.begin; currentBalance = segment.parenthesesBalance; hasErrors = segment.hasErrors; } } else { currentBalance += segment.parenthesesBalance; hasErrors |= segment.hasErrors; if(currentBalance == 0) segments.add(new ModuleSegment(begin, segment.end, 0, hasErrors)); } } } public static List split(String sourceText) { RobustModuleSplitter splitter = new RobustModuleSplitter(sourceText); splitter.split(); splitter.combineByParenthesesBalance(); return splitter.segments; } }