gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/completions/parsing/RobustModuleSplitter.java
New SCL completion implementation
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / completions / parsing / RobustModuleSplitter.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/completions/parsing/RobustModuleSplitter.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/completions/parsing/RobustModuleSplitter.java
new file mode 100644 (file)
index 0000000..283af61
--- /dev/null
@@ -0,0 +1,254 @@
+package org.simantics.scl.compiler.completions.parsing;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class RobustModuleSplitter {
+    // state ids
+    private static final int NORMAL_START_OF_LINE   = 0;
+    private static final int NORMAL                 = 1;
+    private static final int NORMAL_1QUOTE          = 2;
+    private static final int NORMAL_2QUOTE          = 3;
+    private static final int SHORT_STRING           = 4;
+    private static final int SHORT_STRING_BACKSLASH = 5;
+    private static final int LONG_STRING            = 6;
+    private static final int LONG_STRING_1QUOTE     = 7;
+    private static final int LONG_STRING_2QUOTE     = 8;
+    private static final int CHAR_LITERAL           = 9;
+    private static final int CHAR_LITERAL_BACKSLASH = 10;
+    private static final int NORMAL_1SLASH          = 11;
+    private static final int C_COMMENT              = 12;
+    private static final int C_COMMENT_STAR         = 13;
+    private static final int CPP_COMMENT            = 14;
+    
+    private final String sourceText;
+    private ArrayList<ModuleSegment> segments = new ArrayList<ModuleSegment>();
+    
+    private RobustModuleSplitter(String sourceText) {
+        this.sourceText = sourceText;
+    }
+
+    private static boolean isLineEnd(char c) {
+        return c == '\n' || c == 0;
+    }
+    
+    private void split() {
+        int state = NORMAL;
+        int begin = 0, pos = 0, curEntityBegin = 0, parenthesesBalance = 0;
+        boolean hasErrors = false;
+        int length = sourceText.length();
+        loop: while(true) {
+            char c = pos == length ? 0 : sourceText.charAt(pos++);
+            if(c == '\r')
+                c = '\n';
+            switch(state) {
+            case NORMAL_START_OF_LINE:
+                if(c == '\n') // Don't care about empty lines
+                    break;
+                if(c != ' ') {
+                    int end = c == 0 ? pos : pos-1;
+                    segments.add(new ModuleSegment(begin, end, parenthesesBalance, hasErrors));
+                    parenthesesBalance = 0;
+                    hasErrors = false;
+                    begin = end;
+                }
+                state = NORMAL;
+            case NORMAL:
+                if(c == '"')
+                    state = NORMAL_1QUOTE;
+                else if(c == '/')
+                    state = NORMAL_1SLASH;
+                else if(c == '\'')
+                    state = CHAR_LITERAL;
+                else if(c == '(' || c == '[' || c == '{') 
+                    ++parenthesesBalance;
+                else if(c == ')' || c == ']' || c == '}') 
+                    --parenthesesBalance;
+                else if(c == '\n')
+                    state = NORMAL_START_OF_LINE;
+                else if(c == 0)
+                    break loop;
+                break;
+            case NORMAL_1QUOTE:
+                if(c == '"')
+                    state = NORMAL_2QUOTE;
+                else if(c == '\\')
+                    state = SHORT_STRING_BACKSLASH;
+                else if(c == 0)
+                    break loop;
+                else
+                    state = SHORT_STRING;
+                break;
+            case NORMAL_2QUOTE:
+                if(c == '"')
+                    state = LONG_STRING;
+                else {
+                    state = NORMAL;
+                    if(c != 0)
+                        --pos;
+                }
+                break;
+            case SHORT_STRING:
+                if(c == '\\')
+                    state = SHORT_STRING_BACKSLASH;
+                else if(c == '"' || isLineEnd(c) /* unclosed string */) {
+                    if(c == '\n')
+                        state = NORMAL_START_OF_LINE;
+                    else
+                        state = NORMAL;
+                    hasErrors = c != '"';
+                }
+                break;
+            case SHORT_STRING_BACKSLASH:
+                if(isLineEnd(c) /* unclosed string */)
+                    state = NORMAL_START_OF_LINE;
+                else
+                    state = SHORT_STRING;
+                break;
+            case LONG_STRING:
+                if(c == '"')
+                    state = LONG_STRING_1QUOTE;
+                else if(c == 0) {
+                    // Unclosed long string
+                    curEntityBegin = pos;
+                    state = NORMAL;
+                    hasErrors = true;
+                }
+                break;
+            case LONG_STRING_1QUOTE:
+                if(c == '"')
+                    state = LONG_STRING_2QUOTE;
+                else
+                    state = LONG_STRING;
+                break;
+            case LONG_STRING_2QUOTE:
+                if(c == '"')
+                    state = NORMAL;
+                else
+                    state = LONG_STRING;
+                break;
+            case CHAR_LITERAL:
+                if(c == '\'' || isLineEnd(c) /* unclosed char literal */) {
+                    if(c == '\n')
+                        state = NORMAL_START_OF_LINE;
+                    else
+                        state = NORMAL;
+                    hasErrors = c != '\'';
+                }
+                else if(c == '\\')
+                    state = CHAR_LITERAL_BACKSLASH;
+                break;
+            case CHAR_LITERAL_BACKSLASH:
+                if(isLineEnd(c) /* unclosed char literal */) {
+                    state = NORMAL_START_OF_LINE;
+                    hasErrors = true;
+                }
+                else
+                    state = CHAR_LITERAL;
+                break;
+            case NORMAL_1SLASH:
+                if(c == '/')
+                    state = CPP_COMMENT;
+                else if(c == '*') {
+                    state = C_COMMENT;
+                    curEntityBegin = pos;
+                }
+                else {
+                    state = NORMAL;
+                    if(c != 0)
+                        --pos;
+                }
+                break;
+            case C_COMMENT:
+                if(c == '*')
+                    state = C_COMMENT_STAR;
+                else if(c == 0) {
+                    // Unclosed C comment
+                    pos = curEntityBegin;
+                    state = NORMAL;
+                    hasErrors = true;
+                }
+                break;
+            case C_COMMENT_STAR:
+                if(c == '/') {
+                    state = NORMAL;
+                }
+                else
+                    state = C_COMMENT;
+                break;
+            case CPP_COMMENT:
+                if(isLineEnd(c))
+                    state = NORMAL_START_OF_LINE;
+                break;
+            }
+        }
+        if(begin != length)
+            segments.add(new ModuleSegment(begin, length, parenthesesBalance, hasErrors));
+    }
+    
+    private void combineByParenthesesBalance() {
+        ArrayList<ModuleSegment> segmentStack = null; 
+        for(ModuleSegment segment : segments)
+            if(segment.parenthesesBalance > 0) {
+                if(segmentStack == null)
+                    segmentStack = new ArrayList<ModuleSegment>();
+                for(int i=0;i<segment.parenthesesBalance;++i)
+                    segmentStack.add(segment);
+            }
+            else if(segment.parenthesesBalance < 0) {
+                if(segmentStack == null) {
+                    segment.parenthesesBalance = 0;
+                    segment.hasErrors = true;
+                }
+                else {
+                    int r = -segment.parenthesesBalance;
+                    while(r > 0 && !segmentStack.isEmpty()) {
+                        segmentStack.remove(segmentStack.size()-1);
+                        --r;
+                    }
+                    if(r > 0) {
+                        segment.parenthesesBalance += r;
+                        segment.hasErrors = true;
+                    }
+                }
+            }
+        if(segmentStack == null)
+            return;
+        for(ModuleSegment segment : segmentStack) {
+            --segment.parenthesesBalance;
+            segment.hasErrors = true;
+        }
+        
+        ArrayList<ModuleSegment> oldSegments = segments;
+        segments = new ArrayList<ModuleSegment>(oldSegments.size());
+        
+        int currentBalance = 0;
+        int begin = 0;
+        boolean hasErrors = false;
+        for(ModuleSegment segment : oldSegments) {
+            if(currentBalance == 0) {
+                if(segment.parenthesesBalance == 0)
+                    segments.add(segment);
+                else {
+                    begin = segment.begin;
+                    currentBalance = segment.parenthesesBalance;
+                    hasErrors = segment.hasErrors;
+                }
+            }
+            else {
+                currentBalance += segment.parenthesesBalance;
+                hasErrors |= segment.hasErrors;
+                if(currentBalance == 0)
+                    segments.add(new ModuleSegment(begin, segment.end, 0, hasErrors));
+            }
+        }
+    }
+    
+    public static List<ModuleSegment> split(String sourceText) {
+        RobustModuleSplitter splitter = new RobustModuleSplitter(sourceText);
+        splitter.split();
+        splitter.combineByParenthesesBalance();
+        return splitter.segments;
+    }
+}
+