gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/completions/parsing/RobustModuleSplitter.java
New SCL completion implementation
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / completions / parsing / RobustModuleSplitter.java
1 package org.simantics.scl.compiler.completions.parsing;
2
3 import java.util.ArrayList;
4 import java.util.List;
5
6 public class RobustModuleSplitter {
7     // state ids
8     private static final int NORMAL_START_OF_LINE   = 0;
9     private static final int NORMAL                 = 1;
10     private static final int NORMAL_1QUOTE          = 2;
11     private static final int NORMAL_2QUOTE          = 3;
12     private static final int SHORT_STRING           = 4;
13     private static final int SHORT_STRING_BACKSLASH = 5;
14     private static final int LONG_STRING            = 6;
15     private static final int LONG_STRING_1QUOTE     = 7;
16     private static final int LONG_STRING_2QUOTE     = 8;
17     private static final int CHAR_LITERAL           = 9;
18     private static final int CHAR_LITERAL_BACKSLASH = 10;
19     private static final int NORMAL_1SLASH          = 11;
20     private static final int C_COMMENT              = 12;
21     private static final int C_COMMENT_STAR         = 13;
22     private static final int CPP_COMMENT            = 14;
23     
24     private final String sourceText;
25     private ArrayList<ModuleSegment> segments = new ArrayList<ModuleSegment>();
26     
27     private RobustModuleSplitter(String sourceText) {
28         this.sourceText = sourceText;
29     }
30
31     private static boolean isLineEnd(char c) {
32         return c == '\n' || c == 0;
33     }
34     
35     private void split() {
36         int state = NORMAL;
37         int begin = 0, pos = 0, curEntityBegin = 0, parenthesesBalance = 0;
38         boolean hasErrors = false;
39         int length = sourceText.length();
40         loop: while(true) {
41             char c = pos == length ? 0 : sourceText.charAt(pos++);
42             if(c == '\r')
43                 c = '\n';
44             switch(state) {
45             case NORMAL_START_OF_LINE:
46                 if(c == '\n') // Don't care about empty lines
47                     break;
48                 if(c != ' ') {
49                     int end = c == 0 ? pos : pos-1;
50                     segments.add(new ModuleSegment(begin, end, parenthesesBalance, hasErrors));
51                     parenthesesBalance = 0;
52                     hasErrors = false;
53                     begin = end;
54                 }
55                 state = NORMAL;
56             case NORMAL:
57                 if(c == '"')
58                     state = NORMAL_1QUOTE;
59                 else if(c == '/')
60                     state = NORMAL_1SLASH;
61                 else if(c == '\'')
62                     state = CHAR_LITERAL;
63                 else if(c == '(' || c == '[' || c == '{') 
64                     ++parenthesesBalance;
65                 else if(c == ')' || c == ']' || c == '}') 
66                     --parenthesesBalance;
67                 else if(c == '\n')
68                     state = NORMAL_START_OF_LINE;
69                 else if(c == 0)
70                     break loop;
71                 break;
72             case NORMAL_1QUOTE:
73                 if(c == '"')
74                     state = NORMAL_2QUOTE;
75                 else if(c == '\\')
76                     state = SHORT_STRING_BACKSLASH;
77                 else if(c == 0)
78                     break loop;
79                 else
80                     state = SHORT_STRING;
81                 break;
82             case NORMAL_2QUOTE:
83                 if(c == '"')
84                     state = LONG_STRING;
85                 else {
86                     state = NORMAL;
87                     if(c != 0)
88                         --pos;
89                 }
90                 break;
91             case SHORT_STRING:
92                 if(c == '\\')
93                     state = SHORT_STRING_BACKSLASH;
94                 else if(c == '"' || isLineEnd(c) /* unclosed string */) {
95                     if(c == '\n')
96                         state = NORMAL_START_OF_LINE;
97                     else
98                         state = NORMAL;
99                     hasErrors = c != '"';
100                 }
101                 break;
102             case SHORT_STRING_BACKSLASH:
103                 if(isLineEnd(c) /* unclosed string */)
104                     state = NORMAL_START_OF_LINE;
105                 else
106                     state = SHORT_STRING;
107                 break;
108             case LONG_STRING:
109                 if(c == '"')
110                     state = LONG_STRING_1QUOTE;
111                 else if(c == 0) {
112                     // Unclosed long string
113                     curEntityBegin = pos;
114                     state = NORMAL;
115                     hasErrors = true;
116                 }
117                 break;
118             case LONG_STRING_1QUOTE:
119                 if(c == '"')
120                     state = LONG_STRING_2QUOTE;
121                 else
122                     state = LONG_STRING;
123                 break;
124             case LONG_STRING_2QUOTE:
125                 if(c == '"')
126                     state = NORMAL;
127                 else
128                     state = LONG_STRING;
129                 break;
130             case CHAR_LITERAL:
131                 if(c == '\'' || isLineEnd(c) /* unclosed char literal */) {
132                     if(c == '\n')
133                         state = NORMAL_START_OF_LINE;
134                     else
135                         state = NORMAL;
136                     hasErrors = c != '\'';
137                 }
138                 else if(c == '\\')
139                     state = CHAR_LITERAL_BACKSLASH;
140                 break;
141             case CHAR_LITERAL_BACKSLASH:
142                 if(isLineEnd(c) /* unclosed char literal */) {
143                     state = NORMAL_START_OF_LINE;
144                     hasErrors = true;
145                 }
146                 else
147                     state = CHAR_LITERAL;
148                 break;
149             case NORMAL_1SLASH:
150                 if(c == '/')
151                     state = CPP_COMMENT;
152                 else if(c == '*') {
153                     state = C_COMMENT;
154                     curEntityBegin = pos;
155                 }
156                 else {
157                     state = NORMAL;
158                     if(c != 0)
159                         --pos;
160                 }
161                 break;
162             case C_COMMENT:
163                 if(c == '*')
164                     state = C_COMMENT_STAR;
165                 else if(c == 0) {
166                     // Unclosed C comment
167                     pos = curEntityBegin;
168                     state = NORMAL;
169                     hasErrors = true;
170                 }
171                 break;
172             case C_COMMENT_STAR:
173                 if(c == '/') {
174                     state = NORMAL;
175                 }
176                 else
177                     state = C_COMMENT;
178                 break;
179             case CPP_COMMENT:
180                 if(isLineEnd(c))
181                     state = NORMAL_START_OF_LINE;
182                 break;
183             }
184         }
185         if(begin != length)
186             segments.add(new ModuleSegment(begin, length, parenthesesBalance, hasErrors));
187     }
188     
189     private void combineByParenthesesBalance() {
190         ArrayList<ModuleSegment> segmentStack = null; 
191         for(ModuleSegment segment : segments)
192             if(segment.parenthesesBalance > 0) {
193                 if(segmentStack == null)
194                     segmentStack = new ArrayList<ModuleSegment>();
195                 for(int i=0;i<segment.parenthesesBalance;++i)
196                     segmentStack.add(segment);
197             }
198             else if(segment.parenthesesBalance < 0) {
199                 if(segmentStack == null) {
200                     segment.parenthesesBalance = 0;
201                     segment.hasErrors = true;
202                 }
203                 else {
204                     int r = -segment.parenthesesBalance;
205                     while(r > 0 && !segmentStack.isEmpty()) {
206                         segmentStack.remove(segmentStack.size()-1);
207                         --r;
208                     }
209                     if(r > 0) {
210                         segment.parenthesesBalance += r;
211                         segment.hasErrors = true;
212                     }
213                 }
214             }
215         if(segmentStack == null)
216             return;
217         for(ModuleSegment segment : segmentStack) {
218             --segment.parenthesesBalance;
219             segment.hasErrors = true;
220         }
221         
222         ArrayList<ModuleSegment> oldSegments = segments;
223         segments = new ArrayList<ModuleSegment>(oldSegments.size());
224         
225         int currentBalance = 0;
226         int begin = 0;
227         boolean hasErrors = false;
228         for(ModuleSegment segment : oldSegments) {
229             if(currentBalance == 0) {
230                 if(segment.parenthesesBalance == 0)
231                     segments.add(segment);
232                 else {
233                     begin = segment.begin;
234                     currentBalance = segment.parenthesesBalance;
235                     hasErrors = segment.hasErrors;
236                 }
237             }
238             else {
239                 currentBalance += segment.parenthesesBalance;
240                 hasErrors |= segment.hasErrors;
241                 if(currentBalance == 0)
242                     segments.add(new ModuleSegment(begin, segment.end, 0, hasErrors));
243             }
244         }
245     }
246     
247     public static List<ModuleSegment> split(String sourceText) {
248         RobustModuleSplitter splitter = new RobustModuleSplitter(sourceText);
249         splitter.split();
250         splitter.combineByParenthesesBalance();
251         return splitter.segments;
252     }
253 }
254