]> gerrit.simantics Code Review - simantics/platform.git/blob
9598f49d56860c46c32ddf4d3b5b9db41d0bf168
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.compilation.CompilationContext;
7 import org.simantics.scl.compiler.errors.Locations;
8 import org.simantics.scl.compiler.internal.parsing.Token;
9 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
10
11 import gnu.trove.list.array.TIntArrayList;
12 import gnu.trove.set.hash.TIntHashSet;
13
14 /**
15  * 
16  * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
17  * @author Hannu Niemistö
18  */
19 public class SCLPostLexer {
20     
21     private static final int PATCH_SIZE = 16;
22     private static final int INITIAL_QUEUE_SIZE = 32;
23         
24     public static TIntHashSet INDENTABLE = new TIntHashSet();
25     public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
26     public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
27     static {
28         INDENTABLE.add(SCLTerminals.WHERE);
29         INDENTABLE.add(SCLTerminals.QUERY_OP);
30         INDENTABLE.add(SCLTerminals.WITH);
31         INDENTABLE.add(SCLTerminals.DO);
32         INDENTABLE.add(SCLTerminals.MDO);
33         INDENTABLE.add(SCLTerminals.EDO);
34         INDENTABLE.add(SCLTerminals.LET);
35         INDENTABLE.add(SCLTerminals.ENFORCE);
36         INDENTABLE.add(SCLTerminals.WHEN);
37         INDENTABLE.add(SCLTerminals.FOLLOWS);
38         INDENTABLE.add(SCLTerminals.EQ);
39         INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
40         INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
41         
42         NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
43         NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
44         NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
45         NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
46         NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
47         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
48         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
49         NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
50         NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
51         
52         NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
53         NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
54     }
55     
56     SCLLexer lexer;
57     Token[] queue = new Token[INITIAL_QUEUE_SIZE];
58     int queuePos=0, queueSize=0;
59     TIntArrayList indentations = new TIntArrayList();
60     TIntArrayList indentationTokens = new TIntArrayList();
61     Token curToken = null;
62     int lineStart = 0;
63     boolean firstTokenOfLine = true;
64     private SCLParserOptions options;
65     private boolean isFirstToken = true;
66     private CompilationContext context;
67     
68     /**
69      * We are parsing a module header and therefore should process tokens one by one and not by patches.
70      */
71     private boolean isInsideModule = false; 
72             
73     {
74         indentations.add(0);
75         indentationTokens.add(SCLTerminals.EOF);
76     }
77     
78     public SCLPostLexer(SCLLexer lexer) {
79         this.lexer = lexer;
80     }
81     
82     public SCLPostLexer(java.io.Reader in) {
83         this(new SCLLexer(in));
84     }
85     
86     public void setCompilationContext(CompilationContext context) {
87         lexer.setCompilationContext(context);
88         this.context = context;
89     }
90
91     public Token nextToken() throws Exception {
92         while(queuePos == queueSize)
93             fillQueue();
94         return queue[queuePos++];
95     }
96     
97     public Token peekToken() throws Exception {
98         while(queuePos == queueSize)
99             fillQueue();
100         return queue[queuePos];
101     }
102     
103     private void push(Token symbol) {
104         /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
105                 " [" 
106                 + Locations.beginOf(symbol.location) + ".." 
107                 + Locations.endOf(symbol.location) + "]");*/
108         if(queueSize == queue.length)
109             queue = Arrays.copyOf(queue, queueSize*2);
110         queue[queueSize++] = symbol;
111     }
112     
113     private void fillQueue() throws Exception {
114         queuePos = 0;
115         queueSize = 0;
116         
117         for(int i=0;i<PATCH_SIZE;++i) {
118             handleToken(lexer.nextToken());
119             if(isInsideModule) {
120                 if(context.header == null)
121                     break;
122                 else
123                     isInsideModule = false;
124             }
125         }
126     }
127     
128     private SCLSyntaxErrorException error(int start, int end, String description) {
129         return new SCLSyntaxErrorException(Locations.location(start, end), description);
130     }
131     
132     private void handleToken(Token symbol) throws IOException {
133         int symbolId = symbol.id;
134         if(symbolId == SCLTerminals.EOL) {
135             lineStart = Locations.endOf(symbol.location);
136             firstTokenOfLine = true;
137             return;
138         }
139         
140         if(symbolId == SCLTerminals.COMMENT) {
141             firstTokenOfLine = false;
142             return;
143         }
144         
145         Token prevToken = curToken;
146         int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
147         curToken = symbol;
148         
149         int symbolStart = Locations.beginOf(symbol.location);
150         int symbolEnd = Locations.endOf(symbol.location);
151         
152         if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
153             push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
154             int symbolIndentation = symbolStart-lineStart;
155             //System.out.println("symbolIndentation = " + symbolIndentation);
156             indentations.add(symbolIndentation);
157             indentationTokens.add(prevTokenId);
158             firstTokenOfLine = false;
159         }
160         else if(firstTokenOfLine) {
161             if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
162                 ;
163             else {
164                 int level = symbolStart - lineStart;
165                 //System.out.println("level = " + level);
166                 if(indentations.get(indentations.size()-1) >= level) {
167                     while(indentations.get(indentations.size()-1) > level) {
168                         indentationTokens.removeAt(indentations.size()-1);
169                         indentations.removeAt(indentations.size()-1);
170                         int loc = Locations.endOf(prevToken.location);
171                         push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
172                     }
173                     if(indentations.get(indentations.size()-1) == level)
174                         push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
175                 }
176             }
177             firstTokenOfLine = false;
178             if(isFirstToken) {
179                 isFirstToken = false;
180                 if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) {
181                     push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text));
182                     isInsideModule = true;
183                     return;
184                 }
185             }
186         }
187         
188         switch(symbolId) {
189         case SCLTerminals.LBRACE:
190         case SCLTerminals.LPAREN:
191         case SCLTerminals.LBRACKET:
192         case SCLTerminals.IF:
193         case SCLTerminals.WHEN:
194         case SCLTerminals.LET:
195             indentations.add(-1);
196             indentationTokens.add(symbolId);
197             push(symbol);
198             return;
199         case SCLTerminals.THEN:
200             /*for(int tt : indentationTokens.toArray())
201                     System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
202                 System.out.println();*/
203             if(prevTokenId == SCLTerminals.COMMA) {
204                 // for list comprehension syntax
205                 push(symbol);
206                 break;
207             }
208         case SCLTerminals.RBRACE:
209         case SCLTerminals.RPAREN:
210         case SCLTerminals.RBRACKET:
211         case SCLTerminals.ELSE:
212         case SCLTerminals.IN:
213             int removedToken = SCLTerminals.EOF;
214             while(!indentations.isEmpty()) {
215                 removedToken = indentationTokens.removeAt(indentations.size()-1);
216                 //System.out.println("    removed " + SCLParser.TERMINAL_NAMES[removedToken]);
217                 if(indentations.removeAt(indentations.size()-1) < 0)
218                     break;
219                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
220                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
221             }
222             if(indentations.isEmpty())
223                 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
224             if(symbolId == SCLTerminals.THEN) {
225                 if(removedToken == SCLTerminals.WHEN)
226                     curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
227                 else {
228                     indentations.add(-1);
229                     indentationTokens.add(SCLTerminals.THEN);
230                 }
231             }   
232             push(symbol);
233             return;
234         case SCLTerminals.EOF:
235             while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
236                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
237                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
238                 indentationTokens.removeAt(indentations.size()-1);
239                 indentations.removeAt(indentations.size()-1);
240             }
241             if(indentations.size() > 1)
242                 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
243             push(symbol);
244             return;
245         default:
246             push(symbol);
247             return;
248         }
249     }
250
251     public void setParserOptions(SCLParserOptions options) {
252         this.options = options;
253         lexer.options = options;
254     }
255
256 }