1 package org.simantics.scl.compiler.internal.parsing.parser;
3 import java.io.IOException;
4 import java.util.Arrays;
6 import org.simantics.scl.compiler.compilation.CompilationContext;
7 import org.simantics.scl.compiler.errors.Locations;
8 import org.simantics.scl.compiler.internal.parsing.Token;
9 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
11 import gnu.trove.list.array.TIntArrayList;
12 import gnu.trove.set.hash.TIntHashSet;
16 * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
17 * @author Hannu Niemistö
19 public class SCLPostLexer {
// Upper bound on the number of lexer tokens fillQueue() handles per refill (its loop bound).
21 private static final int PATCH_SIZE = 16;
// Initial capacity of the token queue; push() doubles the array when it is full.
22 private static final int INITIAL_QUEUE_SIZE = 32;
// Token ids after which handleToken() emits an implicit '{' unless the next token is an explicit LBRACE.
24 public static TIntHashSet INDENTABLE = new TIntHashSet();
// Checked in handleToken() against the id of the first token of a line.
// NOTE(review): the statement guarded by that check is not visible in this excerpt;
// presumably a hit suppresses the implicit ';' - confirm.
25 public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
// Checked in handleToken() against the id of the previously handled token, in the same test as NO_SEMICOLON_BEFORE.
26 public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
// Terminals registered as INDENTABLE.
28 INDENTABLE.add(SCLTerminals.WHERE);
29 INDENTABLE.add(SCLTerminals.QUERY_OP);
30 INDENTABLE.add(SCLTerminals.WITH);
31 INDENTABLE.add(SCLTerminals.DO);
32 INDENTABLE.add(SCLTerminals.MDO);
33 INDENTABLE.add(SCLTerminals.EDO);
34 INDENTABLE.add(SCLTerminals.LET);
35 INDENTABLE.add(SCLTerminals.ENFORCE);
36 INDENTABLE.add(SCLTerminals.WHEN);
37 INDENTABLE.add(SCLTerminals.FOLLOWS);
38 INDENTABLE.add(SCLTerminals.EQ);
39 INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
40 INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
// Terminals registered as NO_SEMICOLON_BEFORE.
42 NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
43 NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
44 NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
45 NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
46 NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
47 NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
48 NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
49 NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
50 NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
// Terminals registered as NO_SEMICOLON_AFTER. EOF is also the stand-in id handleToken() uses when there is no previous token.
52 NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
53 NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
// Buffer of tokens produced by handleToken() via push() and consumed by nextToken()/peekToken().
57 Token[] queue = new Token[INITIAL_QUEUE_SIZE];
// queuePos: index of the next token to hand out; queueSize: number of tokens currently stored in queue.
58 int queuePos=0, queueSize=0;
// Stack of block indentations. handleToken() pushes the column of the first token of an implicit block,
// or -1 for explicit openers (brackets, IF, WHEN, LET, THEN).
59 TIntArrayList indentations = new TIntArrayList();
// Parallel stack to `indentations`: the token id that caused each entry to be pushed.
60 TIntArrayList indentationTokens = new TIntArrayList();
// Read by handleToken() as the previously handled token; null until one has been recorded.
61 Token curToken = null;
// True after an EOL token until the next token on that line has been handled.
63 boolean firstTokenOfLine = true;
// Set through setParserOptions(); may be null (handleToken() null-checks it before reading isModule).
64 private SCLParserOptions options;
// Cleared by handleToken(). NOTE(review): no read of this flag is visible in this excerpt.
65 private boolean isFirstToken = true;
// Set through setCompilationContext(); fillQueue() reads context.header.
66 private CompilationContext context;
69 * We are parsing a module header and therefore should process tokens one by one and not by patches.
71 private boolean isInsideModule = false;
// Bottom entry of the indentationTokens stack.
// NOTE(review): the matching push onto `indentations` is not visible in this excerpt - confirm both stacks start with one entry.
75 indentationTokens.add(SCLTerminals.EOF);
/**
 * Creates a post-lexer on top of an existing lexer.
 * NOTE(review): the constructor body is not visible in this excerpt; presumably it stores
 * {@code lexer} in the field that fillQueue() reads tokens from - confirm.
 *
 * @param lexer the underlying lexer
 */
78 public SCLPostLexer(SCLLexer lexer) {
/**
 * Convenience constructor: wraps {@code in} in a new {@link SCLLexer} and delegates to
 * the {@code SCLLexer} constructor.
 *
 * @param in the character source handed to the new lexer
 */
82 public SCLPostLexer(java.io.Reader in) {
83 this(new SCLLexer(in));
/**
 * Passes the compilation context on to the underlying lexer and keeps a reference to it
 * in this post-lexer (fillQueue() reads {@code context.header}).
 *
 * @param context the compilation context to use
 */
86 public void setCompilationContext(CompilationContext context) {
87 lexer.setCompilationContext(context);
88 this.context = context;
/**
 * Returns the token at the current queue position and advances past it.
 * Loops while the queue is exhausted ({@code queuePos == queueSize}).
 * NOTE(review): the loop body is not visible in this excerpt; presumably it calls fillQueue() - confirm.
 *
 * @return the next token
 * @throws Exception as declared; the throwing calls are not visible in this excerpt
 */
91 public Token nextToken() throws Exception {
92 while(queuePos == queueSize)
94 return queue[queuePos++];
/**
 * Returns the token at the current queue position without advancing past it.
 * Loops while the queue is exhausted ({@code queuePos == queueSize}).
 * NOTE(review): the loop body is not visible in this excerpt; presumably it calls fillQueue() - confirm.
 *
 * @return the token the next call to nextToken() would return
 * @throws Exception as declared; the throwing calls are not visible in this excerpt
 */
97 public Token peekToken() throws Exception {
98 while(queuePos == queueSize)
100 return queue[queuePos];
/**
 * Appends a token to the end of the queue, doubling the backing array when it is full.
 *
 * @param symbol the token to enqueue
 */
103 private void push(Token symbol) {
104 /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
106 + Locations.beginOf(symbol.location) + ".."
107 + Locations.endOf(symbol.location) + "]");*/
// Grow before writing so the store below is always in bounds.
108 if(queueSize == queue.length)
109 queue = Arrays.copyOf(queue, queueSize*2);
110 queue[queueSize++] = symbol;
/**
 * Reads tokens from the underlying lexer and feeds each one to handleToken(), at most
 * PATCH_SIZE of them per call.
 * NOTE(review): several lines of this method are not visible in this excerpt (the statements
 * before the loop and the control flow around the {@code context.header} check); presumably
 * the queue indices are reset first and the loop stops early while a module header is being
 * parsed - confirm against the full source.
 *
 * @throws Exception as declared; propagated from the lexer or handleToken()
 */
113 private void fillQueue() throws Exception {
117 for(int i=0;i<PATCH_SIZE;++i) {
118 handleToken(lexer.nextToken());
// NOTE(review): the statement guarded by this null check is not visible here.
120 if(context.header == null)
123 isInsideModule = false;
/**
 * Builds (but does not throw) a syntax error for the source range {@code start..end};
 * callers write {@code throw error(...)}.
 *
 * @param start begin position of the offending range
 * @param end end position of the offending range
 * @param description the error message
 * @return the exception to throw
 */
128 private SCLSyntaxErrorException error(int start, int end, String description) {
129 return new SCLSyntaxErrorException(Locations.location(start, end), description);
/**
 * Processes one raw lexer token and pushes the resulting tokens onto the queue, inserting
 * implicit '{', ';' and '}' tokens according to indentation.
 * Indentation is measured as the token's begin position minus the position where the
 * current line starts. Open blocks are tracked on the parallel stacks
 * {@code indentations} / {@code indentationTokens}; explicit openers are recorded with
 * indentation -1.
 * NOTE(review): a number of lines of this method (returns, the switch header, breaks, else
 * branches and the final push of the token itself) are not visible in this excerpt; comments
 * below describe only what the visible lines do.
 *
 * @param symbol the token just read from the lexer
 * @throws IOException as declared; the throwing call is not visible in this excerpt
 */
132 private void handleToken(Token symbol) throws IOException {
133 int symbolId = symbol.id;
// End of line: remember where the next line begins and flag that its first token is still to come.
134 if(symbolId == SCLTerminals.EOL) {
135 lineStart = Locations.endOf(symbol.location);
136 firstTokenOfLine = true;
// A comment clears the first-token-of-line flag. NOTE(review): rest of this branch not visible.
140 if(symbolId == SCLTerminals.COMMENT) {
141 firstTokenOfLine = false;
// The previously handled token drives the layout decisions; EOF stands in when there is none.
145 Token prevToken = curToken;
146 int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
149 int symbolStart = Locations.beginOf(symbol.location);
150 int symbolEnd = Locations.endOf(symbol.location);
// Previous token opens a layout block and no explicit '{' follows: emit an implicit '{'
// and record this token's column as the indentation of the new block.
152 if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
153 push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
154 int symbolIndentation = symbolStart-lineStart;
155 //System.out.println("symbolIndentation = " + symbolIndentation);
156 indentations.add(symbolIndentation);
157 indentationTokens.add(prevTokenId);
158 firstTokenOfLine = false;
// Otherwise, for the first token of a line, compare its column with the innermost block.
160 else if(firstTokenOfLine) {
// NOTE(review): the statement guarded by this check is not visible; presumably it skips the layout handling below.
161 if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
164 int level = symbolStart - lineStart;
165 //System.out.println("level = " + level);
166 if(indentations.get(indentations.size()-1) >= level) {
// Close every block that is indented deeper than this token; each implicit '}' is placed at the end of the previous token.
167 while(indentations.get(indentations.size()-1) > level) {
168 indentationTokens.removeAt(indentations.size()-1);
169 indentations.removeAt(indentations.size()-1);
170 int loc = Locations.endOf(prevToken.location);
171 push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
// Same column as the innermost remaining block: separate from the previous item with an implicit ';'.
173 if(indentations.get(indentations.size()-1) == level)
174 push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
177 firstTokenOfLine = false;
179 isFirstToken = false;
// When module parsing is enabled in the options, the identifier "module" is emitted as a MODULE terminal.
180 if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) {
181 push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text));
182 isInsideModule = true;
// Explicit openers: push an entry with indentation -1 together with the opening token id.
189 case SCLTerminals.LBRACE:
190 case SCLTerminals.LPAREN:
191 case SCLTerminals.LBRACKET:
192 case SCLTerminals.IF:
193 case SCLTerminals.WHEN:
194 case SCLTerminals.LET:
195 indentations.add(-1);
196 indentationTokens.add(symbolId);
199 case SCLTerminals.THEN:
200 /*for(int tt : indentationTokens.toArray())
201 System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
202 System.out.println();*/
// NOTE(review): body of this special case is not visible in this excerpt.
203 if(prevTokenId == SCLTerminals.COMMA) {
204 // for list comprehension syntax
// Closers (and THEN, unless handled above): pop stack entries, emitting an implicit '}' for popped entries,
// until an entry with negative indentation has been removed.
// NOTE(review): the statement under the `< 0` check is not visible; presumably it leaves the loop.
208 case SCLTerminals.RBRACE:
209 case SCLTerminals.RPAREN:
210 case SCLTerminals.RBRACKET:
211 case SCLTerminals.ELSE:
212 case SCLTerminals.IN:
213 int removedToken = SCLTerminals.EOF;
214 while(!indentations.isEmpty()) {
215 removedToken = indentationTokens.removeAt(indentations.size()-1);
216 //System.out.println(" removed " + SCLParser.TERMINAL_NAMES[removedToken]);
217 if(indentations.removeAt(indentations.size()-1) < 0)
// The implicit '}' is a zero-width token at the end of the previous token, or at this token's location if there is none.
219 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
220 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
// The stack being empty at this point is reported as a closer without a matching opener.
222 if(indentations.isEmpty())
223 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
// THEN whose removed entry was pushed by WHEN is retagged as THEN_AFTER_WHEN.
// NOTE(review): a line between the retagging and the push of the THEN entry is not visible,
// so it cannot be told from here whether that push is unconditional or an else branch.
224 if(symbolId == SCLTerminals.THEN) {
225 if(removedToken == SCLTerminals.WHEN)
226 curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
228 indentations.add(-1);
229 indentationTokens.add(SCLTerminals.THEN);
// End of input: close all implicit blocks (non-negative indentation) above the bottom entry.
234 case SCLTerminals.EOF:
235 while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
236 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
237 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
238 indentationTokens.removeAt(indentations.size()-1);
239 indentations.removeAt(indentations.size()-1);
// Anything still above the bottom entry has negative indentation, i.e. an explicit opener that was never closed.
241 if(indentations.size() > 1)
242 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
/**
 * Stores the parser options in this post-lexer and assigns the same object to the
 * underlying lexer's {@code options} field.
 *
 * @param options the parser options; handleToken() null-checks this before reading {@code isModule}
 */
251 public void setParserOptions(SCLParserOptions options) {
252 this.options = options;
253 lexer.options = options;