1 package org.simantics.scl.compiler.internal.parsing.parser;
3 import java.io.IOException;
4 import java.util.Arrays;
6 import org.simantics.scl.compiler.errors.Locations;
7 import org.simantics.scl.compiler.internal.parsing.Token;
8 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
10 import gnu.trove.list.array.TIntArrayList;
11 import gnu.trove.set.hash.TIntHashSet;
15 * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
16 * @author Hannu Niemistö
18 public class SCLPostLexer {
// Terminal ids that open a layout block: when the previously handled token is in this set
// and the current token is not an explicit '{', handleToken pushes an implicit '{'.
20 public static TIntHashSet INDENTABLE = new TIntHashSet();
// Terminal ids checked against the current token when it is the first token of a line.
// NOTE(review): the statement guarded by that check is not visible in this excerpt;
// presumably no implicit ';' is inserted before these tokens -- confirm.
21 public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
// Terminal ids checked against the previous token when the current token is the first
// token of a line. NOTE(review): presumably no implicit ';' is inserted after these -- confirm.
22 public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
// Population of the three terminal-id sets.
// NOTE(review): the enclosing initializer header is not visible in this excerpt.

// Terminals registered in INDENTABLE (block-opening keywords and operators).
24 INDENTABLE.add(SCLTerminals.WHERE);
25 INDENTABLE.add(SCLTerminals.QUERY_OP);
26 INDENTABLE.add(SCLTerminals.WITH);
27 INDENTABLE.add(SCLTerminals.DO);
28 INDENTABLE.add(SCLTerminals.MDO);
29 INDENTABLE.add(SCLTerminals.LET);
30 INDENTABLE.add(SCLTerminals.ENFORCE);
31 INDENTABLE.add(SCLTerminals.WHEN);
32 INDENTABLE.add(SCLTerminals.FOLLOWS);
33 INDENTABLE.add(SCLTerminals.EQ);
34 INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
35 INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
// Terminals registered in NO_SEMICOLON_BEFORE (end of input, operator symbols,
// continuation keywords, closing brackets and explicit semicolons).
37 NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
38 NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
39 NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
40 NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
41 NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
42 NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
43 NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
44 NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
45 NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
// Terminals registered in NO_SEMICOLON_AFTER. EOF is also the id used by handleToken
// when there is no previous token.
47 NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
48 NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
// Buffer of tokens produced by push() and handed out by nextToken()/peekToken();
// it is doubled by push() when full.
52 Token[] queue = new Token[16];
// queuePos: index of the next token to hand out; queueSize: number of filled slots.
53 int queuePos=0, queueSize=0;
// Stack of indentation levels of the currently open blocks. handleToken pushes the column
// of the first token for implicit blocks and -1 for explicit openers.
54 TIntArrayList indentations = new TIntArrayList();
// Parallel stack to 'indentations': the terminal id recorded for each open level.
55 TIntArrayList indentationTokens = new TIntArrayList();
// Read by handleToken as the previously handled token.
// NOTE(review): the regular assignment of this field is not visible in this excerpt.
56 Token curToken = null;
// True after an EOL token until the next token has been handled.
58 boolean firstTokenOfLine = true;
// Parser options supplied via setParserOptions; may be null (handleToken checks for it).
59 private SCLParserOptions options;
// Cleared by handleToken. NOTE(review): no read of this flag is visible in this excerpt.
60 private boolean isFirstToken = true;
// Bottom entry of the indentationTokens stack.
// NOTE(review): the matching push onto 'indentations' is not visible here -- confirm.
64 indentationTokens.add(SCLTerminals.EOF);
/**
 * Creates a post-lexer on top of the given lexer.
 * NOTE(review): the constructor body is not visible in this excerpt.
 *
 * @param lexer the underlying token source
 */
67 public SCLPostLexer(SCLLexer lexer) {
/**
 * Creates a post-lexer that reads from the given reader by wrapping it in a new
 * {@code SCLLexer} and delegating to the lexer-based constructor.
 *
 * @param in the character source
 */
71 public SCLPostLexer(java.io.Reader in) {
72 this(new SCLLexer(in));
/**
 * Returns the token at the current queue position and advances past it.
 *
 * @return the next token from the queue
 * @throws Exception as declared; the visible statements do not show its source
 */
75 public Token nextToken() throws Exception {
// Loop while every queued token has been consumed.
// NOTE(review): the loop body is not visible in this excerpt; presumably it refills the queue.
76 while(queuePos == queueSize)
78 return queue[queuePos++];
/**
 * Returns the token at the current queue position without advancing past it.
 *
 * @return the token that the next call to nextToken() would return
 * @throws Exception as declared; the visible statements do not show its source
 */
81 public Token peekToken() throws Exception {
// Loop while every queued token has been consumed.
// NOTE(review): the loop body is not visible in this excerpt; presumably it refills the queue.
82 while(queuePos == queueSize)
84 return queue[queuePos];
/**
 * Appends a token to the end of the output queue, growing the backing array when needed.
 *
 * @param symbol the token to enqueue
 */
87 private void push(Token symbol) {
// Disabled debug trace of each pushed token.
88 /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
90 + Locations.beginOf(symbol.location) + ".."
91 + Locations.endOf(symbol.location) + "]");*/
// The array is full: double its capacity, keeping the existing contents.
92 if(queueSize == queue.length)
93 queue = Arrays.copyOf(queue, queueSize*2);
94 queue[queueSize++] = symbol;
/**
 * Reads one token from the lexer and passes it to handleToken, which may push
 * tokens onto the queue.
 * NOTE(review): the statements before this call are not visible in this excerpt.
 *
 * @throws Exception as declared by the signature
 */
97 private void fillQueue() throws Exception {
102 handleToken(lexer.nextToken());
/**
 * Builds (does not throw) a syntax error for the given source range.
 *
 * @param start begin offset of the offending range
 * @param end end offset of the offending range
 * @param description the error message
 * @return a new SCLSyntaxErrorException located at the range combined by Locations.location
 */
105 private SCLSyntaxErrorException error(int start, int end, String description) {
106 return new SCLSyntaxErrorException(Locations.location(start, end), description);
/**
 * Processes one token from the lexer and pushes implicit '{', ';' and '}' tokens
 * derived from the indentation around it.
 *
 * NOTE(review): several lines of this method (closing braces, the switch header,
 * break/return statements) are not visible in this excerpt. The comments below
 * describe only the statements that are shown.
 *
 * @param symbol the token just read from the lexer
 * @throws IOException as declared by the signature
 */
109 private void handleToken(Token symbol) throws IOException {
110 int symbolId = symbol.id;
// End of line: remember the offset where the next line starts and flag that the
// following token is the first one on its line.
111 if(symbolId == SCLTerminals.EOL) {
112 lineStart = Locations.endOf(symbol.location);
113 firstTokenOfLine = true;
// A comment clears the first-token-of-line flag.
117 if(symbolId == SCLTerminals.COMMENT) {
118 firstTokenOfLine = false;
// The previously handled token; a missing previous token is treated as EOF.
122 Token prevToken = curToken;
123 int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
126 int symbolStart = Locations.beginOf(symbol.location);
127 int symbolEnd = Locations.endOf(symbol.location);
// The previous token opens a layout block and no explicit '{' follows: push an implicit
// '{' and record this token's column (offset from lineStart) as the block's indentation,
// together with the token that opened the block.
129 if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
130 push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
131 int symbolIndentation = symbolStart-lineStart;
132 //System.out.println("symbolIndentation = " + symbolIndentation);
133 indentations.add(symbolIndentation);
134 indentationTokens.add(prevTokenId);
135 firstTokenOfLine = false;
// Otherwise, for the first token of a line, compare its column with the open blocks.
137 else if(firstTokenOfLine) {
// NOTE(review): the statement guarded by this check is not visible in this excerpt;
// presumably it skips the implicit ';' / '}' handling below -- confirm.
138 if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
141 int level = symbolStart - lineStart;
142 //System.out.println("level = " + level);
143 if(indentations.get(indentations.size()-1) >= level) {
// Pop every block indented deeper than this token, pushing an implicit '}' located
// at the end of the previous token for each one.
144 while(indentations.get(indentations.size()-1) > level) {
145 indentationTokens.removeAt(indentations.size()-1);
146 indentations.removeAt(indentations.size()-1);
147 int loc = Locations.endOf(prevToken.location);
148 push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
// Same column as the innermost remaining block: push an implicit ';'.
150 if(indentations.get(indentations.size()-1) == level)
151 push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
154 firstTokenOfLine = false;
156 isFirstToken = false;
// With module options enabled, an identifier spelled "module" is pushed as a MODULE token.
157 if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) {
158 push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text));
// Explicit openers push a -1 indentation together with their own token id; the closing
// cases below test for a negative indentation while popping.
165 case SCLTerminals.LBRACE:
166 case SCLTerminals.LPAREN:
167 case SCLTerminals.LBRACKET:
168 case SCLTerminals.IF:
169 case SCLTerminals.WHEN:
170 case SCLTerminals.LET:
171 indentations.add(-1);
172 indentationTokens.add(symbolId);
175 case SCLTerminals.THEN:
176 /*for(int tt : indentationTokens.toArray())
177 System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
178 System.out.println();*/
// NOTE(review): the body of this special case is not visible beyond the comment below.
179 if(prevTokenId == SCLTerminals.COMMA) {
180 // for list comprehension syntax
// Closing tokens: pop entries from both stacks, remembering the last removed token id.
184 case SCLTerminals.RBRACE:
185 case SCLTerminals.RPAREN:
186 case SCLTerminals.RBRACKET:
187 case SCLTerminals.ELSE:
188 case SCLTerminals.IN:
189 int removedToken = SCLTerminals.EOF;
190 while(!indentations.isEmpty()) {
191 removedToken = indentationTokens.removeAt(indentations.size()-1);
192 //System.out.println(" removed " + SCLParser.TERMINAL_NAMES[removedToken]);
// NOTE(review): the statement guarded by this negative-indentation check is not visible;
// presumably it leaves the loop once an explicit opener has been removed -- confirm.
193 if(indentations.removeAt(indentations.size()-1) < 0)
// Push an implicit '}' at the end of the previous token, or at this token's location
// when there is no previous token.
195 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
196 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
// An empty stack at this point is reported as a closer without a matching opener.
198 if(indentations.isEmpty())
199 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
200 if(symbolId == SCLTerminals.THEN) {
// When the last removed entry was WHEN, the token is replaced by THEN_AFTER_WHEN.
201 if(removedToken == SCLTerminals.WHEN)
202 curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
// Push a THEN entry with the -1 indentation.
// NOTE(review): the branch structure around these two lines is not visible -- confirm when they run.
204 indentations.add(-1);
205 indentationTokens.add(SCLTerminals.THEN);
// End of input: while more than the bottom entry remains and the top entry has a
// non-negative indentation, push an implicit '}' and pop both stacks.
210 case SCLTerminals.EOF:
211 while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
212 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
213 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
214 indentationTokens.removeAt(indentations.size()-1);
215 indentations.removeAt(indentations.size()-1);
// Entries still left above the bottom one are reported as unclosed.
217 if(indentations.size() > 1)
218 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
/**
 * Stores the parser options on this post-lexer and on the underlying lexer.
 *
 * @param options the options to use; handleToken tolerates a null value here, but the
 *                lexer's handling of null is not visible in this excerpt
 */
227 public void setParserOptions(SCLParserOptions options) {
228 this.options = options;
229 lexer.options = options;