]> gerrit.simantics Code Review - simantics/platform.git/blob
3d3b7b2f512033d90e13119b8edbfa8960cbaafd
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.errors.Locations;
7 import org.simantics.scl.compiler.internal.parsing.Token;
8 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
9
10 import gnu.trove.list.array.TIntArrayList;
11 import gnu.trove.set.hash.TIntHashSet;
12
13 /**
14  * 
15  * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
16  * @author Hannu Niemistö
17  */
18 public class SCLPostLexer {
19         
20     public static TIntHashSet INDENTABLE = new TIntHashSet();
21     public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
22     public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
23     static {
24         INDENTABLE.add(SCLTerminals.WHERE);
25         INDENTABLE.add(SCLTerminals.QUERY_OP);
26         INDENTABLE.add(SCLTerminals.WITH);
27         INDENTABLE.add(SCLTerminals.DO);
28         INDENTABLE.add(SCLTerminals.MDO);
29         INDENTABLE.add(SCLTerminals.LET);
30         INDENTABLE.add(SCLTerminals.ENFORCE);
31         INDENTABLE.add(SCLTerminals.WHEN);
32         INDENTABLE.add(SCLTerminals.FOLLOWS);
33         INDENTABLE.add(SCLTerminals.EQ);
34         INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
35         INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
36         
37         NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
38         NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
39         NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
40         NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
41         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
42         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
43         NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
44         NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
45         
46         NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
47         NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
48     }
49     
50     SCLLexer lexer;
51     Token[] queue = new Token[16];
52     int queuePos=0, queueSize=0;
53     TIntArrayList indentations = new TIntArrayList();
54     TIntArrayList indentationTokens = new TIntArrayList();
55     Token curToken = null;
56     int lineStart = 0;
57     boolean firstTokenOfLine = true;
58     private SCLParserOptions options;
59             
60     {
61         indentations.add(0);
62         indentationTokens.add(SCLTerminals.EOF);
63     }
64     
65     public SCLPostLexer(SCLLexer lexer) {
66         this.lexer = lexer;
67     }
68     
69     public SCLPostLexer(java.io.Reader in) {
70         this(new SCLLexer(in));
71     }
72
73     public Token nextToken() throws Exception {
74         while(queuePos == queueSize)
75             fillQueue();
76         return queue[queuePos++];
77     }
78     
79     public Token peekToken() throws Exception {
80         while(queuePos == queueSize)
81             fillQueue();
82         return queue[queuePos];
83     }
84     
85     private void push(Token symbol) {
86         /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
87                 " [" 
88                 + Locations.beginOf(symbol.location) + ".." 
89                 + Locations.endOf(symbol.location) + "]");*/
90         if(queueSize == queue.length)
91             queue = Arrays.copyOf(queue, queueSize*2);
92         queue[queueSize++] = symbol;
93     }
94     
95     private void fillQueue() throws Exception {
96         queuePos = 0;
97         queueSize = 0;
98         
99         for(int i=0;i<8;++i)
100             handleToken(lexer.nextToken());
101     }
102     
103     private SCLSyntaxErrorException error(int start, int end, String description) {
104         return new SCLSyntaxErrorException(Locations.location(start, end), description);
105     }
106     
107     private void handleToken(Token symbol) throws IOException {
108         int symbolId = symbol.id;
109         if(symbolId == SCLTerminals.EOL) {
110             lineStart = Locations.endOf(symbol.location);
111             firstTokenOfLine = true;
112             return;
113         }
114         
115         if(symbolId == SCLTerminals.COMMENT) {
116             firstTokenOfLine = false;
117             return;
118         }
119         
120         Token prevToken = curToken;
121         int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
122         curToken = symbol;
123         
124         int symbolStart = Locations.beginOf(symbol.location);
125         int symbolEnd = Locations.endOf(symbol.location);
126         
127         if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
128             push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
129             int symbolIndentation = symbolStart-lineStart;
130             //System.out.println("symbolIndentation = " + symbolIndentation);
131             indentations.add(symbolIndentation);
132             indentationTokens.add(prevTokenId);
133             firstTokenOfLine = false;
134         }
135         else if(firstTokenOfLine) {
136             if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
137                 ;
138             else {
139                 int level = symbolStart - lineStart;
140                 //System.out.println("level = " + level);
141                 if(indentations.get(indentations.size()-1) >= level) {
142                     while(indentations.get(indentations.size()-1) > level) {
143                         indentationTokens.removeAt(indentations.size()-1);
144                         indentations.removeAt(indentations.size()-1);
145                         int loc = Locations.endOf(prevToken.location);
146                         push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
147                     }
148                     if(indentations.get(indentations.size()-1) == level && symbolId != SCLTerminals.ELSE)
149                         push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
150                 }
151             }
152             firstTokenOfLine = false;
153         }
154         
155         switch(symbolId) {
156         case SCLTerminals.LBRACE:
157         case SCLTerminals.LPAREN:
158         case SCLTerminals.LBRACKET:
159         case SCLTerminals.IF:
160         case SCLTerminals.WHEN:
161         case SCLTerminals.LET:
162             indentations.add(-1);
163             indentationTokens.add(symbolId);
164             push(symbol);
165             return;
166         case SCLTerminals.THEN:
167             /*for(int tt : indentationTokens.toArray())
168                 System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
169             System.out.println();*/
170             if(prevTokenId == SCLTerminals.COMMA) {
171                 // for list comprehension syntax
172                 push(symbol);
173                 break;
174             }
175         case SCLTerminals.RBRACE:
176         case SCLTerminals.RPAREN:
177         case SCLTerminals.RBRACKET:
178         //case SCLTerminals.ELSE:
179         case SCLTerminals.IN:
180             int removedToken = SCLTerminals.EOF;
181             while(!indentations.isEmpty()) {
182                 removedToken = indentationTokens.removeAt(indentations.size()-1);
183                 //System.out.println("    removed " + SCLParser.TERMINAL_NAMES[removedToken]);
184                 if(indentations.removeAt(indentations.size()-1) < 0)
185                     break;
186                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
187                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
188             }
189             if(indentations.isEmpty())
190                 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
191             if(symbolId == SCLTerminals.THEN && removedToken == SCLTerminals.WHEN)
192                 curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
193             push(symbol);
194             //if(symbolId == SCLTerminals.THEN)
195             //    indentations.add(-1);
196             return;
197         case SCLTerminals.EOF:
198             while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
199                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
200                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
201                 indentationTokens.removeAt(indentations.size()-1);
202                 indentations.removeAt(indentations.size()-1);
203             }
204             if(indentations.size() > 1)
205                 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
206             push(symbol);
207             return;
208         default:
209             push(symbol);
210             return;
211         }
212     }
213
214     public void setParserOptions(SCLParserOptions options) {
215         this.options = options;
216         lexer.options = options;
217     }
218
219 }