]> gerrit.simantics Code Review - simantics/platform.git/blob
efc799322b3d37c4220a90de6f8061154d3ace35
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.errors.Locations;
7 import org.simantics.scl.compiler.internal.parsing.Token;
8 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
9
10 import gnu.trove.list.array.TIntArrayList;
11 import gnu.trove.set.hash.TIntHashSet;
12
13 /**
14  * 
15  * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
16  * @author Hannu Niemistö
17  */
18 public class SCLPostLexer {
19         
20     public static TIntHashSet INDENTABLE = new TIntHashSet();
21     public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
22     public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
23     static {
24         INDENTABLE.add(SCLTerminals.WHERE);
25         INDENTABLE.add(SCLTerminals.QUERY_OP);
26         INDENTABLE.add(SCLTerminals.WITH);
27         INDENTABLE.add(SCLTerminals.DO);
28         INDENTABLE.add(SCLTerminals.MDO);
29         INDENTABLE.add(SCLTerminals.LET);
30         INDENTABLE.add(SCLTerminals.ENFORCE);
31         INDENTABLE.add(SCLTerminals.WHEN);
32         INDENTABLE.add(SCLTerminals.FOLLOWS);
33         INDENTABLE.add(SCLTerminals.EQ);
34         INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
35         INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
36         
37         NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
38         NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
39         NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
40         NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
41         NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
42         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
43         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
44         NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
45         NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
46         
47         NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
48         NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
49     }
50     
51     SCLLexer lexer;
52     Token[] queue = new Token[16];
53     int queuePos=0, queueSize=0;
54     TIntArrayList indentations = new TIntArrayList();
55     TIntArrayList indentationTokens = new TIntArrayList();
56     Token curToken = null;
57     int lineStart = 0;
58     boolean firstTokenOfLine = true;
59     private SCLParserOptions options;
60             
61     {
62         indentations.add(0);
63         indentationTokens.add(SCLTerminals.EOF);
64     }
65     
66     public SCLPostLexer(SCLLexer lexer) {
67         this.lexer = lexer;
68     }
69     
70     public SCLPostLexer(java.io.Reader in) {
71         this(new SCLLexer(in));
72     }
73
74     public Token nextToken() throws Exception {
75         while(queuePos == queueSize)
76             fillQueue();
77         return queue[queuePos++];
78     }
79     
80     public Token peekToken() throws Exception {
81         while(queuePos == queueSize)
82             fillQueue();
83         return queue[queuePos];
84     }
85     
86     private void push(Token symbol) {
87         /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
88                 " [" 
89                 + Locations.beginOf(symbol.location) + ".." 
90                 + Locations.endOf(symbol.location) + "]");*/
91         if(queueSize == queue.length)
92             queue = Arrays.copyOf(queue, queueSize*2);
93         queue[queueSize++] = symbol;
94     }
95     
96     private void fillQueue() throws Exception {
97         queuePos = 0;
98         queueSize = 0;
99         
100         for(int i=0;i<8;++i)
101             handleToken(lexer.nextToken());
102     }
103     
104     private SCLSyntaxErrorException error(int start, int end, String description) {
105         return new SCLSyntaxErrorException(Locations.location(start, end), description);
106     }
107     
108     private void handleToken(Token symbol) throws IOException {
109         int symbolId = symbol.id;
110         if(symbolId == SCLTerminals.EOL) {
111             lineStart = Locations.endOf(symbol.location);
112             firstTokenOfLine = true;
113             return;
114         }
115         
116         if(symbolId == SCLTerminals.COMMENT) {
117             firstTokenOfLine = false;
118             return;
119         }
120         
121         Token prevToken = curToken;
122         int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
123         curToken = symbol;
124         
125         int symbolStart = Locations.beginOf(symbol.location);
126         int symbolEnd = Locations.endOf(symbol.location);
127         
128         if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
129             push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
130             int symbolIndentation = symbolStart-lineStart;
131             //System.out.println("symbolIndentation = " + symbolIndentation);
132             indentations.add(symbolIndentation);
133             indentationTokens.add(prevTokenId);
134             firstTokenOfLine = false;
135         }
136         else if(firstTokenOfLine) {
137             if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
138                 ;
139             else {
140                 int level = symbolStart - lineStart;
141                 //System.out.println("level = " + level);
142                 if(indentations.get(indentations.size()-1) >= level) {
143                     while(indentations.get(indentations.size()-1) > level) {
144                         indentationTokens.removeAt(indentations.size()-1);
145                         indentations.removeAt(indentations.size()-1);
146                         int loc = Locations.endOf(prevToken.location);
147                         push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
148                     }
149                     if(indentations.get(indentations.size()-1) == level)
150                         push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
151                 }
152             }
153             firstTokenOfLine = false;
154         }
155         
156         switch(symbolId) {
157         case SCLTerminals.LBRACE:
158         case SCLTerminals.LPAREN:
159         case SCLTerminals.LBRACKET:
160         case SCLTerminals.IF:
161         case SCLTerminals.WHEN:
162         case SCLTerminals.LET:
163             indentations.add(-1);
164             indentationTokens.add(symbolId);
165             push(symbol);
166             return;
167         case SCLTerminals.THEN:
168             /*for(int tt : indentationTokens.toArray())
169                 System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
170             System.out.println();*/
171             if(prevTokenId == SCLTerminals.COMMA) {
172                 // for list comprehension syntax
173                 push(symbol);
174                 break;
175             }
176         case SCLTerminals.RBRACE:
177         case SCLTerminals.RPAREN:
178         case SCLTerminals.RBRACKET:
179         case SCLTerminals.ELSE:
180         case SCLTerminals.IN:
181             int removedToken = SCLTerminals.EOF;
182             while(!indentations.isEmpty()) {
183                 removedToken = indentationTokens.removeAt(indentations.size()-1);
184                 //System.out.println("    removed " + SCLParser.TERMINAL_NAMES[removedToken]);
185                 if(indentations.removeAt(indentations.size()-1) < 0)
186                     break;
187                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
188                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
189             }
190             if(indentations.isEmpty())
191                 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
192             if(symbolId == SCLTerminals.THEN) {
193                 if(removedToken == SCLTerminals.WHEN)
194                     curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
195                 else {
196                     indentations.add(-1);
197                     indentationTokens.add(SCLTerminals.THEN);
198                 }
199             }   
200             push(symbol);
201             return;
202         case SCLTerminals.EOF:
203             while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
204                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
205                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
206                 indentationTokens.removeAt(indentations.size()-1);
207                 indentations.removeAt(indentations.size()-1);
208             }
209             if(indentations.size() > 1)
210                 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
211             push(symbol);
212             return;
213         default:
214             push(symbol);
215             return;
216         }
217     }
218
219     public void setParserOptions(SCLParserOptions options) {
220         this.options = options;
221         lexer.options = options;
222     }
223
224 }