]> gerrit.simantics Code Review - simantics/platform.git/blob
9cc1ff27b16a288afb11f138b9d8755a84f642d0
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.errors.Locations;
7 import org.simantics.scl.compiler.internal.parsing.Token;
8 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
9
10 import gnu.trove.list.array.TIntArrayList;
11 import gnu.trove.set.hash.TIntHashSet;
12
13 /**
14  * 
15  * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
16  * @author Hannu Niemistö
17  */
18 public class SCLPostLexer {
19         
20     public static TIntHashSet INDENTABLE = new TIntHashSet();
21     public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
22     public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
23     static {
24         INDENTABLE.add(SCLTerminals.WHERE);
25         INDENTABLE.add(SCLTerminals.QUERY_OP);
26         INDENTABLE.add(SCLTerminals.WITH);
27         INDENTABLE.add(SCLTerminals.DO);
28         INDENTABLE.add(SCLTerminals.MDO);
29         INDENTABLE.add(SCLTerminals.LET);
30         INDENTABLE.add(SCLTerminals.ENFORCE);
31         INDENTABLE.add(SCLTerminals.WHEN);
32         INDENTABLE.add(SCLTerminals.FOLLOWS);
33         INDENTABLE.add(SCLTerminals.EQ);
34         INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
35         INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
36         
37         NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
38         NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
39         NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
40         NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
41         NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
42         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
43         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
44         NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
45         NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
46         
47         NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
48         NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
49     }
50     
51     SCLLexer lexer;
52     Token[] queue = new Token[16];
53     int queuePos=0, queueSize=0;
54     TIntArrayList indentations = new TIntArrayList();
55     TIntArrayList indentationTokens = new TIntArrayList();
56     Token curToken = null;
57     int lineStart = 0;
58     boolean firstTokenOfLine = true;
59     private SCLParserOptions options;
60     private boolean isFirstToken = true;
61             
62     {
63         indentations.add(0);
64         indentationTokens.add(SCLTerminals.EOF);
65     }
66     
67     public SCLPostLexer(SCLLexer lexer) {
68         this.lexer = lexer;
69     }
70     
71     public SCLPostLexer(java.io.Reader in) {
72         this(new SCLLexer(in));
73     }
74
75     public Token nextToken() throws Exception {
76         while(queuePos == queueSize)
77             fillQueue();
78         return queue[queuePos++];
79     }
80     
81     public Token peekToken() throws Exception {
82         while(queuePos == queueSize)
83             fillQueue();
84         return queue[queuePos];
85     }
86     
87     private void push(Token symbol) {
88         /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
89                 " [" 
90                 + Locations.beginOf(symbol.location) + ".." 
91                 + Locations.endOf(symbol.location) + "]");*/
92         if(queueSize == queue.length)
93             queue = Arrays.copyOf(queue, queueSize*2);
94         queue[queueSize++] = symbol;
95     }
96     
97     private void fillQueue() throws Exception {
98         queuePos = 0;
99         queueSize = 0;
100         
101         for(int i=0;i<8;++i)
102             handleToken(lexer.nextToken());
103     }
104     
105     private SCLSyntaxErrorException error(int start, int end, String description) {
106         return new SCLSyntaxErrorException(Locations.location(start, end), description);
107     }
108     
109     private void handleToken(Token symbol) throws IOException {
110         int symbolId = symbol.id;
111         if(symbolId == SCLTerminals.EOL) {
112             lineStart = Locations.endOf(symbol.location);
113             firstTokenOfLine = true;
114             return;
115         }
116         
117         if(symbolId == SCLTerminals.COMMENT) {
118             firstTokenOfLine = false;
119             return;
120         }
121         
122         Token prevToken = curToken;
123         int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
124         curToken = symbol;
125         
126         int symbolStart = Locations.beginOf(symbol.location);
127         int symbolEnd = Locations.endOf(symbol.location);
128         
129         if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
130             push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
131             int symbolIndentation = symbolStart-lineStart;
132             //System.out.println("symbolIndentation = " + symbolIndentation);
133             indentations.add(symbolIndentation);
134             indentationTokens.add(prevTokenId);
135             firstTokenOfLine = false;
136         }
137         else if(firstTokenOfLine) {
138             if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
139                 ;
140             else {
141                 int level = symbolStart - lineStart;
142                 //System.out.println("level = " + level);
143                 if(indentations.get(indentations.size()-1) >= level) {
144                     while(indentations.get(indentations.size()-1) > level) {
145                         indentationTokens.removeAt(indentations.size()-1);
146                         indentations.removeAt(indentations.size()-1);
147                         int loc = Locations.endOf(prevToken.location);
148                         push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
149                     }
150                     if(indentations.get(indentations.size()-1) == level)
151                         push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
152                 }
153             }
154             firstTokenOfLine = false;
155             if(isFirstToken) {
156                 isFirstToken = false;
157                 if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) {
158                     push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text));
159                     return;
160                 }
161             }
162         }
163         
164         switch(symbolId) {
165         case SCLTerminals.LBRACE:
166         case SCLTerminals.LPAREN:
167         case SCLTerminals.LBRACKET:
168         case SCLTerminals.IF:
169         case SCLTerminals.WHEN:
170         case SCLTerminals.LET:
171             indentations.add(-1);
172             indentationTokens.add(symbolId);
173             push(symbol);
174             return;
175         case SCLTerminals.THEN:
176             /*for(int tt : indentationTokens.toArray())
177                     System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
178                 System.out.println();*/
179             if(prevTokenId == SCLTerminals.COMMA) {
180                 // for list comprehension syntax
181                 push(symbol);
182                 break;
183             }
184         case SCLTerminals.RBRACE:
185         case SCLTerminals.RPAREN:
186         case SCLTerminals.RBRACKET:
187         case SCLTerminals.ELSE:
188         case SCLTerminals.IN:
189             int removedToken = SCLTerminals.EOF;
190             while(!indentations.isEmpty()) {
191                 removedToken = indentationTokens.removeAt(indentations.size()-1);
192                 //System.out.println("    removed " + SCLParser.TERMINAL_NAMES[removedToken]);
193                 if(indentations.removeAt(indentations.size()-1) < 0)
194                     break;
195                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
196                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
197             }
198             if(indentations.isEmpty())
199                 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
200             if(symbolId == SCLTerminals.THEN) {
201                 if(removedToken == SCLTerminals.WHEN)
202                     curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
203                 else {
204                     indentations.add(-1);
205                     indentationTokens.add(SCLTerminals.THEN);
206                 }
207             }   
208             push(symbol);
209             return;
210         case SCLTerminals.EOF:
211             while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
212                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
213                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
214                 indentationTokens.removeAt(indentations.size()-1);
215                 indentations.removeAt(indentations.size()-1);
216             }
217             if(indentations.size() > 1)
218                 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
219             push(symbol);
220             return;
221         default:
222             push(symbol);
223             return;
224         }
225     }
226
227     public void setParserOptions(SCLParserOptions options) {
228         this.options = options;
229         lexer.options = options;
230     }
231
232 }