gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLPostLexer.java
(refs #7250) CHR rules modularization (first working version)
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / internal / parsing / parser / SCLPostLexer.java
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import java.io.IOException;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.compilation.CompilationContext;
7 import org.simantics.scl.compiler.errors.Locations;
8 import org.simantics.scl.compiler.internal.parsing.Token;
9 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
10
11 import gnu.trove.list.array.TIntArrayList;
12 import gnu.trove.set.hash.TIntHashSet;
13
14 /**
15  * 
16  * <http://www.haskell.org/onlinereport/haskell2010/haskellch10.html#x17-17800010.3>
17  * @author Hannu Niemistö
18  */
19 public class SCLPostLexer {
20     
21     private static final int PATCH_SIZE = 16;
22     private static final int INITIAL_QUEUE_SIZE = 32;
23         
24     public static TIntHashSet INDENTABLE = new TIntHashSet();
25     public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet();
26     public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet();
27     static {
28         INDENTABLE.add(SCLTerminals.WHERE);
29         INDENTABLE.add(SCLTerminals.QUERY_OP);
30         INDENTABLE.add(SCLTerminals.WITH);
31         INDENTABLE.add(SCLTerminals.DO);
32         INDENTABLE.add(SCLTerminals.MDO);
33         INDENTABLE.add(SCLTerminals.LET);
34         INDENTABLE.add(SCLTerminals.ENFORCE);
35         INDENTABLE.add(SCLTerminals.WHEN);
36         INDENTABLE.add(SCLTerminals.FOLLOWS);
37         INDENTABLE.add(SCLTerminals.EQ);
38         INDENTABLE.add(SCLTerminals.LAMBDA_MATCH);
39         INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN);
40         
41         NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF);
42         NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL);
43         NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN);
44         NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE);
45         NO_SEMICOLON_BEFORE.add(SCLTerminals.IN);
46         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE);
47         NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET);
48         NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN);
49         NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON);
50         
51         NO_SEMICOLON_AFTER.add(SCLTerminals.EOF);
52         NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL);
53     }
54     
55     SCLLexer lexer;
56     Token[] queue = new Token[INITIAL_QUEUE_SIZE];
57     int queuePos=0, queueSize=0;
58     TIntArrayList indentations = new TIntArrayList();
59     TIntArrayList indentationTokens = new TIntArrayList();
60     Token curToken = null;
61     int lineStart = 0;
62     boolean firstTokenOfLine = true;
63     private SCLParserOptions options;
64     private boolean isFirstToken = true;
65     private CompilationContext context;
66     
67     /**
68      * We are parsing a module header and therefore should process tokens one by one and not by patches.
69      */
70     private boolean isInsideModule = false; 
71             
72     {
73         indentations.add(0);
74         indentationTokens.add(SCLTerminals.EOF);
75     }
76     
77     public SCLPostLexer(SCLLexer lexer) {
78         this.lexer = lexer;
79     }
80     
81     public SCLPostLexer(java.io.Reader in) {
82         this(new SCLLexer(in));
83     }
84     
85     public void setCompilationContext(CompilationContext context) {
86         lexer.setCompilationContext(context);
87         this.context = context;
88     }
89
90     public Token nextToken() throws Exception {
91         while(queuePos == queueSize)
92             fillQueue();
93         return queue[queuePos++];
94     }
95     
96     public Token peekToken() throws Exception {
97         while(queuePos == queueSize)
98             fillQueue();
99         return queue[queuePos];
100     }
101     
102     private void push(Token symbol) {
103         /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" +
104                 " [" 
105                 + Locations.beginOf(symbol.location) + ".." 
106                 + Locations.endOf(symbol.location) + "]");*/
107         if(queueSize == queue.length)
108             queue = Arrays.copyOf(queue, queueSize*2);
109         queue[queueSize++] = symbol;
110     }
111     
112     private void fillQueue() throws Exception {
113         queuePos = 0;
114         queueSize = 0;
115         
116         for(int i=0;i<PATCH_SIZE;++i) {
117             handleToken(lexer.nextToken());
118             if(isInsideModule) {
119                 if(context.header == null)
120                     break;
121                 else
122                     isInsideModule = false;
123             }
124         }
125     }
126     
127     private SCLSyntaxErrorException error(int start, int end, String description) {
128         return new SCLSyntaxErrorException(Locations.location(start, end), description);
129     }
130     
131     private void handleToken(Token symbol) throws IOException {
132         int symbolId = symbol.id;
133         if(symbolId == SCLTerminals.EOL) {
134             lineStart = Locations.endOf(symbol.location);
135             firstTokenOfLine = true;
136             return;
137         }
138         
139         if(symbolId == SCLTerminals.COMMENT) {
140             firstTokenOfLine = false;
141             return;
142         }
143         
144         Token prevToken = curToken;
145         int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id;
146         curToken = symbol;
147         
148         int symbolStart = Locations.beginOf(symbol.location);
149         int symbolEnd = Locations.endOf(symbol.location);
150         
151         if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) {
152             push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {"));
153             int symbolIndentation = symbolStart-lineStart;
154             //System.out.println("symbolIndentation = " + symbolIndentation);
155             indentations.add(symbolIndentation);
156             indentationTokens.add(prevTokenId);
157             firstTokenOfLine = false;
158         }
159         else if(firstTokenOfLine) {
160             if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId))
161                 ;
162             else {
163                 int level = symbolStart - lineStart;
164                 //System.out.println("level = " + level);
165                 if(indentations.get(indentations.size()-1) >= level) {
166                     while(indentations.get(indentations.size()-1) > level) {
167                         indentationTokens.removeAt(indentations.size()-1);
168                         indentations.removeAt(indentations.size()-1);
169                         int loc = Locations.endOf(prevToken.location);
170                         push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }"));
171                     }
172                     if(indentations.get(indentations.size()-1) == level)
173                         push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;"));
174                 }
175             }
176             firstTokenOfLine = false;
177             if(isFirstToken) {
178                 isFirstToken = false;
179                 if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) {
180                     push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text));
181                     isInsideModule = true;
182                     return;
183                 }
184             }
185         }
186         
187         switch(symbolId) {
188         case SCLTerminals.LBRACE:
189         case SCLTerminals.LPAREN:
190         case SCLTerminals.LBRACKET:
191         case SCLTerminals.IF:
192         case SCLTerminals.WHEN:
193         case SCLTerminals.LET:
194             indentations.add(-1);
195             indentationTokens.add(symbolId);
196             push(symbol);
197             return;
198         case SCLTerminals.THEN:
199             /*for(int tt : indentationTokens.toArray())
200                     System.out.print(SCLParser.TERMINAL_NAMES[tt] + " ");
201                 System.out.println();*/
202             if(prevTokenId == SCLTerminals.COMMA) {
203                 // for list comprehension syntax
204                 push(symbol);
205                 break;
206             }
207         case SCLTerminals.RBRACE:
208         case SCLTerminals.RPAREN:
209         case SCLTerminals.RBRACKET:
210         case SCLTerminals.ELSE:
211         case SCLTerminals.IN:
212             int removedToken = SCLTerminals.EOF;
213             while(!indentations.isEmpty()) {
214                 removedToken = indentationTokens.removeAt(indentations.size()-1);
215                 //System.out.println("    removed " + SCLParser.TERMINAL_NAMES[removedToken]);
216                 if(indentations.removeAt(indentations.size()-1) < 0)
217                     break;
218                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
219                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
220             }
221             if(indentations.isEmpty())
222                 throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'.");
223             if(symbolId == SCLTerminals.THEN) {
224                 if(removedToken == SCLTerminals.WHEN)
225                     curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text);
226                 else {
227                     indentations.add(-1);
228                     indentationTokens.add(SCLTerminals.THEN);
229                 }
230             }   
231             push(symbol);
232             return;
233         case SCLTerminals.EOF:
234             while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) {
235                 long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location;
236                 push(new Token(SCLTerminals.RBRACE, loc, "implicit }"));
237                 indentationTokens.removeAt(indentations.size()-1);
238                 indentations.removeAt(indentations.size()-1);
239             }
240             if(indentations.size() > 1)
241                 throw error(symbolStart, symbolEnd, "Unclosed parentheses.");
242             push(symbol);
243             return;
244         default:
245             push(symbol);
246             return;
247         }
248     }
249
250     public void setParserOptions(SCLParserOptions options) {
251         this.options = options;
252         lexer.options = options;
253     }
254
255 }