package org.simantics.scl.compiler.internal.parsing.parser; import java.io.IOException; import java.util.Arrays; import org.simantics.scl.compiler.errors.Locations; import org.simantics.scl.compiler.internal.parsing.Token; import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException; import gnu.trove.list.array.TIntArrayList; import gnu.trove.set.hash.TIntHashSet; /** * * * @author Hannu Niemistö */ public class SCLPostLexer { public static TIntHashSet INDENTABLE = new TIntHashSet(); public static TIntHashSet NO_SEMICOLON_BEFORE = new TIntHashSet(); public static TIntHashSet NO_SEMICOLON_AFTER = new TIntHashSet(); static { INDENTABLE.add(SCLTerminals.WHERE); INDENTABLE.add(SCLTerminals.QUERY_OP); INDENTABLE.add(SCLTerminals.WITH); INDENTABLE.add(SCLTerminals.DO); INDENTABLE.add(SCLTerminals.MDO); INDENTABLE.add(SCLTerminals.LET); INDENTABLE.add(SCLTerminals.ENFORCE); INDENTABLE.add(SCLTerminals.WHEN); INDENTABLE.add(SCLTerminals.FOLLOWS); INDENTABLE.add(SCLTerminals.EQ); INDENTABLE.add(SCLTerminals.LAMBDA_MATCH); INDENTABLE.add(SCLTerminals.THEN_AFTER_WHEN); NO_SEMICOLON_BEFORE.add(SCLTerminals.EOF); NO_SEMICOLON_BEFORE.add(SCLTerminals.SYMBOL); NO_SEMICOLON_BEFORE.add(SCLTerminals.THEN); NO_SEMICOLON_BEFORE.add(SCLTerminals.ELSE); NO_SEMICOLON_BEFORE.add(SCLTerminals.IN); NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACE); NO_SEMICOLON_BEFORE.add(SCLTerminals.RBRACKET); NO_SEMICOLON_BEFORE.add(SCLTerminals.RPAREN); NO_SEMICOLON_BEFORE.add(SCLTerminals.SEMICOLON); NO_SEMICOLON_AFTER.add(SCLTerminals.EOF); NO_SEMICOLON_AFTER.add(SCLTerminals.SYMBOL); } SCLLexer lexer; Token[] queue = new Token[16]; int queuePos=0, queueSize=0; TIntArrayList indentations = new TIntArrayList(); TIntArrayList indentationTokens = new TIntArrayList(); Token curToken = null; int lineStart = 0; boolean firstTokenOfLine = true; private SCLParserOptions options; private boolean isFirstToken = true; { indentations.add(0); indentationTokens.add(SCLTerminals.EOF); } public SCLPostLexer(SCLLexer lexer) { this.lexer = lexer; } public SCLPostLexer(java.io.Reader in) { this(new SCLLexer(in)); } public Token nextToken() throws Exception { while(queuePos == queueSize) fillQueue(); return queue[queuePos++]; } public Token peekToken() throws Exception { while(queuePos == queueSize) fillQueue(); return queue[queuePos]; } private void push(Token symbol) { /*System.out.println("TOKEN " + symbol.text + " (" + SCLParser.TERMINAL_NAMES[symbol.id] + ")" + " [" + Locations.beginOf(symbol.location) + ".." + Locations.endOf(symbol.location) + "]");*/ if(queueSize == queue.length) queue = Arrays.copyOf(queue, queueSize*2); queue[queueSize++] = symbol; } private void fillQueue() throws Exception { queuePos = 0; queueSize = 0; for(int i=0;i<8;++i) handleToken(lexer.nextToken()); } private SCLSyntaxErrorException error(int start, int end, String description) { return new SCLSyntaxErrorException(Locations.location(start, end), description); } private void handleToken(Token symbol) throws IOException { int symbolId = symbol.id; if(symbolId == SCLTerminals.EOL) { lineStart = Locations.endOf(symbol.location); firstTokenOfLine = true; return; } if(symbolId == SCLTerminals.COMMENT) { firstTokenOfLine = false; return; } Token prevToken = curToken; int prevTokenId = prevToken == null ? SCLTerminals.EOF : prevToken.id; curToken = symbol; int symbolStart = Locations.beginOf(symbol.location); int symbolEnd = Locations.endOf(symbol.location); if(INDENTABLE.contains(prevTokenId) && symbolId != SCLTerminals.LBRACE) { push(new Token(SCLTerminals.LBRACE, symbolStart, symbolStart, "implicit {")); int symbolIndentation = symbolStart-lineStart; //System.out.println("symbolIndentation = " + symbolIndentation); indentations.add(symbolIndentation); indentationTokens.add(prevTokenId); firstTokenOfLine = false; } else if(firstTokenOfLine) { if(NO_SEMICOLON_AFTER.contains(prevTokenId) || NO_SEMICOLON_BEFORE.contains(symbolId)) ; else { int level = symbolStart - lineStart; //System.out.println("level = " + level); if(indentations.get(indentations.size()-1) >= level) { while(indentations.get(indentations.size()-1) > level) { indentationTokens.removeAt(indentations.size()-1); indentations.removeAt(indentations.size()-1); int loc = Locations.endOf(prevToken.location); push(new Token(SCLTerminals.RBRACE, loc, loc, "implicit }")); } if(indentations.get(indentations.size()-1) == level) push(new Token(SCLTerminals.SEMICOLON, symbolStart, symbolStart, "implicit ;")); } } firstTokenOfLine = false; if(isFirstToken) { isFirstToken = false; if(symbol.id == SCLTerminals.ID && symbol.text.equals("module") && options != null && options.isModule) { push(new Token(SCLTerminals.MODULE, symbol.location, symbol.text)); return; } } } switch(symbolId) { case SCLTerminals.LBRACE: case SCLTerminals.LPAREN: case SCLTerminals.LBRACKET: case SCLTerminals.IF: case SCLTerminals.WHEN: case SCLTerminals.LET: indentations.add(-1); indentationTokens.add(symbolId); push(symbol); return; case SCLTerminals.THEN: /*for(int tt : indentationTokens.toArray()) System.out.print(SCLParser.TERMINAL_NAMES[tt] + " "); System.out.println();*/ if(prevTokenId == SCLTerminals.COMMA) { // for list comprehension syntax push(symbol); break; } case SCLTerminals.RBRACE: case SCLTerminals.RPAREN: case SCLTerminals.RBRACKET: case SCLTerminals.ELSE: case SCLTerminals.IN: int removedToken = SCLTerminals.EOF; while(!indentations.isEmpty()) { removedToken = indentationTokens.removeAt(indentations.size()-1); //System.out.println(" removed " + SCLParser.TERMINAL_NAMES[removedToken]); if(indentations.removeAt(indentations.size()-1) < 0) break; long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location; push(new Token(SCLTerminals.RBRACE, loc, "implicit }")); } if(indentations.isEmpty()) throw error(symbolStart, symbolEnd, "No corresponding opening parenthesis for '" + symbol.text + "'."); if(symbolId == SCLTerminals.THEN) { if(removedToken == SCLTerminals.WHEN) curToken = symbol = new Token(SCLTerminals.THEN_AFTER_WHEN, symbol.location, symbol.text); else { indentations.add(-1); indentationTokens.add(SCLTerminals.THEN); } } push(symbol); return; case SCLTerminals.EOF: while(indentations.size() > 1 && indentations.get(indentations.size()-1) >= 0) { long loc = prevToken != null ? Locations.location(Locations.endOf(prevToken.location), Locations.endOf(prevToken.location)) : symbol.location; push(new Token(SCLTerminals.RBRACE, loc, "implicit }")); indentationTokens.removeAt(indentations.size()-1); indentations.removeAt(indentations.size()-1); } if(indentations.size() > 1) throw error(symbolStart, symbolEnd, "Unclosed parentheses."); push(symbol); return; default: push(symbol); return; } } public void setParserOptions(SCLParserOptions options) { this.options = options; lexer.options = options; } }