From: Hannu Niemistö Date: Fri, 20 Jan 2017 12:03:44 +0000 (+0200) Subject: Moved SCL parser generator to platform repository. X-Git-Tag: v1.27.0~8 X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F83%2F283%2F1;p=simantics%2Fplatform.git Moved SCL parser generator to platform repository. In addition, implemented the parser generator's parser with parser generator itself to remove Antlr-dependency. refs #6995 Change-Id: I08537c59254ddd6ae49d9c89d36e8596079f0fb2 --- diff --git a/bundles/org.simantics.scl.compiler/generateGrammarLexer.xml b/bundles/org.simantics.scl.compiler/generateGrammarLexer.xml new file mode 100644 index 000000000..ff9ecb31f --- /dev/null +++ b/bundles/org.simantics.scl.compiler/generateGrammarLexer.xml @@ -0,0 +1,13 @@ + + + + + + + + + \ No newline at end of file diff --git a/bundles/org.simantics.scl.compiler/generateParser.xml b/bundles/org.simantics.scl.compiler/generateSCLLexer.xml similarity index 100% rename from bundles/org.simantics.scl.compiler/generateParser.xml rename to bundles/org.simantics.scl.compiler/generateSCLLexer.xml diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar index 0312cf6f8..6bcffa147 100644 --- a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar @@ -323,4 +323,5 @@ symbolWithoutMinus /****************************************************************************** * Auxiliary tokens */ -dummy = COMMENT EOL ; \ No newline at end of file +dummy = COMMENT EOL # Dummy + ; \ No newline at end of file diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParser.java index 532f6e8a3..6c8ee5be9 100644 --- a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParser.java +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParser.java @@ -252,18 +252,22 @@ public abstract class SCLParser { protected abstract RuntimeException syntaxError(Token token, String description); - private static String describeAction(int action) { + private static String describeAction(boolean isGoto, int action) { if(action == ERROR_ACTION) return "ERROR"; if(action == ACCEPT_ACTION) return "ACCEPT"; StringBuilder b = new StringBuilder(); - if((action & REDUCE_MASK) != 0) { - action ^= REDUCE_MASK; - b.append("REDUCE"); + if(isGoto) + b.append("GOTO "); + else { + if((action & REDUCE_MASK) != 0) { + action ^= REDUCE_MASK; + b.append("REDUCE"); + } + else + b.append("SHIFT"); } - else - b.append("SHIFT"); if((action & POP_MASK) != 0) { action ^= POP_MASK; b.append(" POP"); @@ -276,34 +280,46 @@ public abstract class SCLParser { return b.toString(); } + private void printState(int state) { + System.out.print("state=" + state + ":"); + for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) { + Object s = symbolStack[i]; + if(s instanceof Token) + System.out.print(" " + TERMINAL_NAMES[((Token)s).id]); + else if(s == null) + System.out.print(" null"); + else + System.out.print(" " + s.getClass().getSimpleName()); + while(j>=0 && symbolStackPositionStack[j]==i) + System.out.print(" (" + stateStack[j--] + ")"); + } + System.out.println(); + } + private Object parse(int state) { while(true) { Token token = nextToken(); int tokenId = token.id; + if(TRACE) + System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---"); while(true) { + if(TRACE) + printState(state); short action = getAction(state, tokenId); - if(TRACE) { - System.out.println("state=" + state + ", tokenId=" + TERMINAL_NAMES[tokenId] + - ", action=" + describeAction(action)); - System.out.print(" "); - for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) { - Object s = symbolStack[i]; - if(s instanceof Token) - System.out.print(" " + TERMINAL_NAMES[((Token)s).id]); - else - System.out.print(" " + s.getClass().getSimpleName()); - while(j>=0 && symbolStackPositionStack[j]==i) - System.out.print(" (" + stateStack[j--] + ")"); - } - System.out.println(); - } + if(TRACE) + System.out.println(" -> action=" + describeAction(false, action)); //System.out.println(STATE_DESCRIPTIONS[state]); if((action & REDUCE_MASK) != 0) { if(action == ACCEPT_ACTION) return symbolStack[symbolStackLength-1]; if(action == ERROR_ACTION) throw syntaxError(token, parseErrorDescription(state, token, tokenId)); - stateStackLength -= (action >>> 13)&3; + int popAmount = (action >>> 13)&3; + if(TRACE) { + if(popAmount > 0) + System.out.println(" POP " + popAmount); + } + stateStackLength -= popAmount; action &= STATE_MASK; int reductionBegin = symbolStackPositionStack[--stateStackLength]; @@ -318,7 +334,16 @@ public abstract class SCLParser { symbolStack[symbolStackLength] = symbol; state = stateStack[stateStackLength]; + if(TRACE) { + ++symbolStackLength; + printState(state); + --symbolStackLength; + System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]); + } action = getGoto(state, PRODUCT_LHS[action]); + if(TRACE) + System.out.println(" -> action=" + describeAction(true, action)); + // Pop state if((action & POP_MASK) != 0) { --stateStackLength; @@ -637,7 +662,7 @@ public abstract class SCLParser { case 124: return reduceApplyType(); case 125: - return reduceDummy1(); + return reduceDummy(); default: throw new RuntimeException("Internal parser error."); @@ -1136,7 +1161,7 @@ public abstract class SCLParser { /** * dummy ::= COMMENT EOL */ - protected abstract Object reduceDummy1(); + protected abstract Object reduceDummy(); protected void postReduce(Object reduced) { } diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParserImpl.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParserImpl.java index 7477c3890..5498b16b5 100644 --- a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParserImpl.java +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParserImpl.java @@ -882,11 +882,6 @@ public class SCLParserImpl extends SCLParser { return new TApplyAst((TypeAst)get(0), parameters); } - @Override - protected Object reduceDummy1() { - throw new UnsupportedOperationException(); - } - @SuppressWarnings("unchecked") @Override protected void postReduce(Object reduced) { @@ -1279,4 +1274,9 @@ public class SCLParserImpl extends SCLParser { return new CHRStatement((ListQualifier[])get(1), (ListQualifier[])get(3)); } + @Override + protected Object reduceDummy() { + throw new UnsupportedOperationException(); + } + } diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java new file mode 100644 index 000000000..abfc21485 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java @@ -0,0 +1,19 @@ +package org.simantics.scl.compiler.parser; + +import java.io.File; + +import org.simantics.scl.compiler.parser.generator.ParserGenerator; + +public class GenerateSCLParser { + + public static void main(String[] args) throws Exception { + File plugin = new File(GenerateSCLParser.class.getResource(".").getPath().replace("%20", " ")) + .getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile(); + System.out.println(plugin); + ParserGenerator.createParser( + "org.simantics.scl.compiler.internal.parsing.parser", + "SCLSyntaxErrorException", + new File(plugin, "src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar")); + } + +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java new file mode 100644 index 000000000..886ce726e --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java @@ -0,0 +1,54 @@ +package org.simantics.scl.compiler.parser.generator; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable; +import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar; +import org.simantics.scl.compiler.parser.generator.java.GenerateEnum; +import org.simantics.scl.compiler.parser.generator.java.GenerateParser; +import org.simantics.scl.compiler.parser.generator.table.ParseTable; +import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder; +import org.simantics.scl.compiler.parser.grammar.Grammar; +import org.simantics.scl.compiler.parser.grammar.input.GrammarParserImpl; + +public class ParserGenerator { + + public static void createParser(String packageName, String exceptionName, File grammarFile) throws IOException { + // Read grammar and check it + FileInputStream inputStream = new FileInputStream(grammarFile); + Grammar grammar = GrammarParserImpl.read(inputStream); + inputStream.close(); + grammar.check(); + + AnaGrammar anaGrammar = new AnaGrammar(grammar); + ParseTable table = ParseTableBuilder.build(anaGrammar); + + File directory = grammarFile.getParentFile(); + String name = grammarFile.getName(); + { + int p = name.lastIndexOf('.'); + if(p > 0) + name = name.substring(0, p); + } + + // Write parse table + CompressedParseTable compressedTable = table.compress(); + compressedTable.writeTo(new File(directory, name + "Parser.dat")); + + // Write classes + { + String className = name + "Terminals"; + GenerateEnum.generate(new File(directory, className + ".java"), + packageName, className, + anaGrammar.terminalNames); + } + { + String className = name + "Parser"; + new GenerateParser(packageName, className, anaGrammar, exceptionName, compressedTable) + .generate(new File(directory, className + ".java")); + } + } + +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java new file mode 100644 index 000000000..d2b797114 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java @@ -0,0 +1,101 @@ +package org.simantics.scl.compiler.parser.generator.compression; + +import java.util.Arrays; + +public class CompressTable { + + private static class Row implements Comparable { + int id; + int density; + int minPos; + int maxPos; + int[] data; + int displacement; + + @Override + public int compareTo(Row o) { + return o.density < density ? 1 : o.density > density ? -1 : 0; + } + } + + public static CompressedTable compress(int[][] table_) { + // Sort table by size + Row[] table = new Row[table_.length]; + for(int i=0;i= 0) + continue tryAgain; + } + } + row.displacement = d; + for(int p = row.minPos;p<=row.maxPos;++p) { + int val = row.data[p]; + if(val != 0) { + int id = d+p; + rowIds[id] = row.id; + values[id] = val; + } + } + break; + } + } + + // Produce final tables + int minDis=capacity; + int maxDis=0; + for(Row row : table) { + int d = row.displacement; + if(d < minDis) + minDis = d; + if(d > maxDis) + maxDis = d; + } + int[] displacement = new int[table.length]; + for(int i=0;i 0) { + // Choose a vertex is bestId from unassigned list so that + // bestBlockedCount=blockedCount[bestId] is maximized + int bestId = 0; + int bestUnassignedTableId = 0; + int bestBlockedCount = -1; + for(int i=0;i bestBlockedCount) { + bestUnassignedTableId = i; + bestId = id; + bestBlockedCount = tempCount; + } + } + + // Remove from unassigned table + unassigned[bestUnassignedTableId] = unassigned[--unassignedCount]; + + // Choose color + int chosenColor = 0; + if(bestBlockedCount == colorCount) // All colors are blocked + chosenColor = colorCount++; + else { // There is unblocked color + for(int i=0;i prods = new ArrayList(); + public int[] nonterminalPos; + + public final String[] terminalNames; + public final String[] nonterminalNames; + + public final int[] initialNonterminals; + + public boolean[] nullable; + public int[][] first; + + + public AnaGrammar(Grammar grammar) { + initialNonterminals = grammar.initialNonterminals; + terminalNames = Arrays.copyOf(grammar.terminalNames, grammar.terminalNames.length+1); + terminalNames[terminalNames.length-1] = "EOF"; + nonterminalNames = Arrays.copyOf(grammar.nonterminalNames, + grammar.nonterminalNames.length+initialNonterminals.length); + for(int i=1;i<=initialNonterminals.length;++i) + nonterminalNames[nonterminalNames.length-i] = "init$" + i; + + // Convert grammar + ArrayList[] prodMap = new ArrayList[nonterminalNames.length]; + for(int i=0;i(); + for(Production production : grammar.productions) { + Prod prod = new Prod(production.name, ~production.lhs, + production.rhs.toAutomaton().determinize().minimize(), + production.annotations); + prodMap[prod.lhs].add(prod); + } + + // Initial production + for(int i=1;i<=initialNonterminals.length;++i) { + DFA dfa = new DFA(); + int s0 = dfa.newState(); + dfa.setInitialState(s0); + int s1 = dfa.newState(); + dfa.addTransition(s0, initialNonterminals[i-1], s1); + int s2 = dfa.newState(); + dfa.addTransition(s1, terminalNames.length-1, s2); + dfa.setAccepts(s2, true); + Prod prod = new Prod("Init", nonterminalNames.length-i, dfa, new TIntByteHashMap()); + prodMap[prod.lhs].add(prod); + } + + TIntArrayList pos = new TIntArrayList(); + for(int i=0;i[] pendingItems = new ArrayList[nonterminalCount]; + final ArrayList stack = new ArrayList(); + THashSet handledItems = new THashSet(); + for(int i=0;i(); + + for(int i=0;i "); + if(nullable[i]) + System.out.print(" NULL"); + for(int s : this.first[i]) + System.out.print(" " + terminalNames[s]); + System.out.println(); + }*/ + } + + public static void gclose(TIntHashSet[] sets, TIntHashSet[] graph_) { + int[][] graph = new int[graph_.length][]; + for(int i=0;i= 0) + return terminalNames[symbolId]; + else + return nonterminalNames[~symbolId]; + } + + private class AlmostAcceptsProc implements TIntIntProcedure { + DFA rhs; + TIntHashSet visited = new TIntHashSet(); + + boolean result; + + public AlmostAcceptsProc(DFA rhs) { + this.rhs = rhs; + } + + @Override + public boolean execute(int a, int b) { + if(a < 0 && nullable[~a]) + visit(b); + return !result; + } + + public void visit(int position) { + if(visited.add(position)) { + if(rhs.getAccepts(position)) { + result = true; + return; + } + rhs.forEachTransition(position, this); + } + } + } + + public boolean almostAccepts(DFA rhs, int position) { + AlmostAcceptsProc proc = new AlmostAcceptsProc(rhs); + proc.visit(position); + return proc.result; + } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java new file mode 100644 index 000000000..6a6afcadd --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java @@ -0,0 +1,27 @@ +package org.simantics.scl.compiler.parser.generator.grammar; + +import org.simantics.scl.compiler.parser.regexp.automata.DFA; + +import gnu.trove.map.hash.TIntByteHashMap; + +public class Prod { + public final String name; + public final int lhs; + public final DFA rhs; + public final TIntByteHashMap annotations; + + public Prod(String name, int lhs, DFA rhs, TIntByteHashMap annotations) { + this.name = name; + this.lhs = lhs; + this.rhs = rhs; + this.annotations = annotations; + } + + public String toString(AnaGrammar grammar) { + StringBuilder b = new StringBuilder(); + b.append(grammar.nonterminalNames[lhs]); + b.append(" ::= "); + rhs.toRegexp().toString(b, grammar, 0); + return b.toString(); + } +} \ No newline at end of file diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java new file mode 100644 index 000000000..efda07e83 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java @@ -0,0 +1,20 @@ +package org.simantics.scl.compiler.parser.generator.java; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; + +public class GenerateEnum { + + public static void generate(File file, String packageName, String className, String[] names) throws IOException { + PrintStream out = new PrintStream(file); + out.println("package " + packageName + ";"); + out.println(); + out.println("public interface " + className + " {"); + for(int i=0;i 0) + out.println(","); + out.print(" \"" + grammar.terminalNames[i] + "\""); + } + } + else if("nonterminalNames".equals(varName)) { + for(int i=0;i 0) + out.println(","); + out.print(" \"" + grammar.nonterminalNames[i] + "\""); + } + } + else if("stateDescriptions".equals(varName)) { + for(int i=0;i 0) + out.println(","); + out.print(" \"" + table.stateDescriptions[i].replace("\n", "\\n") + "\""); + } + } + else if("reduceCases".equals(varName)) { + for(int i=0;i usedNames = new THashSet(); + for(int i=0;i>5] >> (id&31))&1) != 0 ) + return ERROR_ACTION; + return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]]; + } + + private static short getGoto(int state, int symbol) { + return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]]; + } + + protected abstract $Token$ nextToken(); + + private $Symbol$[] symbolStack = new $Symbol$[INITIAL_CAPACITY]; + private int symbolStackLength = 0; + + private int[] stateStack = new int[INITIAL_CAPACITY]; + private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY]; + private int stateStackLength = 0; + + // For reduce + private int reductionLength; + + protected int length() { + return reductionLength; + } + + protected $Symbol$ get(int i) { + if(i < 0 || i >= reductionLength) + throw new IndexOutOfBoundsException(); + return symbolStack[symbolStackLength+i]; + } + + private String parseErrorDescription(int state, $Token$ token, int tokenId) { + StringBuilder b = new StringBuilder(); + b.append("Unexpected token '").append(token) + .append("' (").append(TERMINAL_NAMES[tokenId]) + .append("). Expected one of "); + ArrayList possibleTerminals = new ArrayList(); + for(int i=0;i 0) + b.append(", "); + b.append(possibleTerminals.get(i)); + } + b.append('.'); + return b.toString(); + } + + protected abstract RuntimeException syntaxError($Token$ token, String description); + + private static String describeAction(boolean isGoto, int action) { + if(action == ERROR_ACTION) + return "ERROR"; + if(action == ACCEPT_ACTION) + return "ACCEPT"; + StringBuilder b = new StringBuilder(); + if(isGoto) + b.append("GOTO "); + else { + if((action & REDUCE_MASK) != 0) { + action ^= REDUCE_MASK; + b.append("REDUCE"); + } + else + b.append("SHIFT"); + } + if((action & POP_MASK) != 0) { + action ^= POP_MASK; + b.append(" POP"); + } + if((action & PUSH_MASK) != 0) { + action ^= PUSH_MASK; + b.append(" PUSH"); + } + b.append(' ').append(action); + return b.toString(); + } + + private void printState(int state) { + System.out.print("state=" + state + ":"); + for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) { + Object s = symbolStack[i]; + if(s instanceof Token) + System.out.print(" " + TERMINAL_NAMES[((Token)s).id]); + else if(s == null) + System.out.print(" null"); + else + System.out.print(" " + s.getClass().getSimpleName()); + while(j>=0 && symbolStackPositionStack[j]==i) + System.out.print(" (" + stateStack[j--] + ")"); + } + System.out.println(); + } + + private $Symbol$ parse(int state) { + while(true) { + $Token$ token = nextToken(); + int tokenId = token.$tokenId$; + if(TRACE) + System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---"); + while(true) { + if(TRACE) + printState(state); + short action = getAction(state, tokenId); + if(TRACE) + System.out.println(" -> action=" + describeAction(false, action)); + //System.out.println(STATE_DESCRIPTIONS[state]); + if((action & REDUCE_MASK) != 0) { + if(action == ACCEPT_ACTION) + return symbolStack[symbolStackLength-1]; + if(action == ERROR_ACTION) + throw syntaxError(token, parseErrorDescription(state, token, tokenId)); + int popAmount = (action >>> 13)&3; + if(TRACE) { + if(popAmount > 0) + System.out.println(" POP " + popAmount); + } + stateStackLength -= popAmount; + action &= STATE_MASK; + + int reductionBegin = symbolStackPositionStack[--stateStackLength]; + + reductionLength = symbolStackLength-reductionBegin; + symbolStackLength = reductionBegin; + + if(symbolStackLength == symbolStack.length) + symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2); + $Symbol$ symbol = reduce(action); + postReduce(symbol); + symbolStack[symbolStackLength] = symbol; + + state = stateStack[stateStackLength]; + if(TRACE) { + ++symbolStackLength; + printState(state); + --symbolStackLength; + System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]); + } + action = getGoto(state, PRODUCT_LHS[action]); + if(TRACE) + System.out.println(" -> action=" + describeAction(true, action)); + + // Pop state + if((action & POP_MASK) != 0) { + --stateStackLength; + } + // Push state + if((action & PUSH_MASK) != 0) { + if(stateStackLength == stateStack.length) { + stateStack = Arrays.copyOf(stateStack, stateStackLength*2); + symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2); + } + symbolStackPositionStack[stateStackLength] = symbolStackLength; + stateStack[stateStackLength++] = state; + } + state = action & STATE_MASK; + ++symbolStackLength; + } + else { + // Pop state + if((action & POP_MASK) != 0) { + --stateStackLength; + } + // Push state + if((action & PUSH_MASK) != 0) { + if(stateStackLength == stateStack.length) { + stateStack = Arrays.copyOf(stateStack, stateStackLength*2); + symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2); + } + symbolStackPositionStack[stateStackLength] = symbolStackLength; + stateStack[stateStackLength++] = state; + } + + // New state + state = action & STATE_MASK; + + // Push symbol + if(symbolStackLength == symbolStack.length) + symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2); + symbolStack[symbolStackLength++] = token; + break; + } + } + } + } + +$parseMethods$ + + protected $Symbol$ reduce(int productionId) { + try { + switch(productionId) { +$reduceCases$ + default: + throw new RuntimeException("Internal parser error."); + } + } catch($Exception$ e) { + StringBuilder b = new StringBuilder(); + b.append("Failed to reduce"); + for(int i=0;i { + public final int production; + public final int position; + public int stackPos; + + public Item(int production, int position, int stackPos) { + this.production = production; + this.position = position; + this.stackPos = stackPos; + } + + public int[] nextSymbols(AnaGrammar grammar) { + Prod prod = grammar.prods.get(production); + return prod.rhs.nextStates(position); + } + + public int nextPosition(AnaGrammar grammar, int symbol) { + Prod prod = grammar.prods.get(production); + return prod.rhs.getTransition(position, symbol); + } + + @Override + public int hashCode() { + return (production * 31 + position) * 31 + stackPos; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null || getClass() != obj.getClass()) + return false; + Item other = (Item) obj; + return production == other.production + && position == other.position + && stackPos == other.stackPos; + } + + @Override + public int compareTo(Item o) { + if(production < o.production) + return -1; + if(production > o.production) + return 1; + if(position < o.position) + return -1; + if(position > o.position) + return 1; + return 0; + } + + public String toString(AnaGrammar grammar) { + Prod prod = grammar.prods.get(production); + StringBuilder b = new StringBuilder(); + b.append(grammar.nonterminalNames[prod.lhs]); + b.append(" ::= "); + /*prod.rhs.toRegexpTo(position).toString(b, grammar, 0); + b.append(" . "); + prod.rhs.toRegexpFrom(position).toString(b, grammar, 0);*/ + prod.rhs.toPositionalRegexp(position).toString(b, grammar, 0); + if(stackPos >= 0) + b.append(" (stack ").append(stackPos).append(')'); + /*for(int i=0;i items) { + this(items.toArray(new Item[items.size()])); + } + + private static final int PROD = 31*31; + + @Override + public int hashCode() { + if(hash == 0) { + int h = 1; + for(Item item : items) + h = PROD*h + item.hashCode(); + hash = h; + } + return hash; + } + + @Override + public boolean equals(Object obj) { + if(this == obj) + return true; + if(obj == null || obj.getClass() != getClass()) + return false; + ItemSet other = (ItemSet)obj; + if(other.items.length != items.length) + return false; + for(int i=0;i 0) + b.append(", "); + b.append(actions[i]); + } + b.append("},\n"); + } + b.append("gotos:\n"); + for(int[] gotos : gotoTable) { + b.append(" {"); + for(int i=0;i 0) + b.append(", "); + b.append(gotos[i]); + } + b.append("},\n"); + } + return b.toString(); + } + + public CompressedParseTable compress() { + return new CompressedParseTable( + GCCompress.compress(actionTable), + ErrorTable.createErrorTable(actionTable), + GCCompress.compress(gotoTable), + productionInfo, + initialStates, + stateDescriptions); + } + + public void writeTo(File file) throws IOException { + FileOutputStream stream = new FileOutputStream(file); + DataOutputStream output = new DataOutputStream(stream); + for(int[] row : actionTable) + for(int val : row) + output.writeInt(val); + for(int[] row : gotoTable) + for(int val : row) + output.writeInt(val); + for(int val : productionInfo) + output.writeInt(val); + output.close(); + } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java new file mode 100644 index 000000000..da837fb60 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java @@ -0,0 +1,515 @@ +package org.simantics.scl.compiler.parser.generator.table; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar; +import org.simantics.scl.compiler.parser.generator.grammar.Prod; + +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.list.array.TLongArrayList; +import gnu.trove.map.hash.TIntIntHashMap; +import gnu.trove.map.hash.TIntObjectHashMap; +import gnu.trove.map.hash.TLongIntHashMap; +import gnu.trove.map.hash.TObjectIntHashMap; +import gnu.trove.procedure.TIntIntProcedure; +import gnu.trove.procedure.TIntObjectProcedure; +import gnu.trove.procedure.TObjectIntProcedure; +import gnu.trove.set.hash.THashSet; +import gnu.trove.set.hash.TIntHashSet; +import gnu.trove.set.hash.TLongHashSet; + +public class ParseTableBuilder { + public final static int MAX_STACK_ID = 10; + + private static final int STATE_MASK = 0x0fff; + private static final int REDUCE_MASK = 0x8000; + private static final int POP_MASK = 0x4000; + private static final int PUSH_MASK = 0x2000; + public static final int ERROR_ACTION = 0xffff; + private static final int ACCEPT_ACTION = 0xfffe; + + final AnaGrammar grammar; + private TObjectIntHashMap states = new TObjectIntHashMap(); + private ArrayList itemSets = new ArrayList(); + private ArrayList transitions = new ArrayList(); + private ArrayList stackOps = new ArrayList(); + private TIntArrayList backTransSymbols = new TIntArrayList(); + private ArrayList backLinks = new ArrayList(); + int[] initialStates; + TIntHashSet finalStates = new TIntHashSet(); + + private ParseTableBuilder(AnaGrammar grammar) { + this.grammar = grammar; + } + + private static boolean isNonterminal(int symbol) { + return symbol < 0; + } + + private void close(ArrayList items) { + THashSet itemSet = new THashSet(items); + for(int i=0;i items) { + // Create state + close(items); + final ItemSet itemSet = new ItemSet(items); + if(states.contains(itemSet)) + return states.get(itemSet); + final int newState = states.size(); + states.put(itemSet, newState); + itemSets.add(itemSet); + backTransSymbols.add(backTransSymbol); + backLinks.add(new TIntArrayList(2)); + + // Create transitions + TIntObjectHashMap> transitionMap = new TIntObjectHashMap>(); + //close(items); + for(Item item : items) { + for(int s : item.nextSymbols(grammar)) { + ArrayList l = transitionMap.get(s); + if(l == null) { + l = new ArrayList(); + transitionMap.put(s, l); + } + l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos)); + } + } + + final TIntIntHashMap trans = new TIntIntHashMap(); + final TIntIntHashMap stackOpMap = new TIntIntHashMap(); + transitions.add(trans); + stackOps.add(stackOpMap); + if(transitionMap.remove(grammar.terminalNames.length-1)!=null) { + finalStates.add(newState); + } + transitionMap.forEachEntry(new TIntObjectProcedure>() { + @Override + public boolean execute(int a, ArrayList b) { + boolean stackShift = false; + int minStackPos = Integer.MAX_VALUE; + for(Item item : b) { + if(item.stackPos == -1) + stackShift = true; + else + minStackPos = Math.min(minStackPos, item.stackPos); + } + int stackOp = 0; + if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) { + stackOp |= POP_MASK; + //System.out.println("minStackPos = " + minStackPos); + for(Item item : b) + if(item.stackPos >= 0) + --item.stackPos; + } + boolean stackOverflow = false; + if(stackShift) { + stackOp |= PUSH_MASK; + for(Item item : b) { + ++item.stackPos; + if(item.stackPos > MAX_STACK_ID) + stackOverflow = true; + } + } + stackOpMap.put(a, stackOp); + System.out.println(newState + " " + grammar.getName(a) + " " + stackOp); + + if(stackOverflow) { + System.err.println("Stack overflow when following " + grammar.getName(a) + " at"); + System.err.println(itemSet.toString(grammar)); + } + else { + int state = getState(a, b); + trans.put(a, state); + backLinks.get(state).add(newState); + } + return true; + } + + }); + return newState; + } + + TLongArrayList sMap = new TLongArrayList(); + TLongIntHashMap sMapInv = new TLongIntHashMap(); + TIntHashSet[] follow; + + private static int getState(long s) { + return (int)(s >> 32); + } + + private static int getSymbol(long s) { + return (int)s; + } + + private static long getS(int state, int symbol) { + return (((long)state) << 32) | (long)symbol; + } + + private void computeFollow() { + for(int i=0;i= 0) + drSet.add(symbol2); + else if(grammar.nullable[~symbol2]) + gread[sMapInv.get(getS(target, ~symbol2))].add(id); + return true; + } + }); + if(finalStates.contains(target)) + drSet.add(grammar.terminalNames.length-1); + follow[i] = drSet; + + ItemSet set = itemSets.get(target); + for(Item targetItem : set.items) { + Prod prod = grammar.prods.get(targetItem.production); + if(grammar.almostAccepts(prod.rhs, targetItem.position)) { + for(Item sourceItem : itemSets.get(source).items) { + if(sourceItem.production == targetItem.production && + prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) { + TLongHashSet visited = new TLongHashSet(); + traceBack(gla, id, visited, source, sourceItem); + } + } + + } + } + } + //System.out.println("follow: " + Arrays.toString(follow)); + //System.out.println("gread: " + Arrays.toString(gread)); + //System.out.println("gla: " + Arrays.toString(gla)); + AnaGrammar.gclose(follow, gread); + AnaGrammar.gclose(follow, gla); + + /*System.out.println("Gla:"); + for(int i=0;i= 0) { + int id = sMapInv.get(getS(prevState, prod.lhs)); + la.addAll(follow[id]); + } + } + } + } + } + + private void createReduceActions() { + computeFollow(); + for(int i=0;i laMap = new TIntObjectHashMap(); + TIntIntHashMap stackPosMap = new TIntIntHashMap(); + for(Item item : itemSets.get(i).items) { + Prod prod = grammar.prods.get(item.production); + if(prod.rhs.getAccepts(item.position)) { + TIntHashSet la = laMap.get(item.production); + if(la == null) { + la = new TIntHashSet(); + laMap.put(item.production, la); + } + + TLongHashSet visited = new TLongHashSet(); + lookback(visited, la, item.production, i, item.position); + + if(stackPosMap.containsKey(item.production)) { + stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice + } + else + stackPosMap.put(item.production, item.stackPos); + } + } + + // Create transitions + for(int production : laMap.keys()) { + int stackPos = 0; //stackPosMap.get(production); + TIntHashSet la = laMap.get(production); + for(int symbol : la.toArray()) { + if(trans.contains(symbol)) { + int oldAction = trans.get(symbol); + if(oldAction >= 0) { + Prod prod = grammar.prods.get(production); + if(prod.annotations.containsKey(symbol)) { + byte v = prod.annotations.get(symbol); + if(v == 1) + trans.put(symbol, REDUCE_MASK | production | (stackPos << 13)); + } + else { + System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context"); + System.err.println(itemSets.get(i).toString(grammar)); + } + } + else { + System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context"); + System.err.println(itemSets.get(i).toString(grammar)); + } + } + else + trans.put(symbol, REDUCE_MASK | production | (stackPos << 13)); + } + } + + // Check stacking conflicts + /*trans.forEachEntry(new TIntIntProcedure() { + @Override + public boolean execute(int a, int b) { + if(b >= 0) { + boolean kernelState = false; + boolean nonkernelState = false; + for(Item item : itemSets.get(b).items) { + Prod prod = grammar.prods.get(item.production); + if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a)) + nonkernelState = true; + else if(item.position != prod.rhs.getInitialState()) + kernelState = true; + } + + + if(kernelState && nonkernelState) { + System.err.println("Stacking conflict when following " + grammar.getName(a) + " to"); + System.err.println(itemSets.get(b).toString(grammar)); + } + } + return true; + } + });*/ + } + } + + public static ParseTable build(AnaGrammar grammar) { + ParseTableBuilder builder = new ParseTableBuilder(grammar); + + builder.initialStates = new int[grammar.initialNonterminals.length]; + for(int i=0;i seed = new ArrayList(); + int prodId = grammar.prods.size()-i-1; + seed.add(new Item(prodId, + grammar.prods.get(prodId).rhs.getInitialState(), 0)); + builder.initialStates[i] = builder.getState(REDUCE_MASK, seed); + } + + builder.createReduceActions(); + + System.out.println("States: " + builder.itemSets.size()); + + //builder.visualize(); + + builder.printParseTable(); + return builder.getParseTable(); + } + + private ParseTable getParseTable() { + int[] productionInfo = new int[grammar.prods.size()]; + for(int i=0;i= 0) + actions[a] = action; + else + gotos[~a] = action; + return true; + } + }); + } + + String[] stateDescriptions = new String[itemSets.size()]; + for(int i=0;i= 0) { + b.append("\n ").append(grammar.terminalNames[symbol]).append(" ->"); + if((action & REDUCE_MASK) == 0) { + if((action & POP_MASK) != 0) + b.append(" POP"); + if((action & PUSH_MASK) != 0) + b.append(" PUSH"); + b.append(" SHIFT(").append(action&STATE_MASK).append(")"); + } + else { + if(action == 0xfffffffe) + b.append(" ACCEPT"); + else + b.append(" REDUCE(").append(action&STATE_MASK).append(")"); + } + } + else { + b.append("\n ").append(grammar.nonterminalNames[~symbol]).append(" ->") + .append(" GOTO(").append(action).append(")"); + } + return true; + } + }); + stateDescriptions[i] = b.toString(); + } + + //printParseTable(); + return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo, + initialStates, stateDescriptions); + } + + private void printParseTable() { + final ItemSet[] stateSets = new ItemSet[states.size()]; + states.forEachEntry(new TObjectIntProcedure() { + @Override + public boolean execute(ItemSet a, int b) { + stateSets[b] = a; + return true; + } + }); + for(int i=0;i "); + if(sOp != 0) { + System.out.print("["); + if((sOp & PUSH_MASK) != 0) { + sOp ^= PUSH_MASK; + System.out.print("PUSH "); + } + if((sOp & POP_MASK) != 0) { + sOp ^= POP_MASK; + System.out.print("POP "); + } + if(sOp != 0) + System.out.print(sOp); + System.out.print("] "); + } + if((b & REDUCE_MASK) != 0) { + b ^= REDUCE_MASK; + System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar)); + } + else { + System.out.println("shift " + b); + } + return true; + } + }); + } + } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java new file mode 100644 index 000000000..e80858c55 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java @@ -0,0 +1,46 @@ +package org.simantics.scl.compiler.parser.grammar; + +import org.simantics.scl.compiler.parser.regexp.Namer; + +public class Grammar implements Namer { + public final Production[] productions; + public final String[] terminalNames; + public final String[] nonterminalNames; + public final int[] initialNonterminals; + + public Grammar(Production[] productions, String[] terminalNames, + String[] nonterminalNames, int[] initialNonterminals) { + this.productions = productions; + this.terminalNames = terminalNames; + this.nonterminalNames = nonterminalNames; + this.initialNonterminals = initialNonterminals; + } + + public String getName(int symbolId) { + if(symbolId >= 0) + return terminalNames[symbolId]; + else + return nonterminalNames[~symbolId]; + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + for(Production prod : productions) { + b.append(prod.toString(this)); + b.append("\n"); + } + /*for(int i=0;i { + {c_comment} { } + {cpp_comment} { } + "=" { return sym(GrammarTerminals.EQUALS); } + "|" { return sym(GrammarTerminals.BAR); } + ";" { return sym(GrammarTerminals.SEMICOLON); } + "#" { return sym(GrammarTerminals.HASH); } + "," { return sym(GrammarTerminals.COMMA); } + "(" { return sym(GrammarTerminals.LPAREN); } + ")" { return sym(GrammarTerminals.RPAREN); } + "*" { return sym(GrammarTerminals.STAR); } + "+" { return sym(GrammarTerminals.PLUS); } + "?" { return sym(GrammarTerminals.OPTIONAL); } + initial { return sym(GrammarTerminals.INITIAL); } + shift { return sym(GrammarTerminals.SHIFT); } + reduce { return sym(GrammarTerminals.REDUCE); } + {terminal} { return sym(GrammarTerminals.TERMINAL); } + {nonterminal} { return sym(GrammarTerminals.NONTERMINAL); } + {whitespace} { } + . { throw new RuntimeException("Illegal character '" + yytext() + "'."); } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java new file mode 100644 index 000000000..ce4092a55 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java @@ -0,0 +1,653 @@ +/* The following code was generated by JFlex 1.6.1 */ + +package org.simantics.scl.compiler.parser.grammar.input; + +import org.simantics.scl.compiler.parser.grammar.input.Token; + + +/** + * This class is a scanner generated by + * JFlex 1.6.1 + * from the specification file C:/Users/hannu/git/scl/org.simantics.parser/src/org/simantics/parser/grammar/input2/GrammarLexer.flex + */ +public class GrammarLexer { + + /** This character denotes the end of file */ + public static final int YYEOF = -1; + + /** initial size of the lookahead buffer */ + private static final int ZZ_BUFFERSIZE = 16384; + + /** lexical states */ + public static final int YYINITIAL = 0; + + /** + * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l + * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l + * at the beginning of a line + * l is of the form l = 2*k, k a non negative integer + */ + private static final int ZZ_LEXSTATE[] = { + 0, 0 + }; + + /** + * Translates characters to character classes + */ + private static final String ZZ_CMAP_PACKED = + "\11\0\1\4\1\6\1\36\1\36\1\6\22\0\1\4\2\0\1\13"+ + "\4\0\1\15\1\16\1\7\1\17\1\14\2\0\1\5\12\1\1\0"+ + "\1\12\1\0\1\10\1\0\1\20\1\0\32\3\4\0\1\1\1\0"+ + "\1\24\1\2\1\35\1\33\1\32\1\30\1\2\1\27\1\21\2\2"+ + "\1\25\1\2\1\22\3\2\1\31\1\26\1\23\1\34\5\2\1\0"+ + "\1\11\10\0\1\36\u1fa2\0\1\36\1\36\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\udfe6\0"; + + /** + * Translates characters to character classes + */ + private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); + + /** + * Translates DFA states to action switch labels. + */ + private static final int [] ZZ_ACTION = zzUnpackAction(); + + private static final String ZZ_ACTION_PACKED_0 = + "\1\0\1\1\1\2\1\3\1\4\1\1\1\5\1\6"+ + "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+ + "\3\2\1\4\1\0\3\2\1\0\3\2\1\4\4\2"+ + "\1\17\2\2\1\20\1\21"; + + private static int [] zzUnpackAction() { + int [] result = new int[38]; + int offset = 0; + offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackAction(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + do result[j++] = value; while (--count > 0); + } + return j; + } + + + /** + * Translates a state to a row index in the transition table + */ + private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); + + private static final String ZZ_ROWMAP_PACKED_0 = + "\0\0\0\37\0\76\0\135\0\174\0\233\0\37\0\37"+ + "\0\37\0\37\0\37\0\37\0\37\0\37\0\37\0\37"+ + "\0\272\0\331\0\370\0\u0117\0\u0136\0\u0155\0\u0174\0\u0193"+ + "\0\u01b2\0\u01d1\0\u01f0\0\u020f\0\37\0\u022e\0\u024d\0\u026c"+ + "\0\u028b\0\76\0\u02aa\0\u02c9\0\76\0\76"; + + private static int [] zzUnpackRowMap() { + int [] result = new int[38]; + int offset = 0; + offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackRowMap(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int high = packed.charAt(i++) << 16; + result[j++] = high | packed.charAt(i++); + } + return j; + } + + /** + * The transition table of the DFA + */ + private static final int [] ZZ_TRANS = zzUnpackTrans(); + + private static final String ZZ_TRANS_PACKED_0 = + "\2\2\1\3\1\4\1\5\1\6\1\5\1\7\1\10"+ + "\1\11\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+ + "\1\21\4\3\1\22\2\3\1\23\4\3\41\0\3\3"+ + "\15\0\15\3\2\0\3\4\15\0\15\4\5\0\1\5"+ + "\1\0\1\5\35\0\1\24\1\0\1\25\30\0\3\3"+ + "\15\0\1\3\1\26\13\3\2\0\3\3\15\0\6\3"+ + "\1\27\6\3\2\0\3\3\15\0\11\3\1\30\3\3"+ + "\1\0\6\24\1\0\30\24\7\25\1\31\27\25\1\0"+ + "\3\3\15\0\1\32\14\3\2\0\3\3\15\0\1\33"+ + "\14\3\2\0\3\3\15\0\12\3\1\34\2\3\1\0"+ + "\5\25\1\35\1\25\1\31\27\25\1\0\3\3\15\0"+ + "\2\3\1\36\12\3\2\0\3\3\15\0\7\3\1\37"+ + "\5\3\2\0\3\3\15\0\13\3\1\40\1\3\2\0"+ + "\3\3\15\0\1\41\14\3\2\0\3\3\15\0\2\3"+ + "\1\42\12\3\2\0\3\3\15\0\14\3\1\43\2\0"+ + "\3\3\15\0\3\3\1\44\11\3\2\0\3\3\15\0"+ + "\11\3\1\45\3\3\2\0\3\3\15\0\4\3\1\46"+ + "\10\3\1\0"; + + private static int [] zzUnpackTrans() { + int [] result = new int[744]; + int offset = 0; + offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackTrans(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + value--; + do result[j++] = value; while (--count > 0); + } + return j; + } + + + /* error codes */ + private static final int ZZ_UNKNOWN_ERROR = 0; + private static final int ZZ_NO_MATCH = 1; + private static final int ZZ_PUSHBACK_2BIG = 2; + + /* error messages for the codes above */ + private static final String ZZ_ERROR_MSG[] = { + "Unknown internal scanner error", + "Error: could not match input", + "Error: pushback value was too large" + }; + + /** + * ZZ_ATTRIBUTE[aState] contains the attributes of state aState + */ + private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); + + private static final String ZZ_ATTRIBUTE_PACKED_0 = + "\1\0\1\11\4\1\12\11\4\1\1\0\3\1\1\0"+ + "\3\1\1\11\11\1"; + + private static int [] zzUnpackAttribute() { + int [] result = new int[38]; + int offset = 0; + offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackAttribute(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + do result[j++] = value; while (--count > 0); + } + return j; + } + + /** the input device */ + private java.io.Reader zzReader; + + /** the current state of the DFA */ + private int zzState; + + /** the current lexical state */ + private int zzLexicalState = YYINITIAL; + + /** this buffer contains the current text to be matched and is + the source of the yytext() string */ + private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; + + /** the textposition at the last accepting state */ + private int zzMarkedPos; + + /** the current text position in the buffer */ + private int zzCurrentPos; + + /** startRead marks the beginning of the yytext() string in the buffer */ + private int zzStartRead; + + /** endRead marks the last character in the buffer, that has been read + from input */ + private int zzEndRead; + + /** number of newlines encountered up to the start of the matched text */ + private int yyline; + + /** the number of characters up to the start of the matched text */ + private int yychar; + + /** + * the number of characters from the last newline up to the start of the + * matched text + */ + private int yycolumn; + + /** + * zzAtBOL == true <=> the scanner is currently at the beginning of a line + */ + private boolean zzAtBOL = true; + + /** zzAtEOF == true <=> the scanner is at the EOF */ + private boolean zzAtEOF; + + /** denotes if the user-EOF-code has already been executed */ + private boolean zzEOFDone; + + /** + * The number of occupied positions in zzBuffer beyond zzEndRead. + * When a lead/high surrogate has been read from the input stream + * into the final zzBuffer position, this will have a value of 1; + * otherwise, it will have a value of 0. + */ + private int zzFinalHighSurrogate = 0; + + /* user code: */ + private Token sym(int id) { + return new Token(id, yychar, yychar+yylength(), yytext()); + } + + + /** + * Creates a new scanner + * + * @param in the java.io.Reader to read input from. + */ + public GrammarLexer(java.io.Reader in) { + this.zzReader = in; + } + + + /** + * Unpacks the compressed character translation table. + * + * @param packed the packed character translation table + * @return the unpacked character translation table + */ + private static char [] zzUnpackCMap(String packed) { + char [] map = new char[0x110000]; + int i = 0; /* index in packed string */ + int j = 0; /* index in unpacked array */ + while (i < 146) { + int count = packed.charAt(i++); + char value = packed.charAt(i++); + do map[j++] = value; while (--count > 0); + } + return map; + } + + + /** + * Refills the input buffer. + * + * @return false, iff there was new input. + * + * @exception java.io.IOException if any I/O-Error occurs + */ + private boolean zzRefill() throws java.io.IOException { + + /* first: make room (if you can) */ + if (zzStartRead > 0) { + zzEndRead += zzFinalHighSurrogate; + zzFinalHighSurrogate = 0; + System.arraycopy(zzBuffer, zzStartRead, + zzBuffer, 0, + zzEndRead-zzStartRead); + + /* translate stored positions */ + zzEndRead-= zzStartRead; + zzCurrentPos-= zzStartRead; + zzMarkedPos-= zzStartRead; + zzStartRead = 0; + } + + /* is the buffer big enough? */ + if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) { + /* if not: blow it up */ + char newBuffer[] = new char[zzBuffer.length*2]; + System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); + zzBuffer = newBuffer; + zzEndRead += zzFinalHighSurrogate; + zzFinalHighSurrogate = 0; + } + + /* fill the buffer with new input */ + int requested = zzBuffer.length - zzEndRead; + int numRead = zzReader.read(zzBuffer, zzEndRead, requested); + + /* not supposed to occur according to specification of java.io.Reader */ + if (numRead == 0) { + throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround."); + } + if (numRead > 0) { + zzEndRead += numRead; + /* If numRead == requested, we might have requested to few chars to + encode a full Unicode character. We assume that a Reader would + otherwise never return half characters. */ + if (numRead == requested) { + if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) { + --zzEndRead; + zzFinalHighSurrogate = 1; + } + } + /* potentially more input available */ + return false; + } + + /* numRead < 0 ==> end of stream */ + return true; + } + + + /** + * Closes the input stream. + */ + public final void yyclose() throws java.io.IOException { + zzAtEOF = true; /* indicate end of file */ + zzEndRead = zzStartRead; /* invalidate buffer */ + + if (zzReader != null) + zzReader.close(); + } + + + /** + * Resets the scanner to read from a new input stream. + * Does not close the old reader. + * + * All internal variables are reset, the old input stream + * cannot be reused (internal buffer is discarded and lost). + * Lexical state is set to ZZ_INITIAL. + * + * Internal scan buffer is resized down to its initial length, if it has grown. + * + * @param reader the new input stream + */ + public final void yyreset(java.io.Reader reader) { + zzReader = reader; + zzAtBOL = true; + zzAtEOF = false; + zzEOFDone = false; + zzEndRead = zzStartRead = 0; + zzCurrentPos = zzMarkedPos = 0; + zzFinalHighSurrogate = 0; + yyline = yychar = yycolumn = 0; + zzLexicalState = YYINITIAL; + if (zzBuffer.length > ZZ_BUFFERSIZE) + zzBuffer = new char[ZZ_BUFFERSIZE]; + } + + + /** + * Returns the current lexical state. + */ + public final int yystate() { + return zzLexicalState; + } + + + /** + * Enters a new lexical state + * + * @param newState the new lexical state + */ + public final void yybegin(int newState) { + zzLexicalState = newState; + } + + + /** + * Returns the text matched by the current regular expression. + */ + public final String yytext() { + return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead ); + } + + + /** + * Returns the character at position pos from the + * matched text. + * + * It is equivalent to yytext().charAt(pos), but faster + * + * @param pos the position of the character to fetch. + * A value from 0 to yylength()-1. + * + * @return the character at position pos + */ + public final char yycharat(int pos) { + return zzBuffer[zzStartRead+pos]; + } + + + /** + * Returns the length of the matched text region. + */ + public final int yylength() { + return zzMarkedPos-zzStartRead; + } + + + /** + * Reports an error that occured while scanning. + * + * In a wellformed scanner (no or only correct usage of + * yypushback(int) and a match-all fallback rule) this method + * will only be called with things that "Can't Possibly Happen". + * If this method is called, something is seriously wrong + * (e.g. a JFlex bug producing a faulty scanner etc.). + * + * Usual syntax/scanner level error handling should be done + * in error fallback rules. + * + * @param errorCode the code of the errormessage to display + */ + private void zzScanError(int errorCode) throws RuntimeException { + String message; + try { + message = ZZ_ERROR_MSG[errorCode]; + } + catch (ArrayIndexOutOfBoundsException e) { + message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; + } + + throw new RuntimeException(message); + } + + + /** + * Pushes the specified amount of characters back into the input stream. + * + * They will be read again by then next call of the scanning method + * + * @param number the number of characters to be read again. + * This number must not be greater than yylength()! + */ + public void yypushback(int number) throws RuntimeException { + if ( number > yylength() ) + zzScanError(ZZ_PUSHBACK_2BIG); + + zzMarkedPos -= number; + } + + + /** + * Resumes scanning until the next regular expression is matched, + * the end of input is encountered or an I/O-Error occurs. + * + * @return the next token + * @exception java.io.IOException if any I/O-Error occurs + */ + public Token nextToken() throws java.io.IOException, RuntimeException, RuntimeException { + int zzInput; + int zzAction; + + // cached fields: + int zzCurrentPosL; + int zzMarkedPosL; + int zzEndReadL = zzEndRead; + char [] zzBufferL = zzBuffer; + char [] zzCMapL = ZZ_CMAP; + + int [] zzTransL = ZZ_TRANS; + int [] zzRowMapL = ZZ_ROWMAP; + int [] zzAttrL = ZZ_ATTRIBUTE; + + while (true) { + zzMarkedPosL = zzMarkedPos; + + yychar+= zzMarkedPosL-zzStartRead; + + zzAction = -1; + + zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; + + zzState = ZZ_LEXSTATE[zzLexicalState]; + + // set up zzAction for empty match case: + int zzAttributes = zzAttrL[zzState]; + if ( (zzAttributes & 1) == 1 ) { + zzAction = zzState; + } + + + zzForAction: { + while (true) { + + if (zzCurrentPosL < zzEndReadL) { + zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); + zzCurrentPosL += Character.charCount(zzInput); + } + else if (zzAtEOF) { + zzInput = YYEOF; + break zzForAction; + } + else { + // store back cached positions + zzCurrentPos = zzCurrentPosL; + zzMarkedPos = zzMarkedPosL; + boolean eof = zzRefill(); + // get translated positions and possibly new buffer + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer; + zzEndReadL = zzEndRead; + if (eof) { + zzInput = YYEOF; + break zzForAction; + } + else { + zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); + zzCurrentPosL += Character.charCount(zzInput); + } + } + int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ]; + if (zzNext == -1) break zzForAction; + zzState = zzNext; + + zzAttributes = zzAttrL[zzState]; + if ( (zzAttributes & 1) == 1 ) { + zzAction = zzState; + zzMarkedPosL = zzCurrentPosL; + if ( (zzAttributes & 8) == 8 ) break zzForAction; + } + + } + } + + // store back cached position + zzMarkedPos = zzMarkedPosL; + + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { + zzAtEOF = true; + { return sym(GrammarTerminals.EOF); + } + } + else { + switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { + case 1: + { throw new RuntimeException("Illegal character '" + yytext() + "'."); + } + case 18: break; + case 2: + { return sym(GrammarTerminals.NONTERMINAL); + } + case 19: break; + case 3: + { return sym(GrammarTerminals.TERMINAL); + } + case 20: break; + case 4: + { + } + case 21: break; + case 5: + { return sym(GrammarTerminals.STAR); + } + case 22: break; + case 6: + { return sym(GrammarTerminals.EQUALS); + } + case 23: break; + case 7: + { return sym(GrammarTerminals.BAR); + } + case 24: break; + case 8: + { return sym(GrammarTerminals.SEMICOLON); + } + case 25: break; + case 9: + { return sym(GrammarTerminals.HASH); + } + case 26: break; + case 10: + { return sym(GrammarTerminals.COMMA); + } + case 27: break; + case 11: + { return sym(GrammarTerminals.LPAREN); + } + case 28: break; + case 12: + { return sym(GrammarTerminals.RPAREN); + } + case 29: break; + case 13: + { return sym(GrammarTerminals.PLUS); + } + case 30: break; + case 14: + { return sym(GrammarTerminals.OPTIONAL); + } + case 31: break; + case 15: + { return sym(GrammarTerminals.SHIFT); + } + case 32: break; + case 16: + { return sym(GrammarTerminals.REDUCE); + } + case 33: break; + case 17: + { return sym(GrammarTerminals.INITIAL); + } + case 34: break; + default: + zzScanError(ZZ_NO_MATCH); + } + } + } + } + + +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat new file mode 100644 index 000000000..f1c4e8ab4 Binary files /dev/null and b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat differ diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java new file mode 100644 index 000000000..d8526ca12 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java @@ -0,0 +1,350 @@ +package org.simantics.scl.compiler.parser.grammar.input; + +import java.io.DataInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +public abstract class GrammarParser { + public static final boolean TRACE = true; + + private static final int INITIAL_CAPACITY = 16; + private static final int STATE_COUNT = 19; + private static final int TERMINAL_COUNT = 16; + private static final int NONTERMINAL_COUNT = 6; + private static final int PRODUCT_COUNT = 8; + + private static final int[] ACTION_ROW_ID = new int[STATE_COUNT]; + private static final int[] ACTION_COLUMN_ID = new int[TERMINAL_COUNT]; + private static final short[] ACTION_TABLE = new short[56]; + private static final int[] ERROR_TABLE = new int[10]; + private static final int[] GOTO_ROW_ID = new int[STATE_COUNT]; + private static final int[] GOTO_COLUMN_ID = new int[NONTERMINAL_COUNT]; + private static final short[] GOTO_TABLE = new short[12]; + private static final int[] PRODUCT_LHS = new int[PRODUCT_COUNT]; + + private static final short STATE_MASK = (short)0x0fff; + private static final short REDUCE_MASK = (short)0x8000; + private static final short POP_MASK = (short)0x4000; + private static final short PUSH_MASK = (short)0x2000; + private static final short ERROR_ACTION = (short)0xffff; + private static final short ACCEPT_ACTION = (short)0xfffe; + + public static final String[] TERMINAL_NAMES = new String[] { + "NONTERMINAL", + "EQUALS", + "BAR", + "SEMICOLON", + "INITIAL", + "HASH", + "TERMINAL", + "COMMA", + "SHIFT", + "REDUCE", + "STAR", + "PLUS", + "OPTIONAL", + "LPAREN", + "RPAREN", + "EOF" + }; + + public static final String[] NONTERMINAL_NAMES = new String[] { + "file", + "declaration", + "prod", + "regexps", + "regexp", + "init$1" + }; + + static { + try { + DataInputStream input = new DataInputStream(GrammarParser.class.getResourceAsStream("GrammarParser.dat")); + for(int i=0;i>5] >> (id&31))&1) != 0 ) + return ERROR_ACTION; + return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]]; + } + + private static short getGoto(int state, int symbol) { + return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]]; + } + + protected abstract Token nextToken(); + + private Object[] symbolStack = new Object[INITIAL_CAPACITY]; + private int symbolStackLength = 0; + + private int[] stateStack = new int[INITIAL_CAPACITY]; + private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY]; + private int stateStackLength = 0; + + // For reduce + private int reductionLength; + + protected int length() { + return reductionLength; + } + + protected Object get(int i) { + if(i < 0 || i >= reductionLength) + throw new IndexOutOfBoundsException(); + return symbolStack[symbolStackLength+i]; + } + + private String parseErrorDescription(int state, Token token, int tokenId) { + StringBuilder b = new StringBuilder(); + b.append("Unexpected token '").append(token) + .append("' (").append(TERMINAL_NAMES[tokenId]) + .append("). Expected one of "); + ArrayList possibleTerminals = new ArrayList(); + for(int i=0;i 0) + b.append(", "); + b.append(possibleTerminals.get(i)); + } + b.append('.'); + return b.toString(); + } + + protected abstract RuntimeException syntaxError(Token token, String description); + + private static String describeAction(boolean isGoto, int action) { + if(action == ERROR_ACTION) + return "ERROR"; + if(action == ACCEPT_ACTION) + return "ACCEPT"; + StringBuilder b = new StringBuilder(); + if(isGoto) + b.append("GOTO "); + else { + if((action & REDUCE_MASK) != 0) { + action ^= REDUCE_MASK; + b.append("REDUCE"); + } + else + b.append("SHIFT"); + } + if((action & POP_MASK) != 0) { + action ^= POP_MASK; + b.append(" POP"); + } + if((action & PUSH_MASK) != 0) { + action ^= PUSH_MASK; + b.append(" PUSH"); + } + b.append(' ').append(action); + return b.toString(); + } + + private void printState(int state) { + System.out.print("state=" + state + ":"); + for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) { + Object s = symbolStack[i]; + if(s instanceof Token) + System.out.print(" " + TERMINAL_NAMES[((Token)s).id]); + else if(s == null) + System.out.print(" null"); + else + System.out.print(" " + s.getClass().getSimpleName()); + while(j>=0 && symbolStackPositionStack[j]==i) + System.out.print(" (" + stateStack[j--] + ")"); + } + System.out.println(); + } + + private Object parse(int state) { + while(true) { + Token token = nextToken(); + int tokenId = token.id; + if(TRACE) + System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---"); + while(true) { + if(TRACE) + printState(state); + short action = getAction(state, tokenId); + if(TRACE) + System.out.println(" -> action=" + describeAction(false, action)); + //System.out.println(STATE_DESCRIPTIONS[state]); + if((action & REDUCE_MASK) != 0) { + if(action == ACCEPT_ACTION) + return symbolStack[symbolStackLength-1]; + if(action == ERROR_ACTION) + throw syntaxError(token, parseErrorDescription(state, token, tokenId)); + int popAmount = (action >>> 13)&3; + if(TRACE) { + if(popAmount > 0) + System.out.println(" POP " + popAmount); + } + stateStackLength -= popAmount; + action &= STATE_MASK; + + int reductionBegin = symbolStackPositionStack[--stateStackLength]; + + reductionLength = symbolStackLength-reductionBegin; + symbolStackLength = reductionBegin; + + if(symbolStackLength == symbolStack.length) + symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2); + Object symbol = reduce(action); + postReduce(symbol); + symbolStack[symbolStackLength] = symbol; + + state = stateStack[stateStackLength]; + if(TRACE) { + ++symbolStackLength; + printState(state); + --symbolStackLength; + System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]); + } + action = getGoto(state, PRODUCT_LHS[action]); + if(TRACE) + System.out.println(" -> action=" + describeAction(true, action)); + + // Pop state + if((action & POP_MASK) != 0) { + --stateStackLength; + } + // Push state + if((action & PUSH_MASK) != 0) { + if(stateStackLength == stateStack.length) { + stateStack = Arrays.copyOf(stateStack, stateStackLength*2); + symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2); + } + symbolStackPositionStack[stateStackLength] = symbolStackLength; + stateStack[stateStackLength++] = state; + } + state = action & STATE_MASK; + ++symbolStackLength; + } + else { + // Pop state + if((action & POP_MASK) != 0) { + --stateStackLength; + } + // Push state + if((action & PUSH_MASK) != 0) { + if(stateStackLength == stateStack.length) { + stateStack = Arrays.copyOf(stateStack, stateStackLength*2); + symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2); + } + symbolStackPositionStack[stateStackLength] = symbolStackLength; + stateStack[stateStackLength++] = state; + } + + // New state + state = action & STATE_MASK; + + // Push symbol + if(symbolStackLength == symbolStack.length) + symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2); + symbolStack[symbolStackLength++] = token; + break; + } + } + } + } + + public Object parseFile() { + return parse(0); + } + + + protected Object reduce(int productionId) { + try { + switch(productionId) { + case 0: + return reduceFile(); + case 1: + return reduceProduction(); + case 2: + return reduceInitial(); + case 3: + return reduceProductionRhs(); + case 4: + return reduceConcatenation(); + case 5: + return reduceTerminal(); + case 6: + return reduceUnion(); + + default: + throw new RuntimeException("Internal parser error."); + } + } catch(RuntimeException e) { + StringBuilder b = new StringBuilder(); + b.append("Failed to reduce"); + for(int i=0;i terminals = new ArrayList(); + ArrayList nonterminals = new ArrayList(); + TObjectIntHashMap symbols = new TObjectIntHashMap(); + ArrayList productions = new ArrayList(); + TIntArrayList initials = new TIntArrayList(); + + public GrammarParserImpl(Reader reader) { + lexer = new GrammarLexer(reader); + } + + private int getId(String symbol) { + if(symbols.contains(symbol)) + return symbols.get(symbol); + int id; + if(Character.isUpperCase(symbol.charAt(0))) { + id = terminals.size(); + terminals.add(symbol); + } + else { + id = ~nonterminals.size(); + nonterminals.add(symbol); + } + symbols.put(symbol, id); + return id; + } + + @Override + protected Token nextToken() { + try { + Token token = lexer.nextToken(); + return token; + } catch(Exception e) { + if(e instanceof RuntimeException) + throw (RuntimeException)e; + else + throw new RuntimeException(e); + } + } + + @Override + protected RuntimeException syntaxError(Token token, String description) { + return new RuntimeException(description); + } + + @Override + protected Object reduceFile() { + return null; + } + + @Override + protected Object reduceProduction() { + int lhs = getId(((Token)get(0)).text); + for(int i=2;i regexps = new ArrayList(length()); + for(int i=0;i= 1) + b.append('('); + boolean first = true; + for(Regexp exp : exps) { + if(first) + first = false; + else + b.append(" | "); + exp.toString(b, 1); + } + if(prec >= 1) + b.append(')'); + } + + @Override + public void toString(StringBuilder b, Namer grammar, int prec) { + if(prec >= 1) + b.append('('); + boolean first = true; + for(Regexp exp : exps) { + if(first) + first = false; + else + b.append(" | "); + exp.toString(b, grammar, 1); + } + if(prec >= 1) + b.append(')'); + } + + @Override + protected int getTypeId() { + return OR; + } + + @Override + public boolean equals(Object obj) { + if(obj == this) + return true; + if(obj == null || obj.getClass() != getClass()) + return false; + ROr other = (ROr)obj; + return Arrays.equals(exps, other.exps); + } + + @Override + public int hashCode() { + int r = 34235; + for(Regexp exp : exps) { + r *= 31; + r += exp.hashCode(); + } + return r; + } + + @Override + public boolean isNullable() { + for(Regexp exp : exps) + if(exp.isNullable()) + return true; + return false; + } + + private Regexp simplify(THashSet set) { + boolean qm = set.remove(ONE); + + Regexp exp; + simpl: { + if(set.size() > 1) { + frontLoop: { + Iterator it = set.iterator(); + Regexp common = front(it.next()); + while(it.hasNext()) { + Regexp temp = front(it.next()); + if(!temp.equals(common)) + break frontLoop; + } + + THashSet set2 = new THashSet(); + for(Regexp e : set) + set2.add(removeFront(e)); + + exp = seq(common, simplify(set2)); + break simpl; + } + + backLoop: { + Iterator it = set.iterator(); + Regexp common = back(it.next()); + while(it.hasNext()) { + Regexp temp = back(it.next()); + if(!temp.equals(common)) + break backLoop; + } + + THashSet set2 = new THashSet(); + for(Regexp e : set) + set2.add(removeBack(e)); + + exp = seq(simplify(set2), common); + break simpl; + } + } + + exp = or(set); + } + + if(qm && !exp.isNullable()) + exp = new ROp(exp, '?'); + return exp; + } + + @Override + public Regexp simplify() { + if(exps.length == 0) + return this; + THashSet set = new THashSet(); + for(Regexp exp : exps) { + exp = exp.simplify(); + or(set, exp); + } + return simplify(set); + } + + private static Regexp front(Regexp exp) { + if(exp instanceof RSeq) { + Regexp[] exps = ((RSeq)exp).exps; + if(exps.length == 0) + return null; + return exps[0]; + } + else + return exp; + } + + private static Regexp removeFront(Regexp exp) { + if(exp instanceof RSeq) { + Regexp[] exps = ((RSeq)exp).exps; + return seq_(Arrays.asList(exps).subList(1, exps.length)); + } + else + return ONE; + } + + private static Regexp back(Regexp exp) { + if(exp instanceof RSeq) { + Regexp[] exps = ((RSeq)exp).exps; + if(exps.length == 0) + return null; + return exps[exps.length-1]; + } + else + return exp; + } + + private static Regexp removeBack(Regexp exp) { + if(exp instanceof RSeq) { + Regexp[] exps = ((RSeq)exp).exps; + return seq_(Arrays.asList(exps).subList(0, exps.length-1)); + } + else + return ONE; + } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java new file mode 100644 index 000000000..354ee5b24 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java @@ -0,0 +1,99 @@ +package org.simantics.scl.compiler.parser.regexp; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.simantics.scl.compiler.parser.regexp.automata.NFA; + +public class RSeq extends Regexp { + public final Regexp[] exps; + + RSeq(Regexp[] exps) { + this.exps = exps; + } + + @Override + protected void buildAutomaton(NFA aut, int inState, int outState) { + if(exps.length == 0) { + aut.addEpsilonTransition(inState, outState); + return; + } + for(int i=exps.length-1;i>0;--i) { + int midState = aut.newState(); + exps[i].buildAutomaton(aut, midState, outState); + outState = midState; + } + exps[0].buildAutomaton(aut, inState, outState); + } + + @Override + protected void toString(StringBuilder b, int prec) { + if(prec >= 2) + b.append('('); + for(Regexp exp : exps) + exp.toString(b, 2); + if(prec >= 2) + b.append(')'); + } + + @Override + public void toString(StringBuilder b, Namer grammar, int prec) { + if(prec >= 2) + b.append('('); + boolean first = true; + for(Regexp exp : exps) { + if(first) + first = false; + else + b.append(' '); + exp.toString(b, grammar, 2); + } + if(prec >= 2) + b.append(')'); + } + + @Override + protected int getTypeId() { + return SEQ; + } + + @Override + public boolean equals(Object obj) { + if(obj == this) + return true; + if(obj == null || obj.getClass() != getClass()) + return false; + RSeq other = (RSeq)obj; + return Arrays.equals(exps, other.exps); + } + + @Override + public int hashCode() { + int r = 31340123; + for(Regexp exp : exps) { + r *= 31; + r += exp.hashCode(); + } + return r; + } + + @Override + public Regexp simplify() { + if(exps.length == 0) + return this; + ArrayList l = new ArrayList(exps.length); + for(Regexp exp : this.exps) { + exp = exp.simplify(); + seq(l, exp); + } + return seq_(l); + } + + @Override + public boolean isNullable() { + for(Regexp exp : exps) + if(!exp.isNullable()) + return false; + return true; + } +} diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java new file mode 100644 index 000000000..8211b6a74 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java @@ -0,0 +1,244 @@ +package org.simantics.scl.compiler.parser.regexp; + +import gnu.trove.set.hash.THashSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import org.simantics.scl.compiler.parser.regexp.automata.NFA; + +public abstract class Regexp implements Comparable { + static final int ATOM = 0; + static final int OP = 1; + static final int OR = 2; + static final int SEQ = 3; + + protected abstract void buildAutomaton(NFA aut, int inState, int outState); + + public static final Regexp ONE = new RSeq(new Regexp[0]); + public static final Regexp ZERO = new ROr(new Regexp[0]); + + Regexp() { + } + + public NFA toAutomaton() { + NFA aut = new NFA(); + int inState = aut.newState(); + int outState = aut.newState(); + buildAutomaton(aut, inState, outState); + aut.setInitialState(inState); + aut.setAccepts(outState, true); + return aut; + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + toString(b, 0); + return b.toString(); + } + + protected abstract void toString(StringBuilder b, int prec); + + protected abstract int getTypeId(); + + public abstract boolean isNullable(); + + public Regexp simplify() { + return this; + } + + private static int parseRegexp(int pos, String regexp, AtomicReference result) { + ArrayList dc = new ArrayList(); + ArrayList cc = new ArrayList(); + loop: while(true) { + if(pos == regexp.length()) + break loop; + char c = regexp.charAt(pos++); + switch(c) { + case '(': { + AtomicReference child = new AtomicReference(); + pos = parseRegexp(pos, regexp, child); + cc.add(child.get()); + } break; + case ')': + break loop; + case '|': { + dc.add(seq(cc)); + cc.clear(); + } break; + case '*': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered * that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.star(cc.get(p))); + } break; + case '?': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered ? that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.optional(cc.get(p))); + } break; + case '+': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered + that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.plus(cc.get(p))); + } break; + default: + cc.add(new RAtom(c)); + } + } + dc.add(seq(cc)); + result.set(or(dc)); + return pos; + } + + public static Regexp of(String regexp) { + AtomicReference result = new AtomicReference(); + int finalPos = parseRegexp(0, regexp, result); + if(finalPos < regexp.length()) + throw new IllegalArgumentException("Extra closing parenteses"); + return result.get(); + } + + static void seq(ArrayList l, Regexp exp) { + if(l.size() > 0 && l.get(0) == ZERO) + return; + if(exp instanceof RSeq) + for(Regexp e : ((RSeq)exp).exps) + seq(l, e); + else if(exp == ZERO) { + l.clear(); + l.add(exp); + } + else + l.add(exp); + } + + static Regexp seq_(List es) { + if(es.size() == 0) + return ONE; + if(es.size() == 1) + return es.get(0); + Regexp[] eArray = es.toArray(new Regexp[es.size()]); + return new RSeq(eArray); + } + + public static Regexp seq(Regexp ... exps) { + if(exps.length == 0) + return ONE; + if(exps.length == 1) + return exps[0]; + ArrayList es = new ArrayList(); + for(Regexp exp : exps) + seq(es, exp); + return seq_(es); + } + + public static Regexp seq(Collection exps) { + return seq(exps.toArray(new Regexp[exps.size()])); + } + + static void or(THashSet s, Regexp exp) { + if(exp instanceof ROr) + for(Regexp e : ((ROr)exp).exps) + or(s, e); + else + s.add(exp); + } + + static Regexp or_(THashSet es) { + if(es.size() == 0) + return ZERO; + if(es.size() == 1) + return es.iterator().next(); + Regexp[] eArray = es.toArray(new Regexp[es.size()]); + Arrays.sort(eArray); + return new ROr(eArray); + } + + public static Regexp or(Regexp ... exps) { + if(exps.length == 0) + return ZERO; + if(exps.length == 1) + return exps[0]; + THashSet es = new THashSet(); + for(Regexp exp : exps) + or(es, exp); + return or_(es); + } + + public static Regexp star(Regexp exp) { + if(exp == ONE || exp == ZERO) + return ONE; + return new ROp(exp, '*'); + } + + public static Regexp plus(Regexp exp) { + return new ROp(exp, '+'); + } + + public static Regexp optional(Regexp exp) { + return new ROp(exp, '?'); + } + + public static Regexp or(Collection exps) { + return or(exps.toArray(new Regexp[exps.size()])); + } + + @Override + public int compareTo(Regexp o) { + int tA = getTypeId(); + int tB = o.getTypeId(); + if(tA < tB) + return -1; + if(tA > tB) + return 1; + switch(tA) { + case ATOM: { + int sA = ((RAtom)this).symbolId; + int sB = ((RAtom)o).symbolId; + if(sA < sB) + return -1; + if(sA > sB) + return 1; + return 0; + } + case OP: { + ROp a = (ROp)this; + ROp b = (ROp)o; + if(a.op < b.op) + return -1; + if(a.op > b.op) + return 1; + return a.exp.compareTo(b.exp); + } + case OR: + return compare(((ROr)this).exps, ((ROr)o).exps); + case SEQ: { + return compare(((RSeq)this).exps, ((RSeq)o).exps); + } + default: + throw new IllegalArgumentException(); + } + } + + private static int compare(Regexp[] a, Regexp[] b) { + if(a.length < b.length) + return -1; + if(a.length > b.length) + return 1; + for(int i=0;i transitions = new ArrayList(); + private TByteArrayList accepts = new TByteArrayList(); + private int initialState; + + public int newState() { + int stateId = transitions.size(); + transitions.add(new TIntIntHashMap( + Constants.DEFAULT_CAPACITY, + Constants.DEFAULT_LOAD_FACTOR, + 0, -1)); + accepts.add((byte)0); + return stateId; + } + + public int size() { + return transitions.size(); + } + + public DFA copy() { + DFA copy = new DFA(); + for(TIntIntHashMap t : transitions) + copy.transitions.add(new TIntIntHashMap(t)); + copy.accepts = new TByteArrayList(accepts); + copy.initialState = initialState; + return copy; + } + + public void addTransition(int sourceId, int symbol, int targetId) { + transitions.get(sourceId).put(symbol, targetId); + } + + public int getTransition(int sourceId, int symbol) { + return transitions.get(sourceId).get(symbol); + } + + public void forEachTransition(int source, final TIntIntProcedure proc) { + transitions.get(source).forEachEntry(proc); + } + + public int[] nextStates(int id) { + return transitions.get(id).keys(); + } + + public void setAccepts(int id, boolean accepts) { + this.accepts.set(id, accepts ? (byte)1 : (byte)0); + } + + public boolean getAccepts(int id) { + return accepts.get(id)==1; + } + + public void setInitialState(int initialState) { + this.initialState = initialState; + } + + public int getInitialState() { + return initialState; + } + + public DFA minimize() { + // Compute relevant input characters for minimization + final TIntIntHashMap symbolMap = new TIntIntHashMap(); + final int[] symbolArray; + { + final TIntArrayList l = new TIntArrayList(); + TIntProcedure proc = new TIntProcedure() { + @Override + public boolean execute(int value) { + if(!symbolMap.containsKey(value)) { + symbolMap.put(value, l.size()); + l.add(value); + } + return true; + } + }; + for(TIntIntHashMap tMap : transitions) + tMap.forEachKey(proc); + symbolArray = l.toArray(); + } + int symbolCount = symbolMap.size(); + int stateCount = transitions.size(); + + // Inverse automata + final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][]; + for(int i=0;i " + inverse[i][j]); + */ + + // + int[] ids = new int[stateCount+1]; + final int[] memPartion = new int[stateCount+1]; + TIntArrayList partionBegin = new TIntArrayList(); + TIntArrayList partionEnd = new TIntArrayList(); + TIntArrayList stack = new TIntArrayList(); + TIntArrayList scheduled = new TIntArrayList(); + + // Initial partition + { + int min = 0; + int max = stateCount; + ids[min++] = stateCount; + memPartion[stateCount] = 0; + for(int i=0;i=0;--n) { + Regexp ss = Regexp.star(a[n][n]); + b[n] = Regexp.seq(ss, b[n]); + for(int j=0;j transitions = new TIntObjectHashMap(); + TIntArrayList epsilonTransitions = new TIntArrayList(); + boolean accepts; + + private void addTransition(int symbol, int target) { + TIntArrayList l = transitions.get(symbol); + if(l == null) { + l = new TIntArrayList(); + l.add(target); + transitions.put(symbol, l); + } + else if(!l.contains(target)) + l.add(target); + } + + private void addEpsilonTransition(int target) { + if(!epsilonTransitions.contains(target)) + epsilonTransitions.add(target); + } + } + + private ArrayList states = new ArrayList(); + private int initialState; + + public int size() { + return states.size(); + } + + public void setInitialState(int initialState) { + this.initialState = initialState; + } + + public int getInitialState() { + return initialState; + } + + public int newState() { + int id = states.size(); + states.add(new State()); + return id; + } + + public void setAccepts(int id, boolean accepts) { + states.get(id).accepts = accepts; + } + + public boolean getAccepts(int id) { + return states.get(id).accepts; + } + + public void addTransition(int source, int symbol, int target) { + states.get(source).addTransition(symbol, target); + } + + public void forEachTransition(int source, final TIntIntProcedure proc) { + states.get(source).transitions.forEachEntry(new TIntObjectProcedure() { + @Override + public boolean execute(int a, TIntArrayList b) { + for(int i=0;i stack = new ArrayList(); + final TObjectIntHashMap map = new TObjectIntHashMap(); + { + TIntHashSet initialSet = new TIntHashSet(4); + initialSet.add(getInitialState()); + TIntArrayList stateList = closure(initialSet); + + int stateId = aut.newState(); + map.put(stateList, stateId); + aut.setInitialState(stateId); + + stack.add(stateList); + } + + while(!stack.isEmpty()) { + TIntArrayList curList = stack.remove(stack.size()-1); + int[] stateArray = curList.toArray(); + final int id = map.get(curList); + + // Transitions + final TIntObjectHashMap transitions = + new TIntObjectHashMap(); + TIntIntProcedure proc = new TIntIntProcedure() { + @Override + public boolean execute(int symbol, int b) { + TIntHashSet set = transitions.get(symbol); + if(set == null) { + set = new TIntHashSet(); + transitions.put(symbol, set); + } + set.add(b); + return true; + } + }; + for(int s : stateArray) + forEachTransition(s, proc); + + // Create transition targets + transitions.forEachEntry(new TIntObjectProcedure() { + @Override + public boolean execute(int symbol, TIntHashSet b) { + TIntArrayList stateList = closure(b); + + if(map.containsKey(stateList)) + aut.addTransition(id, symbol, map.get(stateList)); + else { + int stateId = aut.newState(); + map.put(stateList, stateId); + stack.add(stateList); + + aut.addTransition(id, symbol, stateId); + } + return true; + } + }); + + // Accepts + for(int s : stateArray) + if(getAccepts(s)) { + aut.setAccepts(id, true); + break; + } + } + + return aut; + } + +}