--- /dev/null
+<project>
+
+ <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpath="tools/jflex-1.6.1.jar"/>
+
+ <property name="package" location="src/org/simantics/scl/compiler/parser/grammar/input2"/>
+
+ <jflex
+ file="${package}/GrammarLexer.flex"
+ destdir="src/"
+ nobak="true"
+ />
+
+</project>
\ No newline at end of file
/******************************************************************************
* Auxiliary tokens
*/
-dummy = COMMENT EOL ;
\ No newline at end of file
+dummy = COMMENT EOL # Dummy
+ ;
\ No newline at end of file
protected abstract RuntimeException syntaxError(Token token, String description);
- private static String describeAction(int action) {
+ private static String describeAction(boolean isGoto, int action) {
if(action == ERROR_ACTION)
return "ERROR";
if(action == ACCEPT_ACTION)
return "ACCEPT";
StringBuilder b = new StringBuilder();
- if((action & REDUCE_MASK) != 0) {
- action ^= REDUCE_MASK;
- b.append("REDUCE");
+ if(isGoto)
+ b.append("GOTO ");
+ else {
+ if((action & REDUCE_MASK) != 0) {
+ action ^= REDUCE_MASK;
+ b.append("REDUCE");
+ }
+ else
+ b.append("SHIFT");
}
- else
- b.append("SHIFT");
if((action & POP_MASK) != 0) {
action ^= POP_MASK;
b.append(" POP");
return b.toString();
}
+ private void printState(int state) {
+ System.out.print("state=" + state + ":");
+ for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+ Object s = symbolStack[i];
+ if(s instanceof Token)
+ System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+ else if(s == null)
+ System.out.print(" null");
+ else
+ System.out.print(" " + s.getClass().getSimpleName());
+ while(j>=0 && symbolStackPositionStack[j]==i)
+ System.out.print(" (" + stateStack[j--] + ")");
+ }
+ System.out.println();
+ }
+
private Object parse(int state) {
while(true) {
Token token = nextToken();
int tokenId = token.id;
+ if(TRACE)
+ System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
while(true) {
+ if(TRACE)
+ printState(state);
short action = getAction(state, tokenId);
- if(TRACE) {
- System.out.println("state=" + state + ", tokenId=" + TERMINAL_NAMES[tokenId] +
- ", action=" + describeAction(action));
- System.out.print(" ");
- for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
- Object s = symbolStack[i];
- if(s instanceof Token)
- System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
- else
- System.out.print(" " + s.getClass().getSimpleName());
- while(j>=0 && symbolStackPositionStack[j]==i)
- System.out.print(" (" + stateStack[j--] + ")");
- }
- System.out.println();
- }
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(false, action));
//System.out.println(STATE_DESCRIPTIONS[state]);
if((action & REDUCE_MASK) != 0) {
if(action == ACCEPT_ACTION)
return symbolStack[symbolStackLength-1];
if(action == ERROR_ACTION)
throw syntaxError(token, parseErrorDescription(state, token, tokenId));
- stateStackLength -= (action >>> 13)&3;
+ int popAmount = (action >>> 13)&3;
+ if(TRACE) {
+ if(popAmount > 0)
+ System.out.println(" POP " + popAmount);
+ }
+ stateStackLength -= popAmount;
action &= STATE_MASK;
int reductionBegin = symbolStackPositionStack[--stateStackLength];
symbolStack[symbolStackLength] = symbol;
state = stateStack[stateStackLength];
+ if(TRACE) {
+ ++symbolStackLength;
+ printState(state);
+ --symbolStackLength;
+ System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+ }
action = getGoto(state, PRODUCT_LHS[action]);
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(true, action));
+
// Pop state
if((action & POP_MASK) != 0) {
--stateStackLength;
case 124:
return reduceApplyType();
case 125:
- return reduceDummy1();
+ return reduceDummy();
default:
throw new RuntimeException("Internal parser error.");
/**
* dummy ::= COMMENT EOL
*/
- protected abstract Object reduceDummy1();
+ protected abstract Object reduceDummy();
protected void postReduce(Object reduced) {
}
return new TApplyAst((TypeAst)get(0), parameters);
}
- @Override
- protected Object reduceDummy1() {
- throw new UnsupportedOperationException();
- }
-
@SuppressWarnings("unchecked")
@Override
protected void postReduce(Object reduced) {
return new CHRStatement((ListQualifier[])get(1), (ListQualifier[])get(3));
}
+ @Override
+ protected Object reduceDummy() {
+ throw new UnsupportedOperationException();
+ }
+
}
--- /dev/null
+package org.simantics.scl.compiler.parser;
+
+import java.io.File;
+
+import org.simantics.scl.compiler.parser.generator.ParserGenerator;
+
+public class GenerateSCLParser {
+
+ public static void main(String[] args) throws Exception {
+ File plugin = new File(GenerateSCLParser.class.getResource(".").getPath().replace("%20", " "))
+ .getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile();
+ System.out.println(plugin);
+ ParserGenerator.createParser(
+ "org.simantics.scl.compiler.internal.parsing.parser",
+ "SCLSyntaxErrorException",
+ new File(plugin, "src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar"));
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.java.GenerateEnum;
+import org.simantics.scl.compiler.parser.generator.java.GenerateParser;
+import org.simantics.scl.compiler.parser.generator.table.ParseTable;
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.input.GrammarParserImpl;
+
+public class ParserGenerator {
+
+ public static void createParser(String packageName, String exceptionName, File grammarFile) throws IOException {
+ // Read grammar and check it
+ FileInputStream inputStream = new FileInputStream(grammarFile);
+ Grammar grammar = GrammarParserImpl.read(inputStream);
+ inputStream.close();
+ grammar.check();
+
+ AnaGrammar anaGrammar = new AnaGrammar(grammar);
+ ParseTable table = ParseTableBuilder.build(anaGrammar);
+
+ File directory = grammarFile.getParentFile();
+ String name = grammarFile.getName();
+ {
+ int p = name.lastIndexOf('.');
+ if(p > 0)
+ name = name.substring(0, p);
+ }
+
+ // Write parse table
+ CompressedParseTable compressedTable = table.compress();
+ compressedTable.writeTo(new File(directory, name + "Parser.dat"));
+
+ // Write classes
+ {
+ String className = name + "Terminals";
+ GenerateEnum.generate(new File(directory, className + ".java"),
+ packageName, className,
+ anaGrammar.terminalNames);
+ }
+ {
+ String className = name + "Parser";
+ new GenerateParser(packageName, className, anaGrammar, exceptionName, compressedTable)
+ .generate(new File(directory, className + ".java"));
+ }
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.util.Arrays;
+
+public class CompressTable {
+
+ private static class Row implements Comparable<Row> {
+ int id;
+ int density;
+ int minPos;
+ int maxPos;
+ int[] data;
+ int displacement;
+
+ @Override
+ public int compareTo(Row o) {
+ return o.density < density ? 1 : o.density > density ? -1 : 0;
+ }
+ }
+
+ public static CompressedTable compress(int[][] table_) {
+ // Sort table by size
+ Row[] table = new Row[table_.length];
+ for(int i=0;i<table.length;++i) {
+ Row row = new Row();
+ row.id = i;
+ row.data = table_[i];
+ int density = 0;
+ int minPos = -1;
+ int maxPos = -1;
+ for(int j=0;j<row.data.length;++j) {
+ int el = row.data[j];
+ if(el != 0) {
+ ++density;
+ if(minPos == -1)
+ minPos = j;
+ maxPos = j;
+ }
+ }
+ row.density = density;
+ row.minPos = minPos;
+ row.maxPos = maxPos;
+ table[i] = row;
+ }
+ Arrays.sort(table);
+
+ // Start to fill
+ int width = table_.length == 0 ? 0 : table_[0].length;
+ int capacity = 2*table_.length*width;
+ int[] rowIds = new int[capacity];
+ Arrays.fill(rowIds, -1);
+ int[] values = new int[capacity];
+ for(Row row : table) {
+ if(row.density == 0) {
+ row.displacement = capacity/2;
+ continue;
+ }
+ tryAgain: for(int dId=0;dId<capacity;++dId) {
+ int d = capacity/2 + ((dId&1)==0 ? dId/2 : -1-dId/2);
+ for(int p = row.minPos;p<=row.maxPos;++p) {
+ int newValue = row.data[p];
+
+ if(newValue != 0) {
+ int id = d+p;
+ if(rowIds[id] >= 0)
+ continue tryAgain;
+ }
+ }
+ row.displacement = d;
+ for(int p = row.minPos;p<=row.maxPos;++p) {
+ int val = row.data[p];
+ if(val != 0) {
+ int id = d+p;
+ rowIds[id] = row.id;
+ values[id] = val;
+ }
+ }
+ break;
+ }
+ }
+
+ // Produce final tables
+ int minDis=capacity;
+ int maxDis=0;
+ for(Row row : table) {
+ int d = row.displacement;
+ if(d < minDis)
+ minDis = d;
+ if(d > maxDis)
+ maxDis = d;
+ }
+ int[] displacement = new int[table.length];
+ for(int i=0;i<table.length;++i)
+ displacement[table[i].id] = table[i].displacement-minDis;
+ rowIds = Arrays.copyOfRange(rowIds, minDis, maxDis+width);
+ values = Arrays.copyOfRange(values, minDis, maxDis+width);
+
+ return new CompressedTable(rowIds, values, displacement);
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+public class CompressedParseTable {
+ public final CompressedTable actionTable;
+ public final int[] errorTable;
+ public final CompressedTable gotoTable;
+ public final int[] productionInfo;
+ public final int[] initialStates;
+ public final String[] stateDescriptions;
+
+ public CompressedParseTable(CompressedTable actionTable,
+ int[] errorTable,
+ CompressedTable gotoTable, int[] productionInfo,
+ int[] initialStates,
+ String[] stateDescriptions) {
+ this.actionTable = actionTable;
+ this.errorTable = errorTable;
+ this.gotoTable = gotoTable;
+ this.productionInfo = productionInfo;
+ this.initialStates = initialStates;
+ this.stateDescriptions = stateDescriptions;
+ }
+
+ public void writeTo(File file) throws IOException {
+ FileOutputStream stream = new FileOutputStream(file);
+ DataOutputStream output = new DataOutputStream(stream);
+ actionTable.writeTo(output);
+ for(int val : errorTable)
+ output.writeInt(val);
+ gotoTable.writeTo(output);
+ for(int val : productionInfo)
+ output.writeInt(val);
+ output.close();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+
+public class CompressedTable {
+ public final int[] rowIndex;
+ public final int[] columnIndex;
+ public final int[] table;
+
+ public CompressedTable(int[] rowIndex, int[] columnIndex, int[] table) {
+ this.rowIndex = rowIndex;
+ this.columnIndex = columnIndex;
+ this.table = table;
+ }
+
+ public void writeTo(DataOutputStream output) throws IOException {
+ for(int v : rowIndex)
+ output.writeInt(v);
+ for(int v : columnIndex)
+ output.writeInt(v);
+ for(int v : table)
+ output.writeShort(v);
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+
+public class ErrorTable {
+ public static int[] createErrorTable(int[][] table) {
+ int columns = table[0].length;
+ int[] result = new int[(table.length*columns+31)/32];
+ int p=0;
+ for(int[] row : table)
+ for(int v : row) {
+ if(v == ParseTableBuilder.ERROR_ACTION)
+ result[p/32] |= 1 << (p%32);
+ ++p;
+ }
+ return result;
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+
+public class GCCompress {
+
+ public static final int DONT_CARE = ParseTableBuilder.ERROR_ACTION;
+
+ private static int[][] compressRows(int[] colors, final int[][] table) {
+ final int columns = table[0].length;
+ int colorCount = GraphColoring.color(colors, new GraphColoring.ColGraph() {
+ @Override
+ public int size() {
+ return table.length;
+ }
+
+ @Override
+ public boolean areConnected(int a, int b) {
+ int[] aRow = table[a];
+ int[] bRow = table[b];
+ for(int i=0;i<columns;++i) {
+ int aV = aRow[i];
+ int bV = bRow[i];
+ if(aV != DONT_CARE && bV != DONT_CARE && aV != bV)
+ return true;
+ }
+ return false;
+ }
+ });
+
+ final int[][] compr = new int[colorCount][columns];
+ for(int i=0;i<compr.length;++i)
+ Arrays.fill(compr[i], DONT_CARE);
+
+ for(int i=0;i<table.length;++i) {
+ int color = colors[i];
+ int[] inRow = table[i];
+ int[] outRow = compr[color];
+ for(int j=0;j<columns;++j) {
+ int v = inRow[j];
+ if(v != DONT_CARE)
+ outRow[j] = v;
+ }
+ }
+
+ return compr;
+ }
+
+ private static int[][] compressColumns(int[] colors, final int[][] table) {
+ final int columns = table[0].length;
+ int colorCount = GraphColoring.color(colors, new GraphColoring.ColGraph() {
+ @Override
+ public int size() {
+ return columns;
+ }
+
+ @Override
+ public boolean areConnected(int a, int b) {
+ for(int i=0;i<table.length;++i) {
+ int[] row = table[i];
+ int aV = row[a];
+ int bV = row[b];
+ if(aV != DONT_CARE && bV != DONT_CARE && aV != bV)
+ return true;
+ }
+ return false;
+ }
+ });
+
+ final int[][] compr = new int[table.length][colorCount];
+ for(int i=0;i<compr.length;++i) {
+ Arrays.fill(compr[i], DONT_CARE);
+ }
+ for(int i=0;i<table.length;++i) {
+ for(int j=0;j<columns;++j) {
+ int v = table[i][j];
+ if(v != DONT_CARE) {
+ int color = colors[j];
+ compr[i][color] = v;
+ }
+ }
+ }
+
+ return compr;
+ }
+
+ public static CompressedTable compress(int[][] table) {
+ System.out.println("Compress:");
+ System.out.println(" Rows: " + table.length);
+ System.out.println(" Columns: " + table[0].length);
+
+ int[] rowIndex = new int[table.length];
+ int[] columnIndex = new int[table[0].length];
+
+ table = compressRows(rowIndex, table);
+ table = compressColumns(columnIndex, table);
+
+ System.out.println(" Compressed rows: " + table.length);
+ System.out.println(" Compressed columns: " + table[0].length);
+
+ int columns = table[0].length;
+
+ int[] compressedTable = new int[table.length * columns];
+ for(int i=0;i<table.length;++i)
+ System.arraycopy(table[i], 0, compressedTable, i*columns, columns);
+ for(int i=0;i<rowIndex.length;++i)
+ rowIndex[i] *= columns;
+ return new CompressedTable(rowIndex, columnIndex, compressedTable);
+
+ //for(int[] row : table)
+ // System.out.println(Arrays.toString(row));
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.compression;
+
+
+public class GraphColoring {
+
+ public static interface ColGraph {
+ int size();
+ boolean areConnected(int a, int b);
+ }
+
+ /**
+ * Finds a coloring of the graph so that all connected vertices
+ * have a different color. Returns the number of colors used.
+ */
+ public static int color(int[] color, ColGraph graph) {
+ int size = graph.size();
+
+ // unassigned/unassignedCount is a list of unassigned vertex ids
+ int[] unassigned = new int[size];
+ for(int i=0;i<size;++i)
+ unassigned[i] = i;
+ int unassignedCount = size;
+
+ // blockedCount[vertexId] is the number of blocked colors for vertexId
+ int[] blockedCount = new int[size];
+
+ // blocked[color][vertexId] is true, if color is not a possible color for vertexId
+ boolean[][] blocked = new boolean[size][size];
+
+ // Number of colors currently in use
+ int colorCount = 0;
+
+ while(unassignedCount > 0) {
+ // Choose a vertex is bestId from unassigned list so that
+ // bestBlockedCount=blockedCount[bestId] is maximized
+ int bestId = 0;
+ int bestUnassignedTableId = 0;
+ int bestBlockedCount = -1;
+ for(int i=0;i<unassignedCount;++i) {
+ int id = unassigned[i];
+ int tempCount = blockedCount[id];
+ if(tempCount > bestBlockedCount) {
+ bestUnassignedTableId = i;
+ bestId = id;
+ bestBlockedCount = tempCount;
+ }
+ }
+
+ // Remove from unassigned table
+ unassigned[bestUnassignedTableId] = unassigned[--unassignedCount];
+
+ // Choose color
+ int chosenColor = 0;
+ if(bestBlockedCount == colorCount) // All colors are blocked
+ chosenColor = colorCount++;
+ else { // There is unblocked color
+ for(int i=0;i<colorCount;++i)
+ if(!blocked[i][bestId]) {
+ chosenColor = i;
+ break;
+ }
+ }
+ color[bestId] = chosenColor;
+
+ // Update blocked table
+ boolean[] blockedRow = blocked[chosenColor];
+ for(int i=0;i<unassignedCount;++i) {
+ int id = unassigned[i];
+ if(!blockedRow[id] && graph.areConnected(bestId, id)) {
+ blockedRow[id] = true;
+ ++blockedCount[id];
+ }
+ }
+ }
+ return colorCount;
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.grammar;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.Namer;
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+public class AnaGrammar implements Namer {
+
+ public ArrayList<Prod> prods = new ArrayList<Prod>();
+ public int[] nonterminalPos;
+
+ public final String[] terminalNames;
+ public final String[] nonterminalNames;
+
+ public final int[] initialNonterminals;
+
+ public boolean[] nullable;
+ public int[][] first;
+
+
+ public AnaGrammar(Grammar grammar) {
+ initialNonterminals = grammar.initialNonterminals;
+ terminalNames = Arrays.copyOf(grammar.terminalNames, grammar.terminalNames.length+1);
+ terminalNames[terminalNames.length-1] = "EOF";
+ nonterminalNames = Arrays.copyOf(grammar.nonterminalNames,
+ grammar.nonterminalNames.length+initialNonterminals.length);
+ for(int i=1;i<=initialNonterminals.length;++i)
+ nonterminalNames[nonterminalNames.length-i] = "init$" + i;
+
+ // Convert grammar
+ ArrayList<Prod>[] prodMap = new ArrayList[nonterminalNames.length];
+ for(int i=0;i<nonterminalNames.length;++i)
+ prodMap[i] = new ArrayList<Prod>();
+ for(Production production : grammar.productions) {
+ Prod prod = new Prod(production.name, ~production.lhs,
+ production.rhs.toAutomaton().determinize().minimize(),
+ production.annotations);
+ prodMap[prod.lhs].add(prod);
+ }
+
+ // Initial production
+ for(int i=1;i<=initialNonterminals.length;++i) {
+ DFA dfa = new DFA();
+ int s0 = dfa.newState();
+ dfa.setInitialState(s0);
+ int s1 = dfa.newState();
+ dfa.addTransition(s0, initialNonterminals[i-1], s1);
+ int s2 = dfa.newState();
+ dfa.addTransition(s1, terminalNames.length-1, s2);
+ dfa.setAccepts(s2, true);
+ Prod prod = new Prod("Init", nonterminalNames.length-i, dfa, new TIntByteHashMap());
+ prodMap[prod.lhs].add(prod);
+ }
+
+ TIntArrayList pos = new TIntArrayList();
+ for(int i=0;i<nonterminalNames.length;++i) {
+ pos.add(prods.size());
+ prods.addAll(prodMap[i]);
+ }
+ pos.add(prods.size());
+ nonterminalPos = pos.toArray();
+
+ // Basic analysis
+ computeNullableNonterminals();
+ }
+
+ private static class FrontierItem {
+ public final int production;
+ public final int state;
+
+ public FrontierItem(int production, int state) {
+ this.production = production;
+ this.state = state;
+ }
+
+ @Override
+ public int hashCode() {
+ return 31*state + production;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(this == obj)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ FrontierItem other = (FrontierItem)obj;
+ return other.state == state && other.production == production;
+ }
+ }
+
+ private void computeNullableNonterminals() {
+ int nonterminalCount = nonterminalNames.length;
+ nullable = new boolean[nonterminalCount];
+ final TIntHashSet[] first = new TIntHashSet[nonterminalCount];
+ final TIntHashSet[] gfirst = new TIntHashSet[nonterminalCount];
+ for(int i=0;i<nonterminalCount;++i) {
+ first[i] = new TIntHashSet();
+ gfirst[i] = new TIntHashSet();
+ }
+ final ArrayList<FrontierItem>[] pendingItems = new ArrayList[nonterminalCount];
+ final ArrayList<FrontierItem> stack = new ArrayList<FrontierItem>();
+ THashSet<FrontierItem> handledItems = new THashSet<FrontierItem>();
+ for(int i=0;i<nonterminalCount;++i)
+ pendingItems[i] = new ArrayList<FrontierItem>();
+
+ for(int i=0;i<prods.size();++i)
+ stack.add(new FrontierItem(i, prods.get(i).rhs.getInitialState()));
+ while(!stack.isEmpty()) {
+ final FrontierItem cur = stack.remove(stack.size()-1);
+ if(!handledItems.add(cur))
+ continue;
+ final Prod curProd = prods.get(cur.production);
+ if(curProd.rhs.getAccepts(cur.state)) {
+ nullable[curProd.lhs] = true;
+ stack.addAll(pendingItems[curProd.lhs]);
+ }
+ curProd.rhs.forEachTransition(cur.state, new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol, int targetState) {
+ if(symbol < 0) {
+ symbol = ~symbol;
+ gfirst[symbol].add(curProd.lhs);
+ FrontierItem item = new FrontierItem(cur.production, targetState);
+ if(nullable[symbol])
+ stack.add(item);
+ else
+ pendingItems[symbol].add(item);
+ }
+ else
+ first[curProd.lhs].add(symbol);
+ return true;
+ }
+ });
+ }
+
+ gclose(first, gfirst);
+ this.first = new int[first.length][];
+ for(int i=0;i<first.length;++i)
+ this.first[i] = first[i].toArray();
+
+ /*System.out.println("Nullable nonterminals:");
+ for(int i=0;i<nullable.length;++i) {
+ System.out.print(" " + nonterminalNames[i] + " -> ");
+ if(nullable[i])
+ System.out.print(" NULL");
+ for(int s : this.first[i])
+ System.out.print(" " + terminalNames[s]);
+ System.out.println();
+ }*/
+ }
+
+ public static void gclose(TIntHashSet[] sets, TIntHashSet[] graph_) {
+ int[][] graph = new int[graph_.length][];
+ for(int i=0;i<graph.length;++i)
+ graph[i] = graph_[i].toArray();
+
+ final TIntArrayList stack = new TIntArrayList();
+ for(int i=0;i<sets.length;++i) {
+ final int i_ = i;
+ sets[i].forEach(new TIntProcedure() {
+ @Override
+ public boolean execute(int value) {
+ stack.add(i_);
+ stack.add(value);
+ return true;
+ }
+ });
+ }
+
+ while(!stack.isEmpty()) {
+ int sp = stack.size();
+ int value = stack.removeAt(sp-1);
+ int id = stack.removeAt(sp-2);
+ for(int s : graph[id])
+ if(sets[s].add(value)) {
+ stack.add(s);
+ stack.add(value);
+ }
+ }
+ }
+
+ public String getName(int symbolId) {
+ if(symbolId >= 0)
+ return terminalNames[symbolId];
+ else
+ return nonterminalNames[~symbolId];
+ }
+
+ private class AlmostAcceptsProc implements TIntIntProcedure {
+ DFA rhs;
+ TIntHashSet visited = new TIntHashSet();
+
+ boolean result;
+
+ public AlmostAcceptsProc(DFA rhs) {
+ this.rhs = rhs;
+ }
+
+ @Override
+ public boolean execute(int a, int b) {
+ if(a < 0 && nullable[~a])
+ visit(b);
+ return !result;
+ }
+
+ public void visit(int position) {
+ if(visited.add(position)) {
+ if(rhs.getAccepts(position)) {
+ result = true;
+ return;
+ }
+ rhs.forEachTransition(position, this);
+ }
+ }
+ }
+
+ public boolean almostAccepts(DFA rhs, int position) {
+ AlmostAcceptsProc proc = new AlmostAcceptsProc(rhs);
+ proc.visit(position);
+ return proc.result;
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+import gnu.trove.map.hash.TIntByteHashMap;
+
+public class Prod {
+ public final String name;
+ public final int lhs;
+ public final DFA rhs;
+ public final TIntByteHashMap annotations;
+
+ public Prod(String name, int lhs, DFA rhs, TIntByteHashMap annotations) {
+ this.name = name;
+ this.lhs = lhs;
+ this.rhs = rhs;
+ this.annotations = annotations;
+ }
+
+ public String toString(AnaGrammar grammar) {
+ StringBuilder b = new StringBuilder();
+ b.append(grammar.nonterminalNames[lhs]);
+ b.append(" ::= ");
+ rhs.toRegexp().toString(b, grammar, 0);
+ return b.toString();
+ }
+}
\ No newline at end of file
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.java;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+
+public class GenerateEnum {
+
+ public static void generate(File file, String packageName, String className, String[] names) throws IOException {
+ PrintStream out = new PrintStream(file);
+ out.println("package " + packageName + ";");
+ out.println();
+ out.println("public interface " + className + " {");
+ for(int i=0;i<names.length;++i)
+ out.println(" public static final int " + names[i] + " = " + i + ";");
+ out.println("}");
+ out.close();
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.java;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+public class GenerateParser {
+
+ String packageName;
+ String className;
+ String exceptionName;
+ AnaGrammar grammar;
+ CompressedParseTable table;
+
+ public GenerateParser(String packageName, String className,
+ AnaGrammar grammar, String exceptionName, CompressedParseTable table) {
+ this.packageName = packageName;
+ this.className = className;
+ this.grammar = grammar;
+ this.exceptionName = exceptionName;
+ this.table = table;
+ }
+
+ private String readTemplate() throws IOException {
+ InputStream stream = getClass().getResourceAsStream("Parser.java.template");
+ byte[] buffer = new byte[10000];
+ int pos = 0;
+ while(true) {
+ int c = stream.read(buffer, pos, buffer.length-pos);
+ if(c <= 0)
+ break;
+ pos += c;
+ if(pos == buffer.length)
+ buffer = Arrays.copyOf(buffer, buffer.length*2);
+ }
+ return new String(buffer, 0, pos, "UTF-8");
+ }
+
+ private static final Pattern PATTERN = Pattern.compile("\\$[A-Za-z0-9]+\\$");
+
+ public void generate(File file) throws IOException {
+ PrintStream out = new PrintStream(file, "UTF-8");
+
+ int aPos = 0;
+ String template = readTemplate();
+ Matcher matcher = PATTERN.matcher(template);
+ while(matcher.find()) {
+ int start = matcher.start();
+ int end = matcher.end();
+ out.print(template.substring(aPos, start));
+ String varName = template.substring(start+1, end-1);
+ generateVar(out, varName);
+ aPos = end;
+ }
+ out.print(template.substring(aPos));
+
+ out.close();
+ }
+
+ private void generateVar(PrintStream out, String varName) {
+ if("package".equals(varName)) {
+ out.print(packageName);
+ }
+ else if("class".equals(varName)) {
+ out.print(className);
+ }
+ else if("terminalCount".equals(varName)) {
+ out.print(grammar.terminalNames.length);
+ }
+ else if("nonterminalCount".equals(varName)) {
+ out.print(grammar.nonterminalNames.length);
+ }
+ else if("productCount".equals(varName)) {
+ out.print(grammar.prods.size());
+ }
+ else if("stateCount".equals(varName)) {
+ out.print(table.actionTable.rowIndex.length);
+ }
+ else if("parseMethods".equals(varName)) {
+ for(int i=0;i<grammar.initialNonterminals.length;++i) {
+ String ntName = grammar.getName(grammar.initialNonterminals[i]);
+ ntName = ntName.substring(0, 1).toUpperCase() + ntName.substring(1);
+ out.println(" public Object parse" + ntName + "() {");
+ out.println(" return parse(" + table.initialStates[i] + ");");
+ out.println(" }");
+ }
+ }
+ else if("terminalNames".equals(varName)) {
+ for(int i=0;i<grammar.terminalNames.length;++i) {
+ if(i > 0)
+ out.println(",");
+ out.print(" \"" + grammar.terminalNames[i] + "\"");
+ }
+ }
+ else if("nonterminalNames".equals(varName)) {
+ for(int i=0;i<grammar.nonterminalNames.length;++i) {
+ if(i > 0)
+ out.println(",");
+ out.print(" \"" + grammar.nonterminalNames[i] + "\"");
+ }
+ }
+ else if("stateDescriptions".equals(varName)) {
+ for(int i=0;i<table.stateDescriptions.length;++i) {
+ if(i > 0)
+ out.println(",");
+ out.print(" \"" + table.stateDescriptions[i].replace("\n", "\\n") + "\"");
+ }
+ }
+ else if("reduceCases".equals(varName)) {
+ for(int i=0;i<grammar.prods.size();++i) {
+ Prod prod = grammar.prods.get(i);
+ if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+ continue;
+ out.println(" case " + i + ":");
+ out.println(" return reduce" + prod.name + "();");
+ }
+ }
+ else if("reduceDelegates".equals(varName)) {
+ for(int i=0;i<grammar.prods.size();++i) {
+ Prod prod = grammar.prods.get(i);
+ if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+ out.println(" null,");
+ else {
+ out.println(" new ReduceDelegate() {");
+ out.println(" public Object reduce(" + className + " parser) {");
+ out.println(" return parser.reduce" + prod.name + "();");
+ out.println(" }");
+ out.println(" },");
+ }
+ }
+ }
+ else if("reduceMethods".equals(varName)) {
+ THashSet<String> usedNames = new THashSet<String>();
+ for(int i=0;i<grammar.prods.size();++i) {
+ Prod prod = grammar.prods.get(i);
+ if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+ continue;
+ if(usedNames.add(prod.name)) {
+ out.println(" /**");
+ out.println(" * " + prod.toString(grammar).replace("*", "*"));
+ out.println(" */");
+ out.println(" protected abstract Object reduce" + prod.name + "();");
+ }
+ }
+ }
+ else if("Token".equals(varName)) {
+ out.print("Token");
+ }
+ else if("Symbol".equals(varName)) {
+ out.print("Object");
+ }
+ else if("tokenId".equals(varName)) {
+ out.print("id");
+ }
+ else if("imports".equals(varName)) {
+ out.println("import org.simantics.scl.compiler.internal.parsing.Token;");
+ }
+ else if("actionTableLength".equals(varName)) {
+ out.print(table.actionTable.table.length);
+ }
+ else if("gotoTableLength".equals(varName)) {
+ out.print(table.gotoTable.table.length);
+ }
+ else if("errorTableLength".equals(varName)) {
+ out.print(table.errorTable.length);
+ }
+ else if("Exception".equals(varName)) {
+ out.print(exceptionName);
+ }
+ }
+
+}
--- /dev/null
+package $package$;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
+
+$imports$
+public abstract class $class$ {
+ public static final boolean TRACE = false;
+
+ private static final int INITIAL_CAPACITY = 16;
+ private static final int STATE_COUNT = $stateCount$;
+ private static final int TERMINAL_COUNT = $terminalCount$;
+ private static final int NONTERMINAL_COUNT = $nonterminalCount$;
+ private static final int PRODUCT_COUNT = $productCount$;
+
+ private static final int[] ACTION_ROW_ID = new int[STATE_COUNT];
+ private static final int[] ACTION_COLUMN_ID = new int[TERMINAL_COUNT];
+ private static final short[] ACTION_TABLE = new short[$actionTableLength$];
+ private static final int[] ERROR_TABLE = new int[$errorTableLength$];
+ private static final int[] GOTO_ROW_ID = new int[STATE_COUNT];
+ private static final int[] GOTO_COLUMN_ID = new int[NONTERMINAL_COUNT];
+ private static final short[] GOTO_TABLE = new short[$gotoTableLength$];
+ private static final int[] PRODUCT_LHS = new int[PRODUCT_COUNT];
+
+ private static final short STATE_MASK = (short)0x0fff;
+ private static final short REDUCE_MASK = (short)0x8000;
+ private static final short POP_MASK = (short)0x4000;
+ private static final short PUSH_MASK = (short)0x2000;
+ private static final short ERROR_ACTION = (short)0xffff;
+ private static final short ACCEPT_ACTION = (short)0xfffe;
+
+ public static final String[] TERMINAL_NAMES = new String[] {
+$terminalNames$
+ };
+
+ public static final String[] NONTERMINAL_NAMES = new String[] {
+$nonterminalNames$
+ };
+
+ static {
+ try {
+ DataInputStream input = new DataInputStream($class$.class.getResourceAsStream("$class$.dat"));
+ for(int i=0;i<ACTION_ROW_ID.length;++i)
+ ACTION_ROW_ID[i] = input.readInt();
+ for(int i=0;i<ACTION_COLUMN_ID.length;++i)
+ ACTION_COLUMN_ID[i] = input.readInt();
+ for(int i=0;i<ACTION_TABLE.length;++i)
+ ACTION_TABLE[i] = input.readShort();
+ for(int i=0;i<ERROR_TABLE.length;++i)
+ ERROR_TABLE[i] = input.readInt();
+ for(int i=0;i<GOTO_ROW_ID.length;++i)
+ GOTO_ROW_ID[i] = input.readInt();
+ for(int i=0;i<GOTO_COLUMN_ID.length;++i)
+ GOTO_COLUMN_ID[i] = input.readInt();
+ for(int i=0;i<GOTO_TABLE.length;++i)
+ GOTO_TABLE[i] = input.readShort();
+ for(int i=0;i<PRODUCT_LHS.length;++i)
+ PRODUCT_LHS[i] = input.readInt();
+ input.close();
+ } catch(IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private static short getAction(int state, int symbol) {
+ int id = TERMINAL_COUNT*state + symbol;
+ if( ((ERROR_TABLE[id>>5] >> (id&31))&1) != 0 )
+ return ERROR_ACTION;
+ return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]];
+ }
+
+ private static short getGoto(int state, int symbol) {
+ return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]];
+ }
+
+ protected abstract $Token$ nextToken();
+
+ private $Symbol$[] symbolStack = new $Symbol$[INITIAL_CAPACITY];
+ private int symbolStackLength = 0;
+
+ private int[] stateStack = new int[INITIAL_CAPACITY];
+ private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY];
+ private int stateStackLength = 0;
+
+ // For reduce
+ private int reductionLength;
+
+ protected int length() {
+ return reductionLength;
+ }
+
+ protected $Symbol$ get(int i) {
+ if(i < 0 || i >= reductionLength)
+ throw new IndexOutOfBoundsException();
+ return symbolStack[symbolStackLength+i];
+ }
+
+ private String parseErrorDescription(int state, $Token$ token, int tokenId) {
+ StringBuilder b = new StringBuilder();
+ b.append("Unexpected token '").append(token)
+ .append("' (").append(TERMINAL_NAMES[tokenId])
+ .append("). Expected one of ");
+ ArrayList<String> possibleTerminals = new ArrayList<String>();
+ for(int i=0;i<TERMINAL_COUNT;++i)
+ if(getAction(state, i) != ERROR_ACTION)
+ possibleTerminals.add(TERMINAL_NAMES[i]);
+ Collections.sort(possibleTerminals);
+ for(int i=0;i<possibleTerminals.size();++i) {
+ if(i > 0)
+ b.append(", ");
+ b.append(possibleTerminals.get(i));
+ }
+ b.append('.');
+ return b.toString();
+ }
+
+ protected abstract RuntimeException syntaxError($Token$ token, String description);
+
+ private static String describeAction(boolean isGoto, int action) {
+ if(action == ERROR_ACTION)
+ return "ERROR";
+ if(action == ACCEPT_ACTION)
+ return "ACCEPT";
+ StringBuilder b = new StringBuilder();
+ if(isGoto)
+ b.append("GOTO ");
+ else {
+ if((action & REDUCE_MASK) != 0) {
+ action ^= REDUCE_MASK;
+ b.append("REDUCE");
+ }
+ else
+ b.append("SHIFT");
+ }
+ if((action & POP_MASK) != 0) {
+ action ^= POP_MASK;
+ b.append(" POP");
+ }
+ if((action & PUSH_MASK) != 0) {
+ action ^= PUSH_MASK;
+ b.append(" PUSH");
+ }
+ b.append(' ').append(action);
+ return b.toString();
+ }
+
+ private void printState(int state) {
+ System.out.print("state=" + state + ":");
+ for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+ Object s = symbolStack[i];
+ if(s instanceof Token)
+ System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+ else if(s == null)
+ System.out.print(" null");
+ else
+ System.out.print(" " + s.getClass().getSimpleName());
+ while(j>=0 && symbolStackPositionStack[j]==i)
+ System.out.print(" (" + stateStack[j--] + ")");
+ }
+ System.out.println();
+ }
+
+ private $Symbol$ parse(int state) {
+ while(true) {
+ $Token$ token = nextToken();
+ int tokenId = token.$tokenId$;
+ if(TRACE)
+ System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
+ while(true) {
+ if(TRACE)
+ printState(state);
+ short action = getAction(state, tokenId);
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(false, action));
+ //System.out.println(STATE_DESCRIPTIONS[state]);
+ if((action & REDUCE_MASK) != 0) {
+ if(action == ACCEPT_ACTION)
+ return symbolStack[symbolStackLength-1];
+ if(action == ERROR_ACTION)
+ throw syntaxError(token, parseErrorDescription(state, token, tokenId));
+ int popAmount = (action >>> 13)&3;
+ if(TRACE) {
+ if(popAmount > 0)
+ System.out.println(" POP " + popAmount);
+ }
+ stateStackLength -= popAmount;
+ action &= STATE_MASK;
+
+ int reductionBegin = symbolStackPositionStack[--stateStackLength];
+
+ reductionLength = symbolStackLength-reductionBegin;
+ symbolStackLength = reductionBegin;
+
+ if(symbolStackLength == symbolStack.length)
+ symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+ $Symbol$ symbol = reduce(action);
+ postReduce(symbol);
+ symbolStack[symbolStackLength] = symbol;
+
+ state = stateStack[stateStackLength];
+ if(TRACE) {
+ ++symbolStackLength;
+ printState(state);
+ --symbolStackLength;
+ System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+ }
+ action = getGoto(state, PRODUCT_LHS[action]);
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(true, action));
+
+ // Pop state
+ if((action & POP_MASK) != 0) {
+ --stateStackLength;
+ }
+ // Push state
+ if((action & PUSH_MASK) != 0) {
+ if(stateStackLength == stateStack.length) {
+ stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+ symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+ }
+ symbolStackPositionStack[stateStackLength] = symbolStackLength;
+ stateStack[stateStackLength++] = state;
+ }
+ state = action & STATE_MASK;
+ ++symbolStackLength;
+ }
+ else {
+ // Pop state
+ if((action & POP_MASK) != 0) {
+ --stateStackLength;
+ }
+ // Push state
+ if((action & PUSH_MASK) != 0) {
+ if(stateStackLength == stateStack.length) {
+ stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+ symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+ }
+ symbolStackPositionStack[stateStackLength] = symbolStackLength;
+ stateStack[stateStackLength++] = state;
+ }
+
+ // New state
+ state = action & STATE_MASK;
+
+ // Push symbol
+ if(symbolStackLength == symbolStack.length)
+ symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+ symbolStack[symbolStackLength++] = token;
+ break;
+ }
+ }
+ }
+ }
+
+$parseMethods$
+
+ protected $Symbol$ reduce(int productionId) {
+ try {
+ switch(productionId) {
+$reduceCases$
+ default:
+ throw new RuntimeException("Internal parser error.");
+ }
+ } catch($Exception$ e) {
+ StringBuilder b = new StringBuilder();
+ b.append("Failed to reduce");
+ for(int i=0;i<length();++i) {
+ Object obj = get(i);
+ b.append("\n (").append(i).append(") \"").append(obj).append('\"');
+ if(obj instanceof $Token$)
+ b.append(" (").append(TERMINAL_NAMES[(($Token$)obj).$tokenId$]).append(")");
+ else
+ b.append(" [").append(obj.getClass().getSimpleName()).append("]");
+ }
+ throw new RuntimeException(b.toString(), e);
+ }
+ }
+
+$reduceMethods$
+ protected void postReduce($Symbol$ reduced) {
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.table;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+
+public class Item implements Comparable<Item> {
+ public final int production;
+ public final int position;
+ public int stackPos;
+
+ public Item(int production, int position, int stackPos) {
+ this.production = production;
+ this.position = position;
+ this.stackPos = stackPos;
+ }
+
+ public int[] nextSymbols(AnaGrammar grammar) {
+ Prod prod = grammar.prods.get(production);
+ return prod.rhs.nextStates(position);
+ }
+
+ public int nextPosition(AnaGrammar grammar, int symbol) {
+ Prod prod = grammar.prods.get(production);
+ return prod.rhs.getTransition(position, symbol);
+ }
+
+ @Override
+ public int hashCode() {
+ return (production * 31 + position) * 31 + stackPos;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null || getClass() != obj.getClass())
+ return false;
+ Item other = (Item) obj;
+ return production == other.production
+ && position == other.position
+ && stackPos == other.stackPos;
+ }
+
+ @Override
+ public int compareTo(Item o) {
+ if(production < o.production)
+ return -1;
+ if(production > o.production)
+ return 1;
+ if(position < o.position)
+ return -1;
+ if(position > o.position)
+ return 1;
+ return 0;
+ }
+
+ public String toString(AnaGrammar grammar) {
+ Prod prod = grammar.prods.get(production);
+ StringBuilder b = new StringBuilder();
+ b.append(grammar.nonterminalNames[prod.lhs]);
+ b.append(" ::= ");
+ /*prod.rhs.toRegexpTo(position).toString(b, grammar, 0);
+ b.append(" . ");
+ prod.rhs.toRegexpFrom(position).toString(b, grammar, 0);*/
+ prod.rhs.toPositionalRegexp(position).toString(b, grammar, 0);
+ if(stackPos >= 0)
+ b.append(" (stack ").append(stackPos).append(')');
+ /*for(int i=0;i<position;++i)
+ b.append(' ').append(grammar.getName(prod.rhs[i]));
+ b.append(" *");
+ for(int i=position;i<prod.rhs.length;++i)
+ b.append(' ').append(grammar.getName(prod.rhs[i]));*/
+ return b.toString();
+ }
+
+ public boolean isReducible(AnaGrammar grammar) {
+ return grammar.prods.get(production).rhs.getAccepts(position);
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+
+public class ItemSet {
+ public final Item[] items;
+ int hash;
+
+ public ItemSet(Item[] items) {
+ this.items = items;
+ Arrays.sort(items);
+ }
+
+ public ItemSet(Collection<Item> items) {
+ this(items.toArray(new Item[items.size()]));
+ }
+
+ private static final int PROD = 31*31;
+
+ @Override
+ public int hashCode() {
+ if(hash == 0) {
+ int h = 1;
+ for(Item item : items)
+ h = PROD*h + item.hashCode();
+ hash = h;
+ }
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(this == obj)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ ItemSet other = (ItemSet)obj;
+ if(other.items.length != items.length)
+ return false;
+ for(int i=0;i<items.length;++i)
+ if(!items[i].equals(other.items[i]))
+ return false;
+ return true;
+ }
+
+ public String toString(AnaGrammar grammar) {
+ StringBuilder b = new StringBuilder();
+ boolean first = true;
+ for(Item item : items) {
+ if(first)
+ first = false;
+ else
+ b.append('\n');
+ b.append(" ").append(item.toString(grammar));
+ }
+ return b.toString();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.compression.ErrorTable;
+import org.simantics.scl.compiler.parser.generator.compression.GCCompress;
+
+public class ParseTable {
+ public final int stateCount;
+ public final int[][] actionTable;
+ public final int[][] gotoTable;
+ public final int[] productionInfo;
+ public final int[] initialStates;
+ public final String[] stateDescriptions;
+
+ public ParseTable(
+ int stateCount,
+ int[][] actionTable,
+ int[][] gotoTable,
+ int[] productionInfo,
+ int[] initialStates,
+ String[] stateDescriptions) {
+ this.stateCount = stateCount;
+ this.actionTable = actionTable;
+ this.gotoTable = gotoTable;
+ this.productionInfo = productionInfo;
+ this.initialStates = initialStates;
+ this.stateDescriptions = stateDescriptions;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder b = new StringBuilder();
+ b.append("productionInfo = ").append(Arrays.toString(productionInfo)).append('\n');
+ b.append("actions:\n");
+ for(int[] actions : actionTable) {
+ b.append(" {");
+ for(int i=0;i<actions.length;++i) {
+ if(i > 0)
+ b.append(", ");
+ b.append(actions[i]);
+ }
+ b.append("},\n");
+ }
+ b.append("gotos:\n");
+ for(int[] gotos : gotoTable) {
+ b.append(" {");
+ for(int i=0;i<gotos.length;++i) {
+ if(i > 0)
+ b.append(", ");
+ b.append(gotos[i]);
+ }
+ b.append("},\n");
+ }
+ return b.toString();
+ }
+
+ public CompressedParseTable compress() {
+ return new CompressedParseTable(
+ GCCompress.compress(actionTable),
+ ErrorTable.createErrorTable(actionTable),
+ GCCompress.compress(gotoTable),
+ productionInfo,
+ initialStates,
+ stateDescriptions);
+ }
+
+ public void writeTo(File file) throws IOException {
+ FileOutputStream stream = new FileOutputStream(file);
+ DataOutputStream output = new DataOutputStream(stream);
+ for(int[] row : actionTable)
+ for(int val : row)
+ output.writeInt(val);
+ for(int[] row : gotoTable)
+ for(int val : row)
+ output.writeInt(val);
+ for(int val : productionInfo)
+ output.writeInt(val);
+ output.close();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.list.array.TLongArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TLongIntHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntObjectProcedure;
+import gnu.trove.procedure.TObjectIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+import gnu.trove.set.hash.TLongHashSet;
+
+public class ParseTableBuilder {
+ public final static int MAX_STACK_ID = 10;
+
+ private static final int STATE_MASK = 0x0fff;
+ private static final int REDUCE_MASK = 0x8000;
+ private static final int POP_MASK = 0x4000;
+ private static final int PUSH_MASK = 0x2000;
+ public static final int ERROR_ACTION = 0xffff;
+ private static final int ACCEPT_ACTION = 0xfffe;
+
+ final AnaGrammar grammar;
+ private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
+ private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
+ private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
+ private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
+ private TIntArrayList backTransSymbols = new TIntArrayList();
+ private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
+ int[] initialStates;
+ TIntHashSet finalStates = new TIntHashSet();
+
+ private ParseTableBuilder(AnaGrammar grammar) {
+ this.grammar = grammar;
+ }
+
+ private static boolean isNonterminal(int symbol) {
+ return symbol < 0;
+ }
+
+ private void close(ArrayList<Item> items) {
+ THashSet<Item> itemSet = new THashSet<Item>(items);
+ for(int i=0;i<items.size();++i) {
+ Item item = items.get(i);
+ for(int nextSymbol : item.nextSymbols(grammar))
+ if(isNonterminal(nextSymbol)) {
+ nextSymbol = ~nextSymbol;
+ int pEnd = grammar.nonterminalPos[nextSymbol+1];
+ for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
+ Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
+ if(itemSet.add(newItem))
+ items.add(newItem);
+ }
+ }
+ }
+ }
+
+ private int getState(int backTransSymbol, ArrayList<Item> items) {
+ // Create state
+ close(items);
+ final ItemSet itemSet = new ItemSet(items);
+ if(states.contains(itemSet))
+ return states.get(itemSet);
+ final int newState = states.size();
+ states.put(itemSet, newState);
+ itemSets.add(itemSet);
+ backTransSymbols.add(backTransSymbol);
+ backLinks.add(new TIntArrayList(2));
+
+ // Create transitions
+ TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
+ //close(items);
+ for(Item item : items) {
+ for(int s : item.nextSymbols(grammar)) {
+ ArrayList<Item> l = transitionMap.get(s);
+ if(l == null) {
+ l = new ArrayList<Item>();
+ transitionMap.put(s, l);
+ }
+ l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
+ }
+ }
+
+ final TIntIntHashMap trans = new TIntIntHashMap();
+ final TIntIntHashMap stackOpMap = new TIntIntHashMap();
+ transitions.add(trans);
+ stackOps.add(stackOpMap);
+ if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
+ finalStates.add(newState);
+ }
+ transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
+ @Override
+ public boolean execute(int a, ArrayList<Item> b) {
+ boolean stackShift = false;
+ int minStackPos = Integer.MAX_VALUE;
+ for(Item item : b) {
+ if(item.stackPos == -1)
+ stackShift = true;
+ else
+ minStackPos = Math.min(minStackPos, item.stackPos);
+ }
+ int stackOp = 0;
+ if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
+ stackOp |= POP_MASK;
+ //System.out.println("minStackPos = " + minStackPos);
+ for(Item item : b)
+ if(item.stackPos >= 0)
+ --item.stackPos;
+ }
+ boolean stackOverflow = false;
+ if(stackShift) {
+ stackOp |= PUSH_MASK;
+ for(Item item : b) {
+ ++item.stackPos;
+ if(item.stackPos > MAX_STACK_ID)
+ stackOverflow = true;
+ }
+ }
+ stackOpMap.put(a, stackOp);
+ System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
+
+ if(stackOverflow) {
+ System.err.println("Stack overflow when following " + grammar.getName(a) + " at");
+ System.err.println(itemSet.toString(grammar));
+ }
+ else {
+ int state = getState(a, b);
+ trans.put(a, state);
+ backLinks.get(state).add(newState);
+ }
+ return true;
+ }
+
+ });
+ return newState;
+ }
+
+ TLongArrayList sMap = new TLongArrayList();
+ TLongIntHashMap sMapInv = new TLongIntHashMap();
+ TIntHashSet[] follow;
+
+ private static int getState(long s) {
+ return (int)(s >> 32);
+ }
+
+ private static int getSymbol(long s) {
+ return (int)s;
+ }
+
+ private static long getS(int state, int symbol) {
+ return (((long)state) << 32) | (long)symbol;
+ }
+
+ private void computeFollow() {
+ for(int i=0;i<itemSets.size();++i) {
+ final int source = i;
+ transitions.get(i).forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol, int target) {
+ if(symbol < 0) {
+ long s = getS(source, ~symbol);
+ int id = sMap.size();
+ sMap.add(s);
+ sMapInv.put(s, id);
+ }
+ return true;
+ }
+ });
+ }
+
+ // initfollow
+ follow = new TIntHashSet[sMap.size()];
+ final TIntHashSet[] gread = new TIntHashSet[follow.length];
+ final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
+ for(int i=0;i<follow.length;++i) {
+ gread[i] = new TIntHashSet();
+ gla[i] = new TIntHashSet();
+ }
+ for(int i=0;i<follow.length;++i) {
+ final int id = i;
+ long s = sMap.get(i);
+ int source = getState(s);
+ int symbol = getSymbol(s);
+ final int target = transitions.get(source).get(~symbol);
+ final TIntHashSet drSet = new TIntHashSet();
+ transitions.get(target).forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol2, int target2) {
+ if(symbol2 >= 0)
+ drSet.add(symbol2);
+ else if(grammar.nullable[~symbol2])
+ gread[sMapInv.get(getS(target, ~symbol2))].add(id);
+ return true;
+ }
+ });
+ if(finalStates.contains(target))
+ drSet.add(grammar.terminalNames.length-1);
+ follow[i] = drSet;
+
+ ItemSet set = itemSets.get(target);
+ for(Item targetItem : set.items) {
+ Prod prod = grammar.prods.get(targetItem.production);
+ if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
+ for(Item sourceItem : itemSets.get(source).items) {
+ if(sourceItem.production == targetItem.production &&
+ prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
+ TLongHashSet visited = new TLongHashSet();
+ traceBack(gla, id, visited, source, sourceItem);
+ }
+ }
+
+ }
+ }
+ }
+ //System.out.println("follow: " + Arrays.toString(follow));
+ //System.out.println("gread: " + Arrays.toString(gread));
+ //System.out.println("gla: " + Arrays.toString(gla));
+ AnaGrammar.gclose(follow, gread);
+ AnaGrammar.gclose(follow, gla);
+
+ /*System.out.println("Gla:");
+ for(int i=0;i<gla.length;++i) {
+ int iState = getState(sMap.get(i));
+ int iSymbol = getSymbol(sMap.get(i));
+ for(int j : gla[i].toArray()) {
+ int jState = getState(sMap.get(j));
+ int jSymbol = getSymbol(sMap.get(j));
+ System.out.println("-- from --");
+ System.out.println(itemSets.get(iState).toString(grammar));
+ System.out.println(" symbol: " + grammar.nonterminalNames[iSymbol]);
+ System.out.println("-- to --");
+ System.out.println(itemSets.get(jState).toString(grammar));
+ System.out.println(" symbol: " + grammar.nonterminalNames[jSymbol]);
+ }
+ }*/
+
+ /*for(int i=0;i<follow.length;++i) {
+ long s = sMap.get(i);
+ int source = getState(s);
+ int symbol = getSymbol(s);
+ System.out.println("------------------------------");
+ System.out.println(itemSets.get(source).toString(grammar));
+ System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
+ System.out.print("Follow:");
+ for(int sym : follow[i].toArray())
+ System.out.print(" " + grammar.terminalNames[sym]);
+ System.out.println();
+ }*/
+ }
+
+ private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
+ if(visited.add( (((long)state)<<32) | (long)item.position )) {
+ Prod prod = grammar.prods.get(item.production);
+ if(item.stackPos == -1) {
+ int id = sMapInv.get(getS(state, prod.lhs));
+ gla[id].add(initialId);
+ }
+
+ int backTransSymbol = backTransSymbols.get(state);
+ for(int prevState : backLinks.get(state).toArray())
+ for(Item prevItem : itemSets.get(prevState).items)
+ if(prevItem.production == item.production &&
+ prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
+ traceBack(gla, initialId, visited, prevState, prevItem);
+ }
+ }
+
+ private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
+ if(visited.add( (((long)state)<<32) | (long)position )) {
+ int backTransSymbol = backTransSymbols.get(state);
+ Prod prod = grammar.prods.get(production);
+ boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
+ for(int prevState : backLinks.get(state).toArray()) {
+ for(Item item : itemSets.get(prevState).items) {
+ if(item.production == production &&
+ prod.rhs.getTransition(item.position, backTransSymbol) == position)
+ lookback(visited, la, production, prevState, item.position);
+ if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
+ int id = sMapInv.get(getS(prevState, prod.lhs));
+ la.addAll(follow[id]);
+ }
+ }
+ }
+ }
+ }
+
+ private void createReduceActions() {
+ computeFollow();
+ for(int i=0;i<itemSets.size();++i) {
+ TIntIntHashMap trans = transitions.get(i);
+ if(finalStates.contains(i))
+ trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
+
+ TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
+ TIntIntHashMap stackPosMap = new TIntIntHashMap();
+ for(Item item : itemSets.get(i).items) {
+ Prod prod = grammar.prods.get(item.production);
+ if(prod.rhs.getAccepts(item.position)) {
+ TIntHashSet la = laMap.get(item.production);
+ if(la == null) {
+ la = new TIntHashSet();
+ laMap.put(item.production, la);
+ }
+
+ TLongHashSet visited = new TLongHashSet();
+ lookback(visited, la, item.production, i, item.position);
+
+ if(stackPosMap.containsKey(item.production)) {
+ stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
+ }
+ else
+ stackPosMap.put(item.production, item.stackPos);
+ }
+ }
+
+ // Create transitions
+ for(int production : laMap.keys()) {
+ int stackPos = 0; //stackPosMap.get(production);
+ TIntHashSet la = laMap.get(production);
+ for(int symbol : la.toArray()) {
+ if(trans.contains(symbol)) {
+ int oldAction = trans.get(symbol);
+ if(oldAction >= 0) {
+ Prod prod = grammar.prods.get(production);
+ if(prod.annotations.containsKey(symbol)) {
+ byte v = prod.annotations.get(symbol);
+ if(v == 1)
+ trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+ }
+ else {
+ System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+ System.err.println(itemSets.get(i).toString(grammar));
+ }
+ }
+ else {
+ System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+ System.err.println(itemSets.get(i).toString(grammar));
+ }
+ }
+ else
+ trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+ }
+ }
+
+ // Check stacking conflicts
+ /*trans.forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int a, int b) {
+ if(b >= 0) {
+ boolean kernelState = false;
+ boolean nonkernelState = false;
+ for(Item item : itemSets.get(b).items) {
+ Prod prod = grammar.prods.get(item.production);
+ if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
+ nonkernelState = true;
+ else if(item.position != prod.rhs.getInitialState())
+ kernelState = true;
+ }
+
+
+ if(kernelState && nonkernelState) {
+ System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
+ System.err.println(itemSets.get(b).toString(grammar));
+ }
+ }
+ return true;
+ }
+ });*/
+ }
+ }
+
+ public static ParseTable build(AnaGrammar grammar) {
+ ParseTableBuilder builder = new ParseTableBuilder(grammar);
+
+ builder.initialStates = new int[grammar.initialNonterminals.length];
+ for(int i=0;i<grammar.initialNonterminals.length;++i) {
+ ArrayList<Item> seed = new ArrayList<Item>();
+ int prodId = grammar.prods.size()-i-1;
+ seed.add(new Item(prodId,
+ grammar.prods.get(prodId).rhs.getInitialState(), 0));
+ builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
+ }
+
+ builder.createReduceActions();
+
+ System.out.println("States: " + builder.itemSets.size());
+
+ //builder.visualize();
+
+ builder.printParseTable();
+ return builder.getParseTable();
+ }
+
+ private ParseTable getParseTable() {
+ int[] productionInfo = new int[grammar.prods.size()];
+ for(int i=0;i<productionInfo.length;++i) {
+ Prod prod = grammar.prods.get(i);
+ productionInfo[i] = prod.lhs;
+ }
+
+ int[][] actionTable = new int[transitions.size()][];
+ int[][] gotoTable = new int[transitions.size()][];
+ for(int i=0;i<transitions.size();++i) {
+ final int[] actions = new int[grammar.terminalNames.length];
+ Arrays.fill(actions, ERROR_ACTION);
+ actionTable[i] = actions;
+ final int[] gotos = new int[grammar.nonterminalNames.length];
+ Arrays.fill(gotos, ERROR_ACTION);
+ gotoTable[i] = gotos;
+ final TIntIntHashMap stackOpMap = stackOps.get(i);
+ transitions.get(i).forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int a, int b) {
+ int action = b | stackOpMap.get(a);
+ if(a >= 0)
+ actions[a] = action;
+ else
+ gotos[~a] = action;
+ return true;
+ }
+ });
+ }
+
+ String[] stateDescriptions = new String[itemSets.size()];
+ for(int i=0;i<stateDescriptions.length;++i) {
+ final StringBuilder b = new StringBuilder();
+ b.append(itemSets.get(i).toString(grammar));
+ transitions.get(i).forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol, int action) {
+ if(symbol >= 0) {
+ b.append("\n ").append(grammar.terminalNames[symbol]).append(" ->");
+ if((action & REDUCE_MASK) == 0) {
+ if((action & POP_MASK) != 0)
+ b.append(" POP");
+ if((action & PUSH_MASK) != 0)
+ b.append(" PUSH");
+ b.append(" SHIFT(").append(action&STATE_MASK).append(")");
+ }
+ else {
+ if(action == 0xfffffffe)
+ b.append(" ACCEPT");
+ else
+ b.append(" REDUCE(").append(action&STATE_MASK).append(")");
+ }
+ }
+ else {
+ b.append("\n ").append(grammar.nonterminalNames[~symbol]).append(" ->")
+ .append(" GOTO(").append(action).append(")");
+ }
+ return true;
+ }
+ });
+ stateDescriptions[i] = b.toString();
+ }
+
+ //printParseTable();
+ return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
+ initialStates, stateDescriptions);
+ }
+
+ private void printParseTable() {
+ final ItemSet[] stateSets = new ItemSet[states.size()];
+ states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
+ @Override
+ public boolean execute(ItemSet a, int b) {
+ stateSets[b] = a;
+ return true;
+ }
+ });
+ for(int i=0;i<stateSets.length;++i) {
+ System.out.println("--- State " + i + " ---");
+ System.out.println(stateSets[i].toString(grammar));
+ final TIntIntHashMap stackOp = stackOps.get(i);
+ transitions.get(i).forEachEntry(new TIntIntProcedure() {
+ @Override
+ public boolean execute(int a, int b) {
+ int sOp = stackOp.get(a);
+ System.out.print(grammar.getName(a) + " -> ");
+ if(sOp != 0) {
+ System.out.print("[");
+ if((sOp & PUSH_MASK) != 0) {
+ sOp ^= PUSH_MASK;
+ System.out.print("PUSH ");
+ }
+ if((sOp & POP_MASK) != 0) {
+ sOp ^= POP_MASK;
+ System.out.print("POP ");
+ }
+ if(sOp != 0)
+ System.out.print(sOp);
+ System.out.print("] ");
+ }
+ if((b & REDUCE_MASK) != 0) {
+ b ^= REDUCE_MASK;
+ System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar));
+ }
+ else {
+ System.out.println("shift " + b);
+ }
+ return true;
+ }
+ });
+ }
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.Namer;
+
+public class Grammar implements Namer {
+ public final Production[] productions;
+ public final String[] terminalNames;
+ public final String[] nonterminalNames;
+ public final int[] initialNonterminals;
+
+ public Grammar(Production[] productions, String[] terminalNames,
+ String[] nonterminalNames, int[] initialNonterminals) {
+ this.productions = productions;
+ this.terminalNames = terminalNames;
+ this.nonterminalNames = nonterminalNames;
+ this.initialNonterminals = initialNonterminals;
+ }
+
+ public String getName(int symbolId) {
+ if(symbolId >= 0)
+ return terminalNames[symbolId];
+ else
+ return nonterminalNames[~symbolId];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder b = new StringBuilder();
+ for(Production prod : productions) {
+ b.append(prod.toString(this));
+ b.append("\n");
+ }
+ /*for(int i=0;i<terminalNames.length;++i)
+ b.append(terminalNames[i]).append(' ').append(i).append('\n');*/
+ return b.toString();
+ }
+
+ public void check() {
+ int[] prodCount = new int[nonterminalNames.length];
+ for(Production prod : productions)
+ ++prodCount[~prod.lhs];
+ for(int i=0;i<nonterminalNames.length;++i)
+ if(prodCount[i]==0)
+ System.err.println("Nonterminal " + nonterminalNames[i] + " does not have productions.");
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+import gnu.trove.map.hash.TIntByteHashMap;
+
+public class Production {
+ public final String name;
+ public int lhs;
+ public final Regexp rhs;
+ public final TIntByteHashMap annotations;
+
+ public Production(String name, int lhs, Regexp rhs,
+ TIntByteHashMap annotations) {
+ this.name = name;
+ this.lhs = lhs;
+ this.rhs = rhs;
+ this.annotations = annotations;
+ }
+
+ public String toString(Grammar grammar) {
+ StringBuilder b = new StringBuilder();
+ b.append(grammar.getName(lhs));
+ b.append(" = ");
+ rhs.toString(b, grammar, 0);
+ return b.toString();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.File;
+
+import org.simantics.scl.compiler.parser.generator.ParserGenerator;
+
+public class GenerateParserParser {
+
+ public static void main(String[] args) throws Exception {
+ File plugin = new File(GenerateParserParser.class.getResource(".").getPath().replace("%20", " "))
+ .getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile();
+ System.out.println(plugin);
+ ParserGenerator.createParser(
+ "org.simantics.scl.copiler.parser.grammar.input",
+ "RuntimeException",
+ new File(plugin, "src/org/simantics/scl/compiler/parser/grammar/input/Grammar.grammar"));
+ }
+
+}
--- /dev/null
+initial file ;
+
+file
+ = declaration+ # File
+ ;
+
+declaration
+ = NONTERMINAL EQUALS prod (BAR prod)* SEMICOLON # Production
+ | INITIAL NONTERMINAL SEMICOLON # Initial
+ ;
+
+prod
+ = regexps HASH TERMINAL (COMMA (SHIFT|REDUCE) TERMINAL)* # ProductionRhs
+ ;
+
+regexps
+ = (regexp (regexp | STAR | PLUS | OPTIONAL)*)? # Concatenation
+ ;
+
+regexp
+ = (TERMINAL | NONTERMINAL | SHIFT | REDUCE | INITIAL) # Terminal
+ | LPAREN regexps (BAR regexps)* RPAREN # Union
+ ;
--- /dev/null
+package org.simantics.parser.grammar.input2;
+
+import org.simantics.parser.grammar.input2.Token;
+
+%%
+
+%public
+%char
+%unicode
+%class GrammarLexer
+%function nextToken
+%type Token
+%yylexthrow RuntimeException
+%scanerror RuntimeException
+%eofval{
+ return sym(GrammarTerminals.EOF);
+%eofval}
+
+%{
+ private Token sym(int id) {
+ return new Token(id, yychar, yychar+yylength(), yytext());
+ }
+%}
+
+
+idChar = [a-zA-Z0-9_]
+nonterminal = [a-z]{idChar}*
+terminal = [A-Z]{idChar}*
+
+whitespace = [ \n\r\t]+
+c_comment = "//" [^\n\r]*
+cpp_comment = "/*" ~"*/"
+
+%%
+
+<YYINITIAL> {
+ {c_comment} { }
+ {cpp_comment} { }
+ "=" { return sym(GrammarTerminals.EQUALS); }
+ "|" { return sym(GrammarTerminals.BAR); }
+ ";" { return sym(GrammarTerminals.SEMICOLON); }
+ "#" { return sym(GrammarTerminals.HASH); }
+ "," { return sym(GrammarTerminals.COMMA); }
+ "(" { return sym(GrammarTerminals.LPAREN); }
+ ")" { return sym(GrammarTerminals.RPAREN); }
+ "*" { return sym(GrammarTerminals.STAR); }
+ "+" { return sym(GrammarTerminals.PLUS); }
+ "?" { return sym(GrammarTerminals.OPTIONAL); }
+ initial { return sym(GrammarTerminals.INITIAL); }
+ shift { return sym(GrammarTerminals.SHIFT); }
+ reduce { return sym(GrammarTerminals.REDUCE); }
+ {terminal} { return sym(GrammarTerminals.TERMINAL); }
+ {nonterminal} { return sym(GrammarTerminals.NONTERMINAL); }
+ {whitespace} { }
+ . { throw new RuntimeException("Illegal character '" + yytext() + "'."); }
+}
--- /dev/null
+/* The following code was generated by JFlex 1.6.1 */
+
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import org.simantics.scl.compiler.parser.grammar.input.Token;
+
+
+/**
+ * This class is a scanner generated by
+ * <a href="http://www.jflex.de/">JFlex</a> 1.6.1
+ * from the specification file <tt>C:/Users/hannu/git/scl/org.simantics.parser/src/org/simantics/parser/grammar/input2/GrammarLexer.flex</tt>
+ */
+public class GrammarLexer {
+
+ /** This character denotes the end of file */
+ public static final int YYEOF = -1;
+
+ /** initial size of the lookahead buffer */
+ private static final int ZZ_BUFFERSIZE = 16384;
+
+ /** lexical states */
+ public static final int YYINITIAL = 0;
+
+ /**
+ * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ * at the beginning of a line
+ * l is of the form l = 2*k, k a non negative integer
+ */
+ private static final int ZZ_LEXSTATE[] = {
+ 0, 0
+ };
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final String ZZ_CMAP_PACKED =
+ "\11\0\1\4\1\6\1\36\1\36\1\6\22\0\1\4\2\0\1\13"+
+ "\4\0\1\15\1\16\1\7\1\17\1\14\2\0\1\5\12\1\1\0"+
+ "\1\12\1\0\1\10\1\0\1\20\1\0\32\3\4\0\1\1\1\0"+
+ "\1\24\1\2\1\35\1\33\1\32\1\30\1\2\1\27\1\21\2\2"+
+ "\1\25\1\2\1\22\3\2\1\31\1\26\1\23\1\34\5\2\1\0"+
+ "\1\11\10\0\1\36\u1fa2\0\1\36\1\36\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\udfe6\0";
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+ /**
+ * Translates DFA states to action switch labels.
+ */
+ private static final int [] ZZ_ACTION = zzUnpackAction();
+
+ private static final String ZZ_ACTION_PACKED_0 =
+ "\1\0\1\1\1\2\1\3\1\4\1\1\1\5\1\6"+
+ "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
+ "\3\2\1\4\1\0\3\2\1\0\3\2\1\4\4\2"+
+ "\1\17\2\2\1\20\1\21";
+
+ private static int [] zzUnpackAction() {
+ int [] result = new int[38];
+ int offset = 0;
+ offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAction(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /**
+ * Translates a state to a row index in the transition table
+ */
+ private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+ private static final String ZZ_ROWMAP_PACKED_0 =
+ "\0\0\0\37\0\76\0\135\0\174\0\233\0\37\0\37"+
+ "\0\37\0\37\0\37\0\37\0\37\0\37\0\37\0\37"+
+ "\0\272\0\331\0\370\0\u0117\0\u0136\0\u0155\0\u0174\0\u0193"+
+ "\0\u01b2\0\u01d1\0\u01f0\0\u020f\0\37\0\u022e\0\u024d\0\u026c"+
+ "\0\u028b\0\76\0\u02aa\0\u02c9\0\76\0\76";
+
+ private static int [] zzUnpackRowMap() {
+ int [] result = new int[38];
+ int offset = 0;
+ offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int high = packed.charAt(i++) << 16;
+ result[j++] = high | packed.charAt(i++);
+ }
+ return j;
+ }
+
+ /**
+ * The transition table of the DFA
+ */
+ private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+ private static final String ZZ_TRANS_PACKED_0 =
+ "\2\2\1\3\1\4\1\5\1\6\1\5\1\7\1\10"+
+ "\1\11\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+
+ "\1\21\4\3\1\22\2\3\1\23\4\3\41\0\3\3"+
+ "\15\0\15\3\2\0\3\4\15\0\15\4\5\0\1\5"+
+ "\1\0\1\5\35\0\1\24\1\0\1\25\30\0\3\3"+
+ "\15\0\1\3\1\26\13\3\2\0\3\3\15\0\6\3"+
+ "\1\27\6\3\2\0\3\3\15\0\11\3\1\30\3\3"+
+ "\1\0\6\24\1\0\30\24\7\25\1\31\27\25\1\0"+
+ "\3\3\15\0\1\32\14\3\2\0\3\3\15\0\1\33"+
+ "\14\3\2\0\3\3\15\0\12\3\1\34\2\3\1\0"+
+ "\5\25\1\35\1\25\1\31\27\25\1\0\3\3\15\0"+
+ "\2\3\1\36\12\3\2\0\3\3\15\0\7\3\1\37"+
+ "\5\3\2\0\3\3\15\0\13\3\1\40\1\3\2\0"+
+ "\3\3\15\0\1\41\14\3\2\0\3\3\15\0\2\3"+
+ "\1\42\12\3\2\0\3\3\15\0\14\3\1\43\2\0"+
+ "\3\3\15\0\3\3\1\44\11\3\2\0\3\3\15\0"+
+ "\11\3\1\45\3\3\2\0\3\3\15\0\4\3\1\46"+
+ "\10\3\1\0";
+
+ private static int [] zzUnpackTrans() {
+ int [] result = new int[744];
+ int offset = 0;
+ offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackTrans(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ value--;
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /* error codes */
+ private static final int ZZ_UNKNOWN_ERROR = 0;
+ private static final int ZZ_NO_MATCH = 1;
+ private static final int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static final String ZZ_ERROR_MSG[] = {
+ "Unknown internal scanner error",
+ "Error: could not match input",
+ "Error: pushback value was too large"
+ };
+
+ /**
+ * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+ */
+ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+ private static final String ZZ_ATTRIBUTE_PACKED_0 =
+ "\1\0\1\11\4\1\12\11\4\1\1\0\3\1\1\0"+
+ "\3\1\1\11\11\1";
+
+ private static int [] zzUnpackAttribute() {
+ int [] result = new int[38];
+ int offset = 0;
+ offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+ /** the input device */
+ private java.io.Reader zzReader;
+
+ /** the current state of the DFA */
+ private int zzState;
+
+ /** the current lexical state */
+ private int zzLexicalState = YYINITIAL;
+
+ /** this buffer contains the current text to be matched and is
+ the source of the yytext() string */
+ private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+ /** the textposition at the last accepting state */
+ private int zzMarkedPos;
+
+ /** the current text position in the buffer */
+ private int zzCurrentPos;
+
+ /** startRead marks the beginning of the yytext() string in the buffer */
+ private int zzStartRead;
+
+ /** endRead marks the last character in the buffer, that has been read
+ from input */
+ private int zzEndRead;
+
+ /** number of newlines encountered up to the start of the matched text */
+ private int yyline;
+
+ /** the number of characters up to the start of the matched text */
+ private int yychar;
+
+ /**
+ * the number of characters from the last newline up to the start of the
+ * matched text
+ */
+ private int yycolumn;
+
+ /**
+ * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ */
+ private boolean zzAtBOL = true;
+
+ /** zzAtEOF == true <=> the scanner is at the EOF */
+ private boolean zzAtEOF;
+
+ /** denotes if the user-EOF-code has already been executed */
+ private boolean zzEOFDone;
+
+ /**
+ * The number of occupied positions in zzBuffer beyond zzEndRead.
+ * When a lead/high surrogate has been read from the input stream
+ * into the final zzBuffer position, this will have a value of 1;
+ * otherwise, it will have a value of 0.
+ */
+ private int zzFinalHighSurrogate = 0;
+
+ /* user code: */
+ private Token sym(int id) {
+ return new Token(id, yychar, yychar+yylength(), yytext());
+ }
+
+
+ /**
+ * Creates a new scanner
+ *
+ * @param in the java.io.Reader to read input from.
+ */
+ public GrammarLexer(java.io.Reader in) {
+ this.zzReader = in;
+ }
+
+
+ /**
+ * Unpacks the compressed character translation table.
+ *
+ * @param packed the packed character translation table
+ * @return the unpacked character translation table
+ */
+ private static char [] zzUnpackCMap(String packed) {
+ char [] map = new char[0x110000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 146) {
+ int count = packed.charAt(i++);
+ char value = packed.charAt(i++);
+ do map[j++] = value; while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /**
+ * Refills the input buffer.
+ *
+ * @return <code>false</code>, iff there was new input.
+ *
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ private boolean zzRefill() throws java.io.IOException {
+
+ /* first: make room (if you can) */
+ if (zzStartRead > 0) {
+ zzEndRead += zzFinalHighSurrogate;
+ zzFinalHighSurrogate = 0;
+ System.arraycopy(zzBuffer, zzStartRead,
+ zzBuffer, 0,
+ zzEndRead-zzStartRead);
+
+ /* translate stored positions */
+ zzEndRead-= zzStartRead;
+ zzCurrentPos-= zzStartRead;
+ zzMarkedPos-= zzStartRead;
+ zzStartRead = 0;
+ }
+
+ /* is the buffer big enough? */
+ if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) {
+ /* if not: blow it up */
+ char newBuffer[] = new char[zzBuffer.length*2];
+ System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+ zzBuffer = newBuffer;
+ zzEndRead += zzFinalHighSurrogate;
+ zzFinalHighSurrogate = 0;
+ }
+
+ /* fill the buffer with new input */
+ int requested = zzBuffer.length - zzEndRead;
+ int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+ /* not supposed to occur according to specification of java.io.Reader */
+ if (numRead == 0) {
+ throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+ }
+ if (numRead > 0) {
+ zzEndRead += numRead;
+ /* If numRead == requested, we might have requested to few chars to
+ encode a full Unicode character. We assume that a Reader would
+ otherwise never return half characters. */
+ if (numRead == requested) {
+ if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+ --zzEndRead;
+ zzFinalHighSurrogate = 1;
+ }
+ }
+ /* potentially more input available */
+ return false;
+ }
+
+ /* numRead < 0 ==> end of stream */
+ return true;
+ }
+
+
+ /**
+ * Closes the input stream.
+ */
+ public final void yyclose() throws java.io.IOException {
+ zzAtEOF = true; /* indicate end of file */
+ zzEndRead = zzStartRead; /* invalidate buffer */
+
+ if (zzReader != null)
+ zzReader.close();
+ }
+
+
+ /**
+ * Resets the scanner to read from a new input stream.
+ * Does not close the old reader.
+ *
+ * All internal variables are reset, the old input stream
+ * <b>cannot</b> be reused (internal buffer is discarded and lost).
+ * Lexical state is set to <tt>ZZ_INITIAL</tt>.
+ *
+ * Internal scan buffer is resized down to its initial length, if it has grown.
+ *
+ * @param reader the new input stream
+ */
+ public final void yyreset(java.io.Reader reader) {
+ zzReader = reader;
+ zzAtBOL = true;
+ zzAtEOF = false;
+ zzEOFDone = false;
+ zzEndRead = zzStartRead = 0;
+ zzCurrentPos = zzMarkedPos = 0;
+ zzFinalHighSurrogate = 0;
+ yyline = yychar = yycolumn = 0;
+ zzLexicalState = YYINITIAL;
+ if (zzBuffer.length > ZZ_BUFFERSIZE)
+ zzBuffer = new char[ZZ_BUFFERSIZE];
+ }
+
+
+ /**
+ * Returns the current lexical state.
+ */
+ public final int yystate() {
+ return zzLexicalState;
+ }
+
+
+ /**
+ * Enters a new lexical state
+ *
+ * @param newState the new lexical state
+ */
+ public final void yybegin(int newState) {
+ zzLexicalState = newState;
+ }
+
+
+ /**
+ * Returns the text matched by the current regular expression.
+ */
+ public final String yytext() {
+ return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+ }
+
+
+ /**
+ * Returns the character at position <tt>pos</tt> from the
+ * matched text.
+ *
+ * It is equivalent to yytext().charAt(pos), but faster
+ *
+ * @param pos the position of the character to fetch.
+ * A value from 0 to yylength()-1.
+ *
+ * @return the character at position pos
+ */
+ public final char yycharat(int pos) {
+ return zzBuffer[zzStartRead+pos];
+ }
+
+
+ /**
+ * Returns the length of the matched text region.
+ */
+ public final int yylength() {
+ return zzMarkedPos-zzStartRead;
+ }
+
+
+ /**
+ * Reports an error that occured while scanning.
+ *
+ * In a wellformed scanner (no or only correct usage of
+ * yypushback(int) and a match-all fallback rule) this method
+ * will only be called with things that "Can't Possibly Happen".
+ * If this method is called, something is seriously wrong
+ * (e.g. a JFlex bug producing a faulty scanner etc.).
+ *
+ * Usual syntax/scanner level error handling should be done
+ * in error fallback rules.
+ *
+ * @param errorCode the code of the errormessage to display
+ */
+ private void zzScanError(int errorCode) throws RuntimeException {
+ String message;
+ try {
+ message = ZZ_ERROR_MSG[errorCode];
+ }
+ catch (ArrayIndexOutOfBoundsException e) {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new RuntimeException(message);
+ }
+
+
+ /**
+ * Pushes the specified amount of characters back into the input stream.
+ *
+ * They will be read again by then next call of the scanning method
+ *
+ * @param number the number of characters to be read again.
+ * This number must not be greater than yylength()!
+ */
+ public void yypushback(int number) throws RuntimeException {
+ if ( number > yylength() )
+ zzScanError(ZZ_PUSHBACK_2BIG);
+
+ zzMarkedPos -= number;
+ }
+
+
+ /**
+ * Resumes scanning until the next regular expression is matched,
+ * the end of input is encountered or an I/O-Error occurs.
+ *
+ * @return the next token
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ public Token nextToken() throws java.io.IOException, RuntimeException, RuntimeException {
+ int zzInput;
+ int zzAction;
+
+ // cached fields:
+ int zzCurrentPosL;
+ int zzMarkedPosL;
+ int zzEndReadL = zzEndRead;
+ char [] zzBufferL = zzBuffer;
+ char [] zzCMapL = ZZ_CMAP;
+
+ int [] zzTransL = ZZ_TRANS;
+ int [] zzRowMapL = ZZ_ROWMAP;
+ int [] zzAttrL = ZZ_ATTRIBUTE;
+
+ while (true) {
+ zzMarkedPosL = zzMarkedPos;
+
+ yychar+= zzMarkedPosL-zzStartRead;
+
+ zzAction = -1;
+
+ zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+ zzState = ZZ_LEXSTATE[zzLexicalState];
+
+ // set up zzAction for empty match case:
+ int zzAttributes = zzAttrL[zzState];
+ if ( (zzAttributes & 1) == 1 ) {
+ zzAction = zzState;
+ }
+
+
+ zzForAction: {
+ while (true) {
+
+ if (zzCurrentPosL < zzEndReadL) {
+ zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+ zzCurrentPosL += Character.charCount(zzInput);
+ }
+ else if (zzAtEOF) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ // store back cached positions
+ zzCurrentPos = zzCurrentPosL;
+ zzMarkedPos = zzMarkedPosL;
+ boolean eof = zzRefill();
+ // get translated positions and possibly new buffer
+ zzCurrentPosL = zzCurrentPos;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ zzEndReadL = zzEndRead;
+ if (eof) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+ zzCurrentPosL += Character.charCount(zzInput);
+ }
+ }
+ int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+ if (zzNext == -1) break zzForAction;
+ zzState = zzNext;
+
+ zzAttributes = zzAttrL[zzState];
+ if ( (zzAttributes & 1) == 1 ) {
+ zzAction = zzState;
+ zzMarkedPosL = zzCurrentPosL;
+ if ( (zzAttributes & 8) == 8 ) break zzForAction;
+ }
+
+ }
+ }
+
+ // store back cached position
+ zzMarkedPos = zzMarkedPosL;
+
+ if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+ zzAtEOF = true;
+ { return sym(GrammarTerminals.EOF);
+ }
+ }
+ else {
+ switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+ case 1:
+ { throw new RuntimeException("Illegal character '" + yytext() + "'.");
+ }
+ case 18: break;
+ case 2:
+ { return sym(GrammarTerminals.NONTERMINAL);
+ }
+ case 19: break;
+ case 3:
+ { return sym(GrammarTerminals.TERMINAL);
+ }
+ case 20: break;
+ case 4:
+ {
+ }
+ case 21: break;
+ case 5:
+ { return sym(GrammarTerminals.STAR);
+ }
+ case 22: break;
+ case 6:
+ { return sym(GrammarTerminals.EQUALS);
+ }
+ case 23: break;
+ case 7:
+ { return sym(GrammarTerminals.BAR);
+ }
+ case 24: break;
+ case 8:
+ { return sym(GrammarTerminals.SEMICOLON);
+ }
+ case 25: break;
+ case 9:
+ { return sym(GrammarTerminals.HASH);
+ }
+ case 26: break;
+ case 10:
+ { return sym(GrammarTerminals.COMMA);
+ }
+ case 27: break;
+ case 11:
+ { return sym(GrammarTerminals.LPAREN);
+ }
+ case 28: break;
+ case 12:
+ { return sym(GrammarTerminals.RPAREN);
+ }
+ case 29: break;
+ case 13:
+ { return sym(GrammarTerminals.PLUS);
+ }
+ case 30: break;
+ case 14:
+ { return sym(GrammarTerminals.OPTIONAL);
+ }
+ case 31: break;
+ case 15:
+ { return sym(GrammarTerminals.SHIFT);
+ }
+ case 32: break;
+ case 16:
+ { return sym(GrammarTerminals.REDUCE);
+ }
+ case 33: break;
+ case 17:
+ { return sym(GrammarTerminals.INITIAL);
+ }
+ case 34: break;
+ default:
+ zzScanError(ZZ_NO_MATCH);
+ }
+ }
+ }
+ }
+
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+
+public abstract class GrammarParser {
+ public static final boolean TRACE = true;
+
+ private static final int INITIAL_CAPACITY = 16;
+ private static final int STATE_COUNT = 19;
+ private static final int TERMINAL_COUNT = 16;
+ private static final int NONTERMINAL_COUNT = 6;
+ private static final int PRODUCT_COUNT = 8;
+
+ private static final int[] ACTION_ROW_ID = new int[STATE_COUNT];
+ private static final int[] ACTION_COLUMN_ID = new int[TERMINAL_COUNT];
+ private static final short[] ACTION_TABLE = new short[56];
+ private static final int[] ERROR_TABLE = new int[10];
+ private static final int[] GOTO_ROW_ID = new int[STATE_COUNT];
+ private static final int[] GOTO_COLUMN_ID = new int[NONTERMINAL_COUNT];
+ private static final short[] GOTO_TABLE = new short[12];
+ private static final int[] PRODUCT_LHS = new int[PRODUCT_COUNT];
+
+ private static final short STATE_MASK = (short)0x0fff;
+ private static final short REDUCE_MASK = (short)0x8000;
+ private static final short POP_MASK = (short)0x4000;
+ private static final short PUSH_MASK = (short)0x2000;
+ private static final short ERROR_ACTION = (short)0xffff;
+ private static final short ACCEPT_ACTION = (short)0xfffe;
+
+ public static final String[] TERMINAL_NAMES = new String[] {
+ "NONTERMINAL",
+ "EQUALS",
+ "BAR",
+ "SEMICOLON",
+ "INITIAL",
+ "HASH",
+ "TERMINAL",
+ "COMMA",
+ "SHIFT",
+ "REDUCE",
+ "STAR",
+ "PLUS",
+ "OPTIONAL",
+ "LPAREN",
+ "RPAREN",
+ "EOF"
+ };
+
+ public static final String[] NONTERMINAL_NAMES = new String[] {
+ "file",
+ "declaration",
+ "prod",
+ "regexps",
+ "regexp",
+ "init$1"
+ };
+
+ static {
+ try {
+ DataInputStream input = new DataInputStream(GrammarParser.class.getResourceAsStream("GrammarParser.dat"));
+ for(int i=0;i<ACTION_ROW_ID.length;++i)
+ ACTION_ROW_ID[i] = input.readInt();
+ for(int i=0;i<ACTION_COLUMN_ID.length;++i)
+ ACTION_COLUMN_ID[i] = input.readInt();
+ for(int i=0;i<ACTION_TABLE.length;++i)
+ ACTION_TABLE[i] = input.readShort();
+ for(int i=0;i<ERROR_TABLE.length;++i)
+ ERROR_TABLE[i] = input.readInt();
+ for(int i=0;i<GOTO_ROW_ID.length;++i)
+ GOTO_ROW_ID[i] = input.readInt();
+ for(int i=0;i<GOTO_COLUMN_ID.length;++i)
+ GOTO_COLUMN_ID[i] = input.readInt();
+ for(int i=0;i<GOTO_TABLE.length;++i)
+ GOTO_TABLE[i] = input.readShort();
+ for(int i=0;i<PRODUCT_LHS.length;++i)
+ PRODUCT_LHS[i] = input.readInt();
+ input.close();
+ } catch(IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private static short getAction(int state, int symbol) {
+ int id = TERMINAL_COUNT*state + symbol;
+ if( ((ERROR_TABLE[id>>5] >> (id&31))&1) != 0 )
+ return ERROR_ACTION;
+ return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]];
+ }
+
+ private static short getGoto(int state, int symbol) {
+ return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]];
+ }
+
+ protected abstract Token nextToken();
+
+ private Object[] symbolStack = new Object[INITIAL_CAPACITY];
+ private int symbolStackLength = 0;
+
+ private int[] stateStack = new int[INITIAL_CAPACITY];
+ private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY];
+ private int stateStackLength = 0;
+
+ // For reduce
+ private int reductionLength;
+
+ protected int length() {
+ return reductionLength;
+ }
+
+ protected Object get(int i) {
+ if(i < 0 || i >= reductionLength)
+ throw new IndexOutOfBoundsException();
+ return symbolStack[symbolStackLength+i];
+ }
+
+ private String parseErrorDescription(int state, Token token, int tokenId) {
+ StringBuilder b = new StringBuilder();
+ b.append("Unexpected token '").append(token)
+ .append("' (").append(TERMINAL_NAMES[tokenId])
+ .append("). Expected one of ");
+ ArrayList<String> possibleTerminals = new ArrayList<String>();
+ for(int i=0;i<TERMINAL_COUNT;++i)
+ if(getAction(state, i) != ERROR_ACTION)
+ possibleTerminals.add(TERMINAL_NAMES[i]);
+ Collections.sort(possibleTerminals);
+ for(int i=0;i<possibleTerminals.size();++i) {
+ if(i > 0)
+ b.append(", ");
+ b.append(possibleTerminals.get(i));
+ }
+ b.append('.');
+ return b.toString();
+ }
+
+ protected abstract RuntimeException syntaxError(Token token, String description);
+
+ private static String describeAction(boolean isGoto, int action) {
+ if(action == ERROR_ACTION)
+ return "ERROR";
+ if(action == ACCEPT_ACTION)
+ return "ACCEPT";
+ StringBuilder b = new StringBuilder();
+ if(isGoto)
+ b.append("GOTO ");
+ else {
+ if((action & REDUCE_MASK) != 0) {
+ action ^= REDUCE_MASK;
+ b.append("REDUCE");
+ }
+ else
+ b.append("SHIFT");
+ }
+ if((action & POP_MASK) != 0) {
+ action ^= POP_MASK;
+ b.append(" POP");
+ }
+ if((action & PUSH_MASK) != 0) {
+ action ^= PUSH_MASK;
+ b.append(" PUSH");
+ }
+ b.append(' ').append(action);
+ return b.toString();
+ }
+
+ private void printState(int state) {
+ System.out.print("state=" + state + ":");
+ for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+ Object s = symbolStack[i];
+ if(s instanceof Token)
+ System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+ else if(s == null)
+ System.out.print(" null");
+ else
+ System.out.print(" " + s.getClass().getSimpleName());
+ while(j>=0 && symbolStackPositionStack[j]==i)
+ System.out.print(" (" + stateStack[j--] + ")");
+ }
+ System.out.println();
+ }
+
+ private Object parse(int state) {
+ while(true) {
+ Token token = nextToken();
+ int tokenId = token.id;
+ if(TRACE)
+ System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
+ while(true) {
+ if(TRACE)
+ printState(state);
+ short action = getAction(state, tokenId);
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(false, action));
+ //System.out.println(STATE_DESCRIPTIONS[state]);
+ if((action & REDUCE_MASK) != 0) {
+ if(action == ACCEPT_ACTION)
+ return symbolStack[symbolStackLength-1];
+ if(action == ERROR_ACTION)
+ throw syntaxError(token, parseErrorDescription(state, token, tokenId));
+ int popAmount = (action >>> 13)&3;
+ if(TRACE) {
+ if(popAmount > 0)
+ System.out.println(" POP " + popAmount);
+ }
+ stateStackLength -= popAmount;
+ action &= STATE_MASK;
+
+ int reductionBegin = symbolStackPositionStack[--stateStackLength];
+
+ reductionLength = symbolStackLength-reductionBegin;
+ symbolStackLength = reductionBegin;
+
+ if(symbolStackLength == symbolStack.length)
+ symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+ Object symbol = reduce(action);
+ postReduce(symbol);
+ symbolStack[symbolStackLength] = symbol;
+
+ state = stateStack[stateStackLength];
+ if(TRACE) {
+ ++symbolStackLength;
+ printState(state);
+ --symbolStackLength;
+ System.out.println(" nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+ }
+ action = getGoto(state, PRODUCT_LHS[action]);
+ if(TRACE)
+ System.out.println(" -> action=" + describeAction(true, action));
+
+ // Pop state
+ if((action & POP_MASK) != 0) {
+ --stateStackLength;
+ }
+ // Push state
+ if((action & PUSH_MASK) != 0) {
+ if(stateStackLength == stateStack.length) {
+ stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+ symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+ }
+ symbolStackPositionStack[stateStackLength] = symbolStackLength;
+ stateStack[stateStackLength++] = state;
+ }
+ state = action & STATE_MASK;
+ ++symbolStackLength;
+ }
+ else {
+ // Pop state
+ if((action & POP_MASK) != 0) {
+ --stateStackLength;
+ }
+ // Push state
+ if((action & PUSH_MASK) != 0) {
+ if(stateStackLength == stateStack.length) {
+ stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+ symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+ }
+ symbolStackPositionStack[stateStackLength] = symbolStackLength;
+ stateStack[stateStackLength++] = state;
+ }
+
+ // New state
+ state = action & STATE_MASK;
+
+ // Push symbol
+ if(symbolStackLength == symbolStack.length)
+ symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+ symbolStack[symbolStackLength++] = token;
+ break;
+ }
+ }
+ }
+ }
+
+ public Object parseFile() {
+ return parse(0);
+ }
+
+
+ protected Object reduce(int productionId) {
+ try {
+ switch(productionId) {
+ case 0:
+ return reduceFile();
+ case 1:
+ return reduceProduction();
+ case 2:
+ return reduceInitial();
+ case 3:
+ return reduceProductionRhs();
+ case 4:
+ return reduceConcatenation();
+ case 5:
+ return reduceTerminal();
+ case 6:
+ return reduceUnion();
+
+ default:
+ throw new RuntimeException("Internal parser error.");
+ }
+ } catch(RuntimeException e) {
+ StringBuilder b = new StringBuilder();
+ b.append("Failed to reduce");
+ for(int i=0;i<length();++i) {
+ Object obj = get(i);
+ b.append("\n (").append(i).append(") \"").append(obj).append('\"');
+ if(obj instanceof Token)
+ b.append(" (").append(TERMINAL_NAMES[((Token)obj).id]).append(")");
+ else
+ b.append(" [").append(obj.getClass().getSimpleName()).append("]");
+ }
+ throw new RuntimeException(b.toString(), e);
+ }
+ }
+
+ /**
+ * file ::= declaration declaration*
+ */
+ protected abstract Object reduceFile();
+ /**
+ * declaration ::= NONTERMINAL EQUALS prod (BAR prod)* SEMICOLON
+ */
+ protected abstract Object reduceProduction();
+ /**
+ * declaration ::= INITIAL NONTERMINAL SEMICOLON
+ */
+ protected abstract Object reduceInitial();
+ /**
+ * prod ::= regexps HASH (TERMINAL COMMA (SHIFT | REDUCE))* TERMINAL
+ */
+ protected abstract Object reduceProductionRhs();
+ /**
+ * regexps ::= (regexp (regexp | STAR | PLUS | OPTIONAL)*)?
+ */
+ protected abstract Object reduceConcatenation();
+ /**
+ * regexp ::= NONTERMINAL | INITIAL | TERMINAL | SHIFT | REDUCE
+ */
+ protected abstract Object reduceTerminal();
+ /**
+ * regexp ::= LPAREN regexps (BAR regexps)* RPAREN
+ */
+ protected abstract Object reduceUnion();
+
+ protected void postReduce(Object reduced) {
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+
+public class GrammarParserImpl extends GrammarParser {
+ private final GrammarLexer lexer;
+
+ ArrayList<String> terminals = new ArrayList<String>();
+ ArrayList<String> nonterminals = new ArrayList<String>();
+ TObjectIntHashMap<String> symbols = new TObjectIntHashMap<String>();
+ ArrayList<Production> productions = new ArrayList<Production>();
+ TIntArrayList initials = new TIntArrayList();
+
+ public GrammarParserImpl(Reader reader) {
+ lexer = new GrammarLexer(reader);
+ }
+
+ private int getId(String symbol) {
+ if(symbols.contains(symbol))
+ return symbols.get(symbol);
+ int id;
+ if(Character.isUpperCase(symbol.charAt(0))) {
+ id = terminals.size();
+ terminals.add(symbol);
+ }
+ else {
+ id = ~nonterminals.size();
+ nonterminals.add(symbol);
+ }
+ symbols.put(symbol, id);
+ return id;
+ }
+
+ @Override
+ protected Token nextToken() {
+ try {
+ Token token = lexer.nextToken();
+ return token;
+ } catch(Exception e) {
+ if(e instanceof RuntimeException)
+ throw (RuntimeException)e;
+ else
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ protected RuntimeException syntaxError(Token token, String description) {
+ return new RuntimeException(description);
+ }
+
+ @Override
+ protected Object reduceFile() {
+ return null;
+ }
+
+ @Override
+ protected Object reduceProduction() {
+ int lhs = getId(((Token)get(0)).text);
+ for(int i=2;i<length();i+=2) {
+ Production prod = (Production)get(i);
+ prod.lhs = lhs;
+ productions.add(prod);
+ }
+ return null;
+ }
+
+ @Override
+ protected Object reduceInitial() {
+ initials.add(getId(((Token)get(1)).text));
+ return null;
+ }
+
+ @Override
+ protected Object reduceTerminal() {
+ return new RAtom(getId(((Token)get(0)).text));
+ }
+
+ private static Regexp postOp(Regexp regexp, Token op) {
+ switch(op.id) {
+ case GrammarTerminals.STAR: return Regexp.star(regexp);
+ case GrammarTerminals.PLUS: return Regexp.plus(regexp);
+ case GrammarTerminals.OPTIONAL: return Regexp.optional(regexp);
+ default: throw new IllegalStateException();
+ }
+ }
+
+ @Override
+ protected Object reduceConcatenation() {
+ ArrayList<Regexp> regexps = new ArrayList<Regexp>(length());
+ for(int i=0;i<length();++i) {
+ Object obj = get(i);
+ if(obj instanceof Regexp)
+ regexps.add((Regexp)obj);
+ else {
+ Token token = (Token)obj;
+ Regexp regexp = regexps.remove(regexps.size()-1);
+ regexps.add(postOp(regexp, token));
+ }
+ }
+ return Regexp.seq(regexps.toArray(new Regexp[regexps.size()]));
+ }
+
+ @Override
+ protected Object reduceUnion() {
+ Regexp[] regexps = new Regexp[length()/2];
+ for(int i=1;i<length();i+=2)
+ regexps[i/2] = (Regexp)get(i);
+ return Regexp.or(regexps);
+ }
+
+ @Override
+ protected Object reduceProductionRhs() {
+ Regexp rhs = (Regexp)get(0);
+ String name = ((Token)get(2)).text;
+ TIntByteHashMap annotations = new TIntByteHashMap();
+ for(int i=4;i<length();i+=3) {
+ Token type = (Token)get(i);
+ int id = getId(((Token)get(i+1)).text);
+ annotations.put(id, (byte)(type.id == GrammarTerminals.SHIFT ? 0 : 1));
+ }
+ return new Production(name, 0, rhs, annotations);
+ }
+
+ public Grammar getGrammar() {
+ return new Grammar(
+ productions.toArray(new Production[productions.size()]),
+ terminals.toArray(new String[terminals.size()]),
+ nonterminals.toArray(new String[nonterminals.size()]),
+ initials.toArray()
+ );
+ }
+
+ public static Grammar read(InputStream inputStream) throws IOException {
+ GrammarParserImpl parser = new GrammarParserImpl(new InputStreamReader(inputStream, "UTF-8"));
+ parser.parseFile();
+ return parser.getGrammar();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar.input;
+
+public interface GrammarTerminals {
+ public static final int NONTERMINAL = 0;
+ public static final int EQUALS = 1;
+ public static final int BAR = 2;
+ public static final int SEMICOLON = 3;
+ public static final int INITIAL = 4;
+ public static final int HASH = 5;
+ public static final int TERMINAL = 6;
+ public static final int COMMA = 7;
+ public static final int SHIFT = 8;
+ public static final int REDUCE = 9;
+ public static final int STAR = 10;
+ public static final int PLUS = 11;
+ public static final int OPTIONAL = 12;
+ public static final int LPAREN = 13;
+ public static final int RPAREN = 14;
+ public static final int EOF = 15;
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.grammar.input;
+public class Token {
+ public static final Token[] EMPTY_ARRAY = new Token[0];
+
+ public final String text;
+ public final int id;
+
+ public Token(int type, int beginPos, int endPos, String text) {
+ this.id = type;
+ this.text = text;
+ }
+
+ @Override
+ public String toString() {
+ return text;
+ }
+}
\ No newline at end of file
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+public interface Namer {
+
+ String getName(int symbolId);
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class RAtom extends Regexp {
+ public final int symbolId;
+
+ public RAtom(int symbolId) {
+ this.symbolId = symbolId;
+ }
+
+ @Override
+ protected void buildAutomaton(NFA aut, int inState, int outState) {
+ aut.addTransition(inState, symbolId, outState);
+ }
+
+ @Override
+ protected void toString(StringBuilder b, int prec) {
+ b.append((char)symbolId);
+ }
+
+ @Override
+ public void toString(StringBuilder b, Namer grammar, int prec) {
+ if(symbolId == 0x80000000)
+ b.append("#");
+ else
+ b.append(grammar.getName(symbolId));
+ }
+
+ @Override
+ protected int getTypeId() {
+ return ATOM;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(obj == this)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ RAtom other = (RAtom)obj;
+ return symbolId == other.symbolId;
+ }
+
+ @Override
+ public int hashCode() {
+ return 31*symbolId + 46;
+ }
+
+ @Override
+ public boolean isNullable() {
+ return false;
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class ROp extends Regexp {
+ public final Regexp exp;
+ public final char op;
+
+ ROp(Regexp exp, char op) {
+ this.exp = exp;
+ this.op = op;
+ }
+
+ @Override
+ public Regexp simplify() {
+ Regexp exp = this.exp.simplify();
+ if(exp instanceof ROp) {
+ ROp other = (ROp)exp;
+ if(other.op == op || other.op == '*')
+ return other;
+ else
+ return new ROp(other.exp, '*');
+ }
+ else if(exp.isNullable()) {
+ if(op == '?')
+ return exp;
+ if(op == '+')
+ return new ROp(exp, '*');
+ }
+ if(exp == this.exp)
+ return this;
+ else
+ return new ROp(exp, op);
+ }
+
+ @Override
+ protected void buildAutomaton(NFA aut, int inState, int outState) {
+ switch(op) {
+ case '?':
+ exp.buildAutomaton(aut, inState, outState);
+ aut.addEpsilonTransition(inState, outState);
+ break;
+ case '*': {
+ int state = aut.newState();
+ aut.addEpsilonTransition(inState, state);
+ exp.buildAutomaton(aut, state, state);
+ aut.addEpsilonTransition(state, outState);
+ } break;
+ case '+': {
+ int state1 = aut.newState();
+ int state2 = aut.newState();
+ aut.addEpsilonTransition(inState, state1);
+ exp.buildAutomaton(aut, state1, state2);
+ aut.addEpsilonTransition(state2, state1);
+ aut.addEpsilonTransition(state2, outState);
+ } break;
+ default:
+ throw new IllegalStateException("Invalid operation '" + op + "'.");
+ }
+ }
+
+ @Override
+ protected void toString(StringBuilder b, int prec) {
+ exp.toString(b, 3);
+ b.append(op);
+ }
+
+ @Override
+ public void toString(StringBuilder b, Namer grammar, int prec) {
+ exp.toString(b, grammar, 3);
+ b.append(op);
+ }
+
+ @Override
+ protected int getTypeId() {
+ return OP;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(obj == this)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ ROp other = (ROp)obj;
+ return op == other.op && exp.equals(other.exp);
+ }
+
+ @Override
+ public int hashCode() {
+ return 31*exp.hashCode() + op;
+ }
+
+ @Override
+ public boolean isNullable() {
+ return op != '+' || exp.isNullable();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class ROr extends Regexp {
+ public final Regexp[] exps;
+
+ ROr(Regexp[] exps) {
+ this.exps = exps;
+ }
+
+ @Override
+ protected void buildAutomaton(NFA aut, int inState, int outState) {
+ for(Regexp exp : exps)
+ exp.buildAutomaton(aut, inState, outState);
+ }
+
+ @Override
+ protected void toString(StringBuilder b, int prec) {
+ if(prec >= 1)
+ b.append('(');
+ boolean first = true;
+ for(Regexp exp : exps) {
+ if(first)
+ first = false;
+ else
+ b.append(" | ");
+ exp.toString(b, 1);
+ }
+ if(prec >= 1)
+ b.append(')');
+ }
+
+ @Override
+ public void toString(StringBuilder b, Namer grammar, int prec) {
+ if(prec >= 1)
+ b.append('(');
+ boolean first = true;
+ for(Regexp exp : exps) {
+ if(first)
+ first = false;
+ else
+ b.append(" | ");
+ exp.toString(b, grammar, 1);
+ }
+ if(prec >= 1)
+ b.append(')');
+ }
+
+ @Override
+ protected int getTypeId() {
+ return OR;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(obj == this)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ ROr other = (ROr)obj;
+ return Arrays.equals(exps, other.exps);
+ }
+
+ @Override
+ public int hashCode() {
+ int r = 34235;
+ for(Regexp exp : exps) {
+ r *= 31;
+ r += exp.hashCode();
+ }
+ return r;
+ }
+
+ @Override
+ public boolean isNullable() {
+ for(Regexp exp : exps)
+ if(exp.isNullable())
+ return true;
+ return false;
+ }
+
+ private Regexp simplify(THashSet<Regexp> set) {
+ boolean qm = set.remove(ONE);
+
+ Regexp exp;
+ simpl: {
+ if(set.size() > 1) {
+ frontLoop: {
+ Iterator<Regexp> it = set.iterator();
+ Regexp common = front(it.next());
+ while(it.hasNext()) {
+ Regexp temp = front(it.next());
+ if(!temp.equals(common))
+ break frontLoop;
+ }
+
+ THashSet<Regexp> set2 = new THashSet<Regexp>();
+ for(Regexp e : set)
+ set2.add(removeFront(e));
+
+ exp = seq(common, simplify(set2));
+ break simpl;
+ }
+
+ backLoop: {
+ Iterator<Regexp> it = set.iterator();
+ Regexp common = back(it.next());
+ while(it.hasNext()) {
+ Regexp temp = back(it.next());
+ if(!temp.equals(common))
+ break backLoop;
+ }
+
+ THashSet<Regexp> set2 = new THashSet<Regexp>();
+ for(Regexp e : set)
+ set2.add(removeBack(e));
+
+ exp = seq(simplify(set2), common);
+ break simpl;
+ }
+ }
+
+ exp = or(set);
+ }
+
+ if(qm && !exp.isNullable())
+ exp = new ROp(exp, '?');
+ return exp;
+ }
+
+ @Override
+ public Regexp simplify() {
+ if(exps.length == 0)
+ return this;
+ THashSet<Regexp> set = new THashSet<Regexp>();
+ for(Regexp exp : exps) {
+ exp = exp.simplify();
+ or(set, exp);
+ }
+ return simplify(set);
+ }
+
+ private static Regexp front(Regexp exp) {
+ if(exp instanceof RSeq) {
+ Regexp[] exps = ((RSeq)exp).exps;
+ if(exps.length == 0)
+ return null;
+ return exps[0];
+ }
+ else
+ return exp;
+ }
+
+ private static Regexp removeFront(Regexp exp) {
+ if(exp instanceof RSeq) {
+ Regexp[] exps = ((RSeq)exp).exps;
+ return seq_(Arrays.asList(exps).subList(1, exps.length));
+ }
+ else
+ return ONE;
+ }
+
+ private static Regexp back(Regexp exp) {
+ if(exp instanceof RSeq) {
+ Regexp[] exps = ((RSeq)exp).exps;
+ if(exps.length == 0)
+ return null;
+ return exps[exps.length-1];
+ }
+ else
+ return exp;
+ }
+
+ private static Regexp removeBack(Regexp exp) {
+ if(exp instanceof RSeq) {
+ Regexp[] exps = ((RSeq)exp).exps;
+ return seq_(Arrays.asList(exps).subList(0, exps.length-1));
+ }
+ else
+ return ONE;
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class RSeq extends Regexp {
+ public final Regexp[] exps;
+
+ RSeq(Regexp[] exps) {
+ this.exps = exps;
+ }
+
+ @Override
+ protected void buildAutomaton(NFA aut, int inState, int outState) {
+ if(exps.length == 0) {
+ aut.addEpsilonTransition(inState, outState);
+ return;
+ }
+ for(int i=exps.length-1;i>0;--i) {
+ int midState = aut.newState();
+ exps[i].buildAutomaton(aut, midState, outState);
+ outState = midState;
+ }
+ exps[0].buildAutomaton(aut, inState, outState);
+ }
+
+ @Override
+ protected void toString(StringBuilder b, int prec) {
+ if(prec >= 2)
+ b.append('(');
+ for(Regexp exp : exps)
+ exp.toString(b, 2);
+ if(prec >= 2)
+ b.append(')');
+ }
+
+ @Override
+ public void toString(StringBuilder b, Namer grammar, int prec) {
+ if(prec >= 2)
+ b.append('(');
+ boolean first = true;
+ for(Regexp exp : exps) {
+ if(first)
+ first = false;
+ else
+ b.append(' ');
+ exp.toString(b, grammar, 2);
+ }
+ if(prec >= 2)
+ b.append(')');
+ }
+
+ @Override
+ protected int getTypeId() {
+ return SEQ;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(obj == this)
+ return true;
+ if(obj == null || obj.getClass() != getClass())
+ return false;
+ RSeq other = (RSeq)obj;
+ return Arrays.equals(exps, other.exps);
+ }
+
+ @Override
+ public int hashCode() {
+ int r = 31340123;
+ for(Regexp exp : exps) {
+ r *= 31;
+ r += exp.hashCode();
+ }
+ return r;
+ }
+
+ @Override
+ public Regexp simplify() {
+ if(exps.length == 0)
+ return this;
+ ArrayList<Regexp> l = new ArrayList<Regexp>(exps.length);
+ for(Regexp exp : this.exps) {
+ exp = exp.simplify();
+ seq(l, exp);
+ }
+ return seq_(l);
+ }
+
+ @Override
+ public boolean isNullable() {
+ for(Regexp exp : exps)
+ if(!exp.isNullable())
+ return false;
+ return true;
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public abstract class Regexp implements Comparable<Regexp> {
+ static final int ATOM = 0;
+ static final int OP = 1;
+ static final int OR = 2;
+ static final int SEQ = 3;
+
+ protected abstract void buildAutomaton(NFA aut, int inState, int outState);
+
+ public static final Regexp ONE = new RSeq(new Regexp[0]);
+ public static final Regexp ZERO = new ROr(new Regexp[0]);
+
+ Regexp() {
+ }
+
+ public NFA toAutomaton() {
+ NFA aut = new NFA();
+ int inState = aut.newState();
+ int outState = aut.newState();
+ buildAutomaton(aut, inState, outState);
+ aut.setInitialState(inState);
+ aut.setAccepts(outState, true);
+ return aut;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder b = new StringBuilder();
+ toString(b, 0);
+ return b.toString();
+ }
+
+ protected abstract void toString(StringBuilder b, int prec);
+
+ protected abstract int getTypeId();
+
+ public abstract boolean isNullable();
+
+ public Regexp simplify() {
+ return this;
+ }
+
+ private static int parseRegexp(int pos, String regexp, AtomicReference<Regexp> result) {
+ ArrayList<Regexp> dc = new ArrayList<Regexp>();
+ ArrayList<Regexp> cc = new ArrayList<Regexp>();
+ loop: while(true) {
+ if(pos == regexp.length())
+ break loop;
+ char c = regexp.charAt(pos++);
+ switch(c) {
+ case '(': {
+ AtomicReference<Regexp> child = new AtomicReference<Regexp>();
+ pos = parseRegexp(pos, regexp, child);
+ cc.add(child.get());
+ } break;
+ case ')':
+ break loop;
+ case '|': {
+ dc.add(seq(cc));
+ cc.clear();
+ } break;
+ case '*': {
+ if(cc.isEmpty())
+ throw new IllegalArgumentException("Encountered * that is not front of anything.");
+ int p = cc.size()-1;
+ cc.set(p, Regexp.star(cc.get(p)));
+ } break;
+ case '?': {
+ if(cc.isEmpty())
+ throw new IllegalArgumentException("Encountered ? that is not front of anything.");
+ int p = cc.size()-1;
+ cc.set(p, Regexp.optional(cc.get(p)));
+ } break;
+ case '+': {
+ if(cc.isEmpty())
+ throw new IllegalArgumentException("Encountered + that is not front of anything.");
+ int p = cc.size()-1;
+ cc.set(p, Regexp.plus(cc.get(p)));
+ } break;
+ default:
+ cc.add(new RAtom(c));
+ }
+ }
+ dc.add(seq(cc));
+ result.set(or(dc));
+ return pos;
+ }
+
+ public static Regexp of(String regexp) {
+ AtomicReference<Regexp> result = new AtomicReference<Regexp>();
+ int finalPos = parseRegexp(0, regexp, result);
+ if(finalPos < regexp.length())
+ throw new IllegalArgumentException("Extra closing parenteses");
+ return result.get();
+ }
+
+ static void seq(ArrayList<Regexp> l, Regexp exp) {
+ if(l.size() > 0 && l.get(0) == ZERO)
+ return;
+ if(exp instanceof RSeq)
+ for(Regexp e : ((RSeq)exp).exps)
+ seq(l, e);
+ else if(exp == ZERO) {
+ l.clear();
+ l.add(exp);
+ }
+ else
+ l.add(exp);
+ }
+
+ static Regexp seq_(List<Regexp> es) {
+ if(es.size() == 0)
+ return ONE;
+ if(es.size() == 1)
+ return es.get(0);
+ Regexp[] eArray = es.toArray(new Regexp[es.size()]);
+ return new RSeq(eArray);
+ }
+
+ public static Regexp seq(Regexp ... exps) {
+ if(exps.length == 0)
+ return ONE;
+ if(exps.length == 1)
+ return exps[0];
+ ArrayList<Regexp> es = new ArrayList<Regexp>();
+ for(Regexp exp : exps)
+ seq(es, exp);
+ return seq_(es);
+ }
+
+ public static Regexp seq(Collection<Regexp> exps) {
+ return seq(exps.toArray(new Regexp[exps.size()]));
+ }
+
+ static void or(THashSet<Regexp> s, Regexp exp) {
+ if(exp instanceof ROr)
+ for(Regexp e : ((ROr)exp).exps)
+ or(s, e);
+ else
+ s.add(exp);
+ }
+
+ static Regexp or_(THashSet<Regexp> es) {
+ if(es.size() == 0)
+ return ZERO;
+ if(es.size() == 1)
+ return es.iterator().next();
+ Regexp[] eArray = es.toArray(new Regexp[es.size()]);
+ Arrays.sort(eArray);
+ return new ROr(eArray);
+ }
+
+ public static Regexp or(Regexp ... exps) {
+ if(exps.length == 0)
+ return ZERO;
+ if(exps.length == 1)
+ return exps[0];
+ THashSet<Regexp> es = new THashSet<Regexp>();
+ for(Regexp exp : exps)
+ or(es, exp);
+ return or_(es);
+ }
+
+ public static Regexp star(Regexp exp) {
+ if(exp == ONE || exp == ZERO)
+ return ONE;
+ return new ROp(exp, '*');
+ }
+
+ public static Regexp plus(Regexp exp) {
+ return new ROp(exp, '+');
+ }
+
+ public static Regexp optional(Regexp exp) {
+ return new ROp(exp, '?');
+ }
+
+ public static Regexp or(Collection<Regexp> exps) {
+ return or(exps.toArray(new Regexp[exps.size()]));
+ }
+
+ @Override
+ public int compareTo(Regexp o) {
+ int tA = getTypeId();
+ int tB = o.getTypeId();
+ if(tA < tB)
+ return -1;
+ if(tA > tB)
+ return 1;
+ switch(tA) {
+ case ATOM: {
+ int sA = ((RAtom)this).symbolId;
+ int sB = ((RAtom)o).symbolId;
+ if(sA < sB)
+ return -1;
+ if(sA > sB)
+ return 1;
+ return 0;
+ }
+ case OP: {
+ ROp a = (ROp)this;
+ ROp b = (ROp)o;
+ if(a.op < b.op)
+ return -1;
+ if(a.op > b.op)
+ return 1;
+ return a.exp.compareTo(b.exp);
+ }
+ case OR:
+ return compare(((ROr)this).exps, ((ROr)o).exps);
+ case SEQ: {
+ return compare(((RSeq)this).exps, ((RSeq)o).exps);
+ }
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private static int compare(Regexp[] a, Regexp[] b) {
+ if(a.length < b.length)
+ return -1;
+ if(a.length > b.length)
+ return 1;
+ for(int i=0;i<a.length;++i) {
+ int temp = a[i].compareTo(b[i]);
+ if(temp != 0)
+ return temp;
+ }
+ return 0;
+ }
+
+ public abstract void toString(StringBuilder b, Namer grammar, int prec);
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+
+public class TestRegexp {
+
+ public static void main(String[] args) {
+
+ Regexp exp = Regexp.of("a*b|ac*");
+
+ System.out.println(exp);
+ DFA aut = exp.toAutomaton().determinize().minimize();
+ System.out.println(aut.toRegexp());
+
+ //AutomatonVisualizer.show(aut);
+
+ }
+
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.procedure.TIntIntProcedure;
+
+
+public interface Automata {
+ int newState();
+ int size();
+ void addTransition(int sourceId, int symbol, int targetId);
+ void forEachTransition(int source, TIntIntProcedure proc);
+ void setAccepts(int id, boolean accepts);
+ boolean getAccepts(int id);
+ void setInitialState(int initialState);
+ int getInitialState();
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.impl.Constants;
+import gnu.trove.list.array.TByteArrayList;
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+public class DFA implements Automata {
+
+ private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
+ private TByteArrayList accepts = new TByteArrayList();
+ private int initialState;
+
+ public int newState() {
+ int stateId = transitions.size();
+ transitions.add(new TIntIntHashMap(
+ Constants.DEFAULT_CAPACITY,
+ Constants.DEFAULT_LOAD_FACTOR,
+ 0, -1));
+ accepts.add((byte)0);
+ return stateId;
+ }
+
+ public int size() {
+ return transitions.size();
+ }
+
+ public DFA copy() {
+ DFA copy = new DFA();
+ for(TIntIntHashMap t : transitions)
+ copy.transitions.add(new TIntIntHashMap(t));
+ copy.accepts = new TByteArrayList(accepts);
+ copy.initialState = initialState;
+ return copy;
+ }
+
+ public void addTransition(int sourceId, int symbol, int targetId) {
+ transitions.get(sourceId).put(symbol, targetId);
+ }
+
+ public int getTransition(int sourceId, int symbol) {
+ return transitions.get(sourceId).get(symbol);
+ }
+
+ public void forEachTransition(int source, final TIntIntProcedure proc) {
+ transitions.get(source).forEachEntry(proc);
+ }
+
+ public int[] nextStates(int id) {
+ return transitions.get(id).keys();
+ }
+
+ public void setAccepts(int id, boolean accepts) {
+ this.accepts.set(id, accepts ? (byte)1 : (byte)0);
+ }
+
+ public boolean getAccepts(int id) {
+ return accepts.get(id)==1;
+ }
+
+ public void setInitialState(int initialState) {
+ this.initialState = initialState;
+ }
+
+ public int getInitialState() {
+ return initialState;
+ }
+
+ public DFA minimize() {
+ // Compute relevant input characters for minimization
+ final TIntIntHashMap symbolMap = new TIntIntHashMap();
+ final int[] symbolArray;
+ {
+ final TIntArrayList l = new TIntArrayList();
+ TIntProcedure proc = new TIntProcedure() {
+ @Override
+ public boolean execute(int value) {
+ if(!symbolMap.containsKey(value)) {
+ symbolMap.put(value, l.size());
+ l.add(value);
+ }
+ return true;
+ }
+ };
+ for(TIntIntHashMap tMap : transitions)
+ tMap.forEachKey(proc);
+ symbolArray = l.toArray();
+ }
+ int symbolCount = symbolMap.size();
+ int stateCount = transitions.size();
+
+ // Inverse automata
+ final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
+ for(int i=0;i<inverse.length;++i)
+ inverse[i] = new TIntArrayList[symbolCount];
+ for(int sourceId=0;sourceId<stateCount;++sourceId) {
+ TIntIntHashMap tMap = transitions.get(sourceId);
+ for(int j=0;j<symbolCount;++j) {
+ int targetId = tMap.get(symbolArray[j]);
+ if(targetId == -1)
+ targetId = stateCount;
+ TIntArrayList l = inverse[targetId][j];
+ if(l == null) {
+ l = new TIntArrayList();
+ inverse[targetId][j] = l;
+ }
+ l.add(sourceId);
+ }
+ }
+
+ /*for(int i=0;i<inverse.length;++i)
+ for(int j=0;j<symbolCount;++j)
+ System.out.println(i + " " + j + " -> " + inverse[i][j]);
+ */
+
+ //
+ int[] ids = new int[stateCount+1];
+ final int[] memPartion = new int[stateCount+1];
+ TIntArrayList partionBegin = new TIntArrayList();
+ TIntArrayList partionEnd = new TIntArrayList();
+ TIntArrayList stack = new TIntArrayList();
+ TIntArrayList scheduled = new TIntArrayList();
+
+ // Initial partition
+ {
+ int min = 0;
+ int max = stateCount;
+ ids[min++] = stateCount;
+ memPartion[stateCount] = 0;
+ for(int i=0;i<stateCount;++i) {
+ if(accepts.get(i)==1) {
+ ids[max--] = i;
+ memPartion[i] = 1;
+ }
+ else {
+ ids[min++] = i;
+ memPartion[i] = 0;
+ }
+ }
+ partionBegin.add(0);
+ partionBegin.add(min);
+ partionEnd.add(min);
+ partionEnd.add(ids.length);
+ scheduled.add(0);
+ scheduled.add(0);
+ if(min < ids.length/2) {
+ stack.add(0);
+ scheduled.set(0, 1);
+ }
+ else {
+ stack.add(1);
+ scheduled.set(1, 1);
+ }
+ }
+
+ // Refinement
+ while(!stack.isEmpty()) {
+ int partionId = stack.removeAt(stack.size()-1);
+ scheduled.set(partionId, 0);
+ int begin = partionBegin.get(partionId);
+ int end = partionEnd.get(partionId);
+
+ for(int j=0;j<symbolCount;++j) {
+ TIntHashSet invStates = new TIntHashSet();
+ for(int i=begin;i<end;++i) {
+ int s = ids[i];
+ TIntArrayList inv = inverse[s][j];
+ if(inv != null)
+ invStates.addAll(inv);
+ }
+ int[] invStatesArray = invStates.toArray();
+
+ TIntHashSet partions = new TIntHashSet();
+ for(int s : invStatesArray)
+ partions.add(memPartion[s]);
+ int[] partionsArray = partions.toArray();
+
+ for(int p : partionsArray) {
+ int pBegin = partionBegin.get(p);
+ int pEnd = partionEnd.get(p);
+ boolean splits = false;
+ for(int k=pBegin;k<pEnd;++k)
+ if(!invStates.contains(ids[k])) {
+ splits = true;
+ break;
+ }
+ if(splits) {
+ int p2 = partionBegin.size();
+ int p2End = pEnd;
+ for(int k=pBegin;k<pEnd;++k) {
+ int s = ids[k];
+ if(invStates.contains(s)) {
+ memPartion[s] = p2;
+ --pEnd;
+ ids[k] = ids[pEnd];
+ ids[pEnd] = s;
+ --k;
+ }
+ }
+ partionEnd.set(p, pEnd);
+ partionBegin.add(pEnd);
+ partionEnd.add(p2End);
+
+ if(scheduled.get(p) == 1) {
+ scheduled.add(1);
+ stack.add(p2);
+ }
+ else {
+ if(pEnd - pBegin <= p2End - pEnd) {
+ stack.add(p);
+ scheduled.add(0);
+ scheduled.set(p, 1);
+ }
+ else {
+ stack.add(p2);
+ scheduled.add(1);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Print partitions
+ /*System.out.println("Partition count: " + partionBegin.size());
+ for(int i=0;i<memPartion.length;++i)
+ System.out.println(" " + i + " in " + memPartion[i]);
+ */
+
+ // Put reachable states into new automaton
+ final DFA aut = new DFA();
+ int failurePartion = memPartion[stateCount];
+ final int[] sArray = new int[partionBegin.size()];
+ sArray[failurePartion] = -1;
+ for(int i=0;i<partionBegin.size();++i)
+ if(i != failurePartion)
+ sArray[i] = aut.newState();
+ for(int i=0;i<partionBegin.size();++i)
+ if(i != failurePartion) {
+ final int sourceId = sArray[i];
+ int bId = ids[partionBegin.get(i)];
+ forEachTransition(bId, new TIntIntProcedure() {
+ @Override
+ public boolean execute(int a, int b) {
+ aut.addTransition(sourceId, a, sArray[memPartion[b]]);
+ return true;
+ }
+ });
+ aut.setAccepts(sourceId, getAccepts(bId));
+ }
+ aut.setInitialState(sArray[memPartion[initialState]]);
+ return aut;
+ }
+
+ public Regexp toRegexp(int from, byte[] to) {
+ int stateCount = size();
+ final int[] order = new int[stateCount];
+ for(int i=0;i<stateCount;++i)
+ order[i] = i;
+ order[0] = from;
+ order[from] = 0;
+
+ final Regexp[][] a = new Regexp[stateCount][stateCount];
+ final Regexp[] b = new Regexp[stateCount];
+ for(int i=0;i<stateCount;++i) {
+ b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
+ for(int j=0;j<stateCount;++j)
+ a[i][j] = Regexp.ZERO;
+ final Regexp[] row = a[i];
+ forEachTransition(order[i], new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol, int targetId) {
+ targetId = order[targetId];
+ row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
+ return true;
+ }
+ });
+ }
+
+ for(int n=stateCount-1;n>=0;--n) {
+ Regexp ss = Regexp.star(a[n][n]);
+ b[n] = Regexp.seq(ss, b[n]);
+ for(int j=0;j<stateCount;++j)
+ a[n][j] = Regexp.seq(ss, a[n][j]);
+ for(int i=0;i<n;++i) {
+ b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
+ for(int j=0;j<n;++j)
+ a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
+ }
+ }
+
+ return b[0];
+ }
+
+ public Regexp toRegexp() {
+ return toRegexp(initialState, accepts.toArray()).simplify();
+ }
+
+ public Regexp toRegexpTo(int position) {
+ int stateCount = size();
+ byte[] targetArray = new byte[stateCount];
+ targetArray[position] = 1;
+ return toRegexp(initialState, targetArray).simplify();
+ }
+
+ public Regexp toRegexpFrom(int position) {
+ return toRegexp(position, accepts.toArray()).simplify();
+ }
+
+ public Regexp toPositionalRegexp(int position) {
+ DFA aut = copy();
+ int s = transitions.size();
+ aut.transitions.add(aut.transitions.get(position));
+ aut.transitions.set(position, new TIntIntHashMap());
+ aut.addTransition(position, 0x80000000, s);
+ aut.accepts.add(accepts.get(position));
+ aut.accepts.set(position, (byte)0);
+ return aut.toRegexp();
+ }
+}
--- /dev/null
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntObjectProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+
+public class NFA implements Automata {
+
+ private static class State {
+ TIntObjectHashMap<TIntArrayList> transitions = new TIntObjectHashMap<TIntArrayList>();
+ TIntArrayList epsilonTransitions = new TIntArrayList();
+ boolean accepts;
+
+ private void addTransition(int symbol, int target) {
+ TIntArrayList l = transitions.get(symbol);
+ if(l == null) {
+ l = new TIntArrayList();
+ l.add(target);
+ transitions.put(symbol, l);
+ }
+ else if(!l.contains(target))
+ l.add(target);
+ }
+
+ private void addEpsilonTransition(int target) {
+ if(!epsilonTransitions.contains(target))
+ epsilonTransitions.add(target);
+ }
+ }
+
+ private ArrayList<State> states = new ArrayList<State>();
+ private int initialState;
+
+ public int size() {
+ return states.size();
+ }
+
+ public void setInitialState(int initialState) {
+ this.initialState = initialState;
+ }
+
+ public int getInitialState() {
+ return initialState;
+ }
+
+ public int newState() {
+ int id = states.size();
+ states.add(new State());
+ return id;
+ }
+
+ public void setAccepts(int id, boolean accepts) {
+ states.get(id).accepts = accepts;
+ }
+
+ public boolean getAccepts(int id) {
+ return states.get(id).accepts;
+ }
+
+ public void addTransition(int source, int symbol, int target) {
+ states.get(source).addTransition(symbol, target);
+ }
+
+ public void forEachTransition(int source, final TIntIntProcedure proc) {
+ states.get(source).transitions.forEachEntry(new TIntObjectProcedure<TIntArrayList>() {
+ @Override
+ public boolean execute(int a, TIntArrayList b) {
+ for(int i=0;i<b.size();++i)
+ if(!proc.execute(a, b.get(i)))
+ return false;
+ return true;
+ }
+ });
+ }
+
+ public void addEpsilonTransition(int source, int target) {
+ states.get(source).addEpsilonTransition(target);
+ }
+
+ public void forEachEpsilonTransition(int source, final TIntProcedure proc) {
+ states.get(source).epsilonTransitions.forEach(proc);
+ }
+
+ private TIntArrayList closure(final TIntHashSet stateSet) {
+ final TIntArrayList result = new TIntArrayList(stateSet);
+ TIntProcedure proc = new TIntProcedure() {
+ @Override
+ public boolean execute(int value) {
+ if(stateSet.add(value))
+ result.add(value);
+ return true;
+ }
+ };
+ for(int i=0;i<result.size();++i)
+ forEachEpsilonTransition(result.get(i), proc);
+ result.sort();
+ return result;
+ }
+
+ public DFA determinize() {
+ final DFA aut = new DFA();
+
+ final ArrayList<TIntArrayList> stack = new ArrayList<TIntArrayList>();
+ final TObjectIntHashMap<TIntArrayList> map = new TObjectIntHashMap<TIntArrayList>();
+ {
+ TIntHashSet initialSet = new TIntHashSet(4);
+ initialSet.add(getInitialState());
+ TIntArrayList stateList = closure(initialSet);
+
+ int stateId = aut.newState();
+ map.put(stateList, stateId);
+ aut.setInitialState(stateId);
+
+ stack.add(stateList);
+ }
+
+ while(!stack.isEmpty()) {
+ TIntArrayList curList = stack.remove(stack.size()-1);
+ int[] stateArray = curList.toArray();
+ final int id = map.get(curList);
+
+ // Transitions
+ final TIntObjectHashMap<TIntHashSet> transitions =
+ new TIntObjectHashMap<TIntHashSet>();
+ TIntIntProcedure proc = new TIntIntProcedure() {
+ @Override
+ public boolean execute(int symbol, int b) {
+ TIntHashSet set = transitions.get(symbol);
+ if(set == null) {
+ set = new TIntHashSet();
+ transitions.put(symbol, set);
+ }
+ set.add(b);
+ return true;
+ }
+ };
+ for(int s : stateArray)
+ forEachTransition(s, proc);
+
+ // Create transition targets
+ transitions.forEachEntry(new TIntObjectProcedure<TIntHashSet>() {
+ @Override
+ public boolean execute(int symbol, TIntHashSet b) {
+ TIntArrayList stateList = closure(b);
+
+ if(map.containsKey(stateList))
+ aut.addTransition(id, symbol, map.get(stateList));
+ else {
+ int stateId = aut.newState();
+ map.put(stateList, stateId);
+ stack.add(stateList);
+
+ aut.addTransition(id, symbol, stateId);
+ }
+ return true;
+ }
+ });
+
+ // Accepts
+ for(int s : stateArray)
+ if(getAccepts(s)) {
+ aut.setAccepts(id, true);
+ break;
+ }
+ }
+
+ return aut;
+ }
+
+}