]> gerrit.simantics Code Review - simantics/platform.git/commitdiff
Moved SCL parser generator to platform repository. 83/283/1
authorHannu Niemistö <hannu.niemisto@semantum.fi>
Fri, 20 Jan 2017 12:03:44 +0000 (14:03 +0200)
committerHannu Niemistö <hannu.niemisto@semantum.fi>
Fri, 20 Jan 2017 12:03:44 +0000 (14:03 +0200)
In addition, implemented the parser generator's parser with parser
generator itself to remove Antlr-dependency.

refs #6995

Change-Id: I08537c59254ddd6ae49d9c89d36e8596079f0fb2

43 files changed:
bundles/org.simantics.scl.compiler/generateGrammarLexer.xml [new file with mode: 0644]
bundles/org.simantics.scl.compiler/generateSCLLexer.xml [moved from bundles/org.simantics.scl.compiler/generateParser.xml with 100% similarity]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParser.java
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLParserImpl.java
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedParseTable.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedTable.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/ErrorTable.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GCCompress.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GraphColoring.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateParser.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/Parser.java.template [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/Item.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ItemSet.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTable.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Production.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GenerateParserParser.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Grammar.grammar [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.flex [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarTerminals.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Token.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Namer.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RAtom.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROp.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROr.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/TestRegexp.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/Automata.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java [new file with mode: 0644]
bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/NFA.java [new file with mode: 0644]

diff --git a/bundles/org.simantics.scl.compiler/generateGrammarLexer.xml b/bundles/org.simantics.scl.compiler/generateGrammarLexer.xml
new file mode 100644 (file)
index 0000000..ff9ecb3
--- /dev/null
@@ -0,0 +1,13 @@
+<project>
+
+       <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpath="tools/jflex-1.6.1.jar"/>
+       
+       <property name="package" location="src/org/simantics/scl/compiler/parser/grammar/input2"/>
+       
+       <jflex
+           file="${package}/GrammarLexer.flex"
+           destdir="src/"
+               nobak="true"
+       />
+       
+</project>
\ No newline at end of file
index 0312cf6f8819658e0a88b41a88e86565e63acc71..6bcffa14789b96542b16cccf2a5bbf8d7163f0c7 100644 (file)
@@ -323,4 +323,5 @@ symbolWithoutMinus
 /******************************************************************************
  * Auxiliary tokens
  */
-dummy = COMMENT EOL ;
\ No newline at end of file
+dummy = COMMENT EOL                                          # Dummy 
+      ;
\ No newline at end of file
index 532f6e8a3f62941644a6c57a501e7a13fa08381e..6c8ee5be9c0de160fd7123f0027c9dc5a9ff18d4 100644 (file)
@@ -252,18 +252,22 @@ public abstract class SCLParser {
     
     protected abstract RuntimeException syntaxError(Token token, String description);
     
-    private static String describeAction(int action) {
+    private static String describeAction(boolean isGoto, int action) {
         if(action == ERROR_ACTION)
             return "ERROR";
         if(action == ACCEPT_ACTION)
             return "ACCEPT";
         StringBuilder b = new StringBuilder();
-        if((action & REDUCE_MASK) != 0) {
-            action ^= REDUCE_MASK;
-            b.append("REDUCE");
+        if(isGoto)
+            b.append("GOTO ");
+        else {
+            if((action & REDUCE_MASK) != 0) {
+                action ^= REDUCE_MASK;
+                b.append("REDUCE");
+            }
+            else
+                b.append("SHIFT");
         }
-        else
-            b.append("SHIFT");
         if((action & POP_MASK) != 0) {
             action ^= POP_MASK;
             b.append(" POP");
@@ -276,34 +280,46 @@ public abstract class SCLParser {
         return b.toString();
     }
     
+    private void printState(int state) {
+        System.out.print("state=" + state + ":");
+        for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+            Object s = symbolStack[i];
+            if(s instanceof Token)
+                System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+            else if(s == null)
+                System.out.print(" null");
+            else
+                System.out.print(" " + s.getClass().getSimpleName());
+            while(j>=0 && symbolStackPositionStack[j]==i)
+                System.out.print(" (" + stateStack[j--] + ")");
+        }
+        System.out.println();
+    }
+    
     private Object parse(int state) {
         while(true) {
             Token token = nextToken();
             int tokenId = token.id;
+            if(TRACE)
+                System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
             while(true) {
+                if(TRACE)
+                    printState(state);
                 short action = getAction(state, tokenId);
-                if(TRACE) {
-                    System.out.println("state=" + state + ", tokenId=" + TERMINAL_NAMES[tokenId] + 
-                        ", action=" + describeAction(action));
-                    System.out.print("   ");
-                    for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {                    
-                        Object s = symbolStack[i];
-                        if(s instanceof Token)
-                            System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
-                        else
-                            System.out.print(" " + s.getClass().getSimpleName());                            
-                        while(j>=0 && symbolStackPositionStack[j]==i)
-                            System.out.print(" (" + stateStack[j--] + ")");
-                    }
-                    System.out.println();
-                }
+                if(TRACE)
+                    System.out.println("    -> action=" + describeAction(false, action));
                 //System.out.println(STATE_DESCRIPTIONS[state]);
                 if((action & REDUCE_MASK) != 0) {
                     if(action == ACCEPT_ACTION)
                         return symbolStack[symbolStackLength-1];
                     if(action == ERROR_ACTION)
                         throw syntaxError(token, parseErrorDescription(state, token, tokenId));
-                    stateStackLength -= (action >>> 13)&3;
+                    int popAmount = (action >>> 13)&3;
+                    if(TRACE) {
+                        if(popAmount > 0)
+                            System.out.println("    POP " + popAmount);
+                    }
+                    stateStackLength -= popAmount;
                     action &= STATE_MASK;
                     
                     int reductionBegin = symbolStackPositionStack[--stateStackLength];
@@ -318,7 +334,16 @@ public abstract class SCLParser {
                     symbolStack[symbolStackLength] = symbol;
                     
                     state = stateStack[stateStackLength];
+                    if(TRACE) {
+                        ++symbolStackLength;
+                        printState(state);
+                        --symbolStackLength;
+                        System.out.println("    nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+                    }
                     action = getGoto(state, PRODUCT_LHS[action]);
+                    if(TRACE)
+                        System.out.println("    -> action=" + describeAction(true, action));
+                        
                     // Pop state
                     if((action & POP_MASK) != 0) {
                         --stateStackLength;
@@ -637,7 +662,7 @@ public abstract class SCLParser {
         case 124:
             return reduceApplyType();
         case 125:
-            return reduceDummy1();
+            return reduceDummy();
 
         default:
             throw new RuntimeException("Internal parser error.");
@@ -1136,7 +1161,7 @@ public abstract class SCLParser {
     /**
      * dummy ::= COMMENT EOL
      */
-    protected abstract Object reduceDummy1();
+    protected abstract Object reduceDummy();
 
     protected void postReduce(Object reduced) {
     }
index 7477c38900b7f7c05e8eef4dcc3c4757c64a8755..5498b16b5f0552df54669752cd807e6dde64a53b 100644 (file)
@@ -882,11 +882,6 @@ public class SCLParserImpl extends SCLParser {
         return new TApplyAst((TypeAst)get(0), parameters);
     }
 
-    @Override
-    protected Object reduceDummy1() {
-        throw new UnsupportedOperationException();
-    }
-
     @SuppressWarnings("unchecked")
     @Override
     protected void postReduce(Object reduced) {
@@ -1279,4 +1274,9 @@ public class SCLParserImpl extends SCLParser {
         return new CHRStatement((ListQualifier[])get(1), (ListQualifier[])get(3));
     }
 
+    @Override
+    protected Object reduceDummy() {
+        throw new UnsupportedOperationException();
+    }
+
 }
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/GenerateSCLParser.java
new file mode 100644 (file)
index 0000000..abfc214
--- /dev/null
@@ -0,0 +1,19 @@
+package org.simantics.scl.compiler.parser;
+
+import java.io.File;
+
+import org.simantics.scl.compiler.parser.generator.ParserGenerator;
+
+public class GenerateSCLParser {
+
+    public static void main(String[] args) throws Exception {
+        File plugin = new File(GenerateSCLParser.class.getResource(".").getPath().replace("%20", " "))
+        .getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile();
+        System.out.println(plugin);
+        ParserGenerator.createParser(
+                "org.simantics.scl.compiler.internal.parsing.parser",
+                "SCLSyntaxErrorException",
+                new File(plugin, "src/org/simantics/scl/compiler/internal/parsing/parser/SCL.grammar"));
+    }
+    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/ParserGenerator.java
new file mode 100644 (file)
index 0000000..886ce72
--- /dev/null
@@ -0,0 +1,54 @@
+package org.simantics.scl.compiler.parser.generator;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.java.GenerateEnum;
+import org.simantics.scl.compiler.parser.generator.java.GenerateParser;
+import org.simantics.scl.compiler.parser.generator.table.ParseTable;
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.input.GrammarParserImpl;
+
+public class ParserGenerator {
+    
+    public static void createParser(String packageName, String exceptionName, File grammarFile) throws IOException {
+        // Read grammar and check it
+        FileInputStream inputStream = new FileInputStream(grammarFile);
+        Grammar grammar = GrammarParserImpl.read(inputStream);
+        inputStream.close();
+        grammar.check();
+        
+        AnaGrammar anaGrammar = new AnaGrammar(grammar);
+        ParseTable table = ParseTableBuilder.build(anaGrammar);
+
+        File directory = grammarFile.getParentFile();
+        String name = grammarFile.getName();
+        {
+            int p = name.lastIndexOf('.');
+            if(p > 0)
+                name = name.substring(0, p);
+        }
+
+        // Write parse table
+        CompressedParseTable compressedTable = table.compress();
+        compressedTable.writeTo(new File(directory, name + "Parser.dat"));
+
+        // Write classes
+        {
+            String className = name + "Terminals"; 
+            GenerateEnum.generate(new File(directory, className + ".java"), 
+                    packageName, className,
+                    anaGrammar.terminalNames);
+        }
+        {
+            String className = name + "Parser";
+            new GenerateParser(packageName, className, anaGrammar, exceptionName, compressedTable)
+            .generate(new File(directory, className + ".java"));
+        }
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressTable.java
new file mode 100644 (file)
index 0000000..d2b7971
--- /dev/null
@@ -0,0 +1,101 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.util.Arrays;
+
+public class CompressTable {
+
+    private static class Row implements Comparable<Row> {
+        int id;
+        int density;
+        int minPos;
+        int maxPos;
+        int[] data;
+        int displacement;
+        
+        @Override
+        public int compareTo(Row o) {
+            return o.density < density ? 1 : o.density > density ? -1 : 0;
+        }
+    }
+    
+    public static CompressedTable compress(int[][] table_) {
+        // Sort table by size
+        Row[] table = new Row[table_.length];
+        for(int i=0;i<table.length;++i) {
+            Row row = new Row();
+            row.id = i;
+            row.data = table_[i];
+            int density = 0;
+            int minPos = -1;
+            int maxPos = -1;
+            for(int j=0;j<row.data.length;++j) {
+                int el = row.data[j];
+                if(el != 0) {
+                    ++density;
+                    if(minPos == -1)
+                        minPos = j;
+                    maxPos = j;
+                }
+            }
+            row.density = density;
+            row.minPos = minPos;
+            row.maxPos = maxPos;
+            table[i] = row;
+        }
+        Arrays.sort(table);
+        
+        // Start to fill
+        int width = table_.length == 0 ? 0 : table_[0].length;
+        int capacity = 2*table_.length*width;
+        int[] rowIds = new int[capacity];
+        Arrays.fill(rowIds, -1);
+        int[] values = new int[capacity];
+        for(Row row : table) {
+            if(row.density == 0) {
+                row.displacement = capacity/2;
+                continue;
+            }
+            tryAgain: for(int dId=0;dId<capacity;++dId) {
+                int d = capacity/2 + ((dId&1)==0 ? dId/2 : -1-dId/2);
+                for(int p = row.minPos;p<=row.maxPos;++p) {
+                    int newValue = row.data[p];
+                    
+                    if(newValue != 0) {
+                        int id = d+p;
+                        if(rowIds[id] >= 0)
+                            continue tryAgain;
+                    }
+                }
+                row.displacement = d;
+                for(int p = row.minPos;p<=row.maxPos;++p) {
+                    int val = row.data[p];
+                    if(val != 0) {
+                        int id = d+p;
+                        rowIds[id] = row.id;
+                        values[id] = val;
+                    }
+                }
+                break;
+            }
+        }
+        
+        // Produce final tables
+        int minDis=capacity;
+        int maxDis=0;
+        for(Row row : table) {
+            int d = row.displacement;
+            if(d < minDis)
+                minDis = d;
+            if(d > maxDis)
+                maxDis = d;
+        }
+        int[] displacement = new int[table.length];
+        for(int i=0;i<table.length;++i)
+            displacement[table[i].id] = table[i].displacement-minDis;
+        rowIds = Arrays.copyOfRange(rowIds, minDis, maxDis+width);
+        values = Arrays.copyOfRange(values, minDis, maxDis+width);
+        
+        return new CompressedTable(rowIds, values, displacement);
+    }
+    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedParseTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedParseTable.java
new file mode 100644 (file)
index 0000000..039f253
--- /dev/null
@@ -0,0 +1,40 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+public class CompressedParseTable {
+    public final CompressedTable actionTable;
+    public final int[] errorTable;
+    public final CompressedTable gotoTable;
+    public final int[] productionInfo;
+    public final int[] initialStates;
+    public final String[] stateDescriptions;
+    
+    public CompressedParseTable(CompressedTable actionTable,
+            int[] errorTable,
+            CompressedTable gotoTable, int[] productionInfo,
+            int[] initialStates,
+            String[] stateDescriptions) {
+        this.actionTable = actionTable;
+        this.errorTable = errorTable;
+        this.gotoTable = gotoTable;
+        this.productionInfo = productionInfo;
+        this.initialStates = initialStates;
+        this.stateDescriptions = stateDescriptions;
+    }
+    
+    public void writeTo(File file) throws IOException {
+        FileOutputStream stream = new FileOutputStream(file);
+        DataOutputStream output = new DataOutputStream(stream);
+        actionTable.writeTo(output);
+        for(int val : errorTable)
+            output.writeInt(val);
+        gotoTable.writeTo(output);
+        for(int val : productionInfo)
+            output.writeInt(val);
+        output.close();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/CompressedTable.java
new file mode 100644 (file)
index 0000000..b0a6c45
--- /dev/null
@@ -0,0 +1,26 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+
+public class CompressedTable {
+    public final int[] rowIndex;
+    public final int[] columnIndex;
+    public final int[] table;
+    
+    public CompressedTable(int[] rowIndex, int[] columnIndex, int[] table) {
+        this.rowIndex = rowIndex;
+        this.columnIndex = columnIndex;
+        this.table = table;
+    }
+    
+    public void writeTo(DataOutputStream output) throws IOException {
+        for(int v : rowIndex)
+            output.writeInt(v);
+        for(int v : columnIndex)
+            output.writeInt(v);
+        for(int v : table)
+            output.writeShort(v);
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/ErrorTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/ErrorTable.java
new file mode 100644 (file)
index 0000000..cd129bc
--- /dev/null
@@ -0,0 +1,18 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+
+public class ErrorTable {
+    public static int[] createErrorTable(int[][] table) {
+        int columns = table[0].length;
+        int[] result = new int[(table.length*columns+31)/32];
+        int p=0;
+        for(int[] row : table)
+            for(int v : row) {
+                if(v == ParseTableBuilder.ERROR_ACTION)
+                    result[p/32] |= 1 << (p%32); 
+                ++p;
+            }
+        return result;
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GCCompress.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GCCompress.java
new file mode 100644 (file)
index 0000000..b714fe7
--- /dev/null
@@ -0,0 +1,116 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.table.ParseTableBuilder;
+
+public class GCCompress {
+    
+    public static final int DONT_CARE = ParseTableBuilder.ERROR_ACTION;
+    
+    private static int[][] compressRows(int[] colors, final int[][] table) {
+        final int columns = table[0].length;
+        int colorCount = GraphColoring.color(colors, new GraphColoring.ColGraph() {
+            @Override
+            public int size() {
+                return table.length;
+            }
+            
+            @Override
+            public boolean areConnected(int a, int b) {
+                int[] aRow = table[a];
+                int[] bRow = table[b];
+                for(int i=0;i<columns;++i) {
+                    int aV = aRow[i];
+                    int bV = bRow[i];
+                    if(aV != DONT_CARE && bV != DONT_CARE && aV != bV)
+                        return true;
+                }
+                return false;
+            }
+        });
+        
+        final int[][] compr = new int[colorCount][columns];
+        for(int i=0;i<compr.length;++i)
+            Arrays.fill(compr[i], DONT_CARE);
+        
+        for(int i=0;i<table.length;++i) {
+            int color = colors[i];
+            int[] inRow = table[i];
+            int[] outRow = compr[color];
+            for(int j=0;j<columns;++j) {
+                int v = inRow[j];
+                if(v != DONT_CARE)
+                    outRow[j] = v;
+            }
+        }
+        
+        return compr;
+    }
+    
+    private static int[][] compressColumns(int[] colors, final int[][] table) {
+        final int columns = table[0].length;
+        int colorCount = GraphColoring.color(colors, new GraphColoring.ColGraph() {
+            @Override
+            public int size() {
+                return columns;
+            }
+            
+            @Override
+            public boolean areConnected(int a, int b) {
+                for(int i=0;i<table.length;++i) {
+                    int[] row = table[i];
+                    int aV = row[a];
+                    int bV = row[b];
+                    if(aV != DONT_CARE && bV != DONT_CARE && aV != bV)
+                        return true;
+                }
+                return false;
+            }
+        });
+        
+        final int[][] compr = new int[table.length][colorCount];
+        for(int i=0;i<compr.length;++i) {
+            Arrays.fill(compr[i], DONT_CARE);
+        }
+        for(int i=0;i<table.length;++i) {
+            for(int j=0;j<columns;++j) {
+                int v = table[i][j];
+                if(v != DONT_CARE) {
+                    int color = colors[j];
+                    compr[i][color] = v;
+                }
+            }
+        }
+        
+        return compr;
+    }
+    
+    public static CompressedTable compress(int[][] table) {
+        System.out.println("Compress:");
+        System.out.println("    Rows: " + table.length);
+        System.out.println("    Columns: " + table[0].length);
+        
+        int[] rowIndex = new int[table.length];
+        int[] columnIndex = new int[table[0].length];
+        
+        table = compressRows(rowIndex, table);
+        table = compressColumns(columnIndex, table);
+        
+        System.out.println("    Compressed rows: " + table.length);
+        System.out.println("    Compressed columns: " + table[0].length);
+        
+        int columns = table[0].length;        
+        
+        int[] compressedTable = new int[table.length * columns];
+        for(int i=0;i<table.length;++i)
+            System.arraycopy(table[i], 0, compressedTable, i*columns, columns);
+        for(int i=0;i<rowIndex.length;++i)
+            rowIndex[i] *= columns;
+        return new CompressedTable(rowIndex, columnIndex, compressedTable);
+        
+        //for(int[] row : table)
+        //    System.out.println(Arrays.toString(row));
+    }
+    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GraphColoring.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/compression/GraphColoring.java
new file mode 100644 (file)
index 0000000..f0d261f
--- /dev/null
@@ -0,0 +1,78 @@
+package org.simantics.scl.compiler.parser.generator.compression;
+
+
+public class GraphColoring {
+
+    public static interface ColGraph {
+        int size();
+        boolean areConnected(int a, int b);
+    }
+
+    /**
+     * Finds a coloring of the graph so that all connected vertices
+     * have a different color. Returns the number of colors used.
+     */
+    public static int color(int[] color, ColGraph graph) {
+        int size = graph.size();
+
+        // unassigned/unassignedCount is a list of unassigned vertex ids
+        int[] unassigned = new int[size];
+        for(int i=0;i<size;++i)
+            unassigned[i] = i;
+        int unassignedCount = size;
+
+        // blockedCount[vertexId] is the number of blocked colors for vertexId
+        int[] blockedCount = new int[size];
+        
+        // blocked[color][vertexId] is true, if color is not a possible color for vertexId
+        boolean[][] blocked = new boolean[size][size];
+        
+        // Number of colors currently in use
+        int colorCount = 0;
+
+        while(unassignedCount > 0) {
+            // Choose a vertex is bestId from unassigned list so that
+            // bestBlockedCount=blockedCount[bestId] is maximized
+            int bestId = 0;
+            int bestUnassignedTableId = 0;
+            int bestBlockedCount = -1;
+            for(int i=0;i<unassignedCount;++i) {
+                int id = unassigned[i];
+                int tempCount = blockedCount[id];
+                if(tempCount > bestBlockedCount) {
+                    bestUnassignedTableId = i;
+                    bestId = id;
+                    bestBlockedCount = tempCount;
+                }
+            }
+
+            // Remove from unassigned table
+            unassigned[bestUnassignedTableId] = unassigned[--unassignedCount];
+
+            // Choose color
+            int chosenColor = 0;
+            if(bestBlockedCount == colorCount) // All colors are blocked 
+                chosenColor = colorCount++;
+            else { // There is unblocked color
+                for(int i=0;i<colorCount;++i)
+                    if(!blocked[i][bestId]) {
+                        chosenColor = i;
+                        break;
+                    }
+            }
+            color[bestId] = chosenColor;
+
+            // Update blocked table
+            boolean[] blockedRow = blocked[chosenColor];
+            for(int i=0;i<unassignedCount;++i) {
+                int id = unassigned[i];
+                if(!blockedRow[id] && graph.areConnected(bestId, id)) {
+                    blockedRow[id] = true;
+                    ++blockedCount[id];
+                }
+            }
+        }
+        return colorCount;
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java
new file mode 100644 (file)
index 0000000..dc79e1e
--- /dev/null
@@ -0,0 +1,234 @@
+package org.simantics.scl.compiler.parser.generator.grammar;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.Namer;
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+public class AnaGrammar implements Namer {
+    
+    public ArrayList<Prod> prods = new ArrayList<Prod>(); 
+    public int[] nonterminalPos;
+    
+    public final String[] terminalNames;
+    public final String[] nonterminalNames;
+
+    public final int[] initialNonterminals;
+    
+    public boolean[] nullable;
+    public int[][] first;
+    
+    
+    public AnaGrammar(Grammar grammar) {
+        initialNonterminals = grammar.initialNonterminals;
+        terminalNames = Arrays.copyOf(grammar.terminalNames, grammar.terminalNames.length+1);
+        terminalNames[terminalNames.length-1] = "EOF";
+        nonterminalNames = Arrays.copyOf(grammar.nonterminalNames,
+                grammar.nonterminalNames.length+initialNonterminals.length);
+        for(int i=1;i<=initialNonterminals.length;++i)
+            nonterminalNames[nonterminalNames.length-i] = "init$" + i;
+        
+        // Convert grammar
+        ArrayList<Prod>[] prodMap = new ArrayList[nonterminalNames.length];
+        for(int i=0;i<nonterminalNames.length;++i)
+            prodMap[i] = new ArrayList<Prod>(); 
+        for(Production production : grammar.productions) {
+            Prod prod = new Prod(production.name, ~production.lhs,
+                    production.rhs.toAutomaton().determinize().minimize(),
+                    production.annotations);
+            prodMap[prod.lhs].add(prod);
+        }
+        
+        // Initial production
+        for(int i=1;i<=initialNonterminals.length;++i) {
+            DFA dfa = new DFA();
+            int s0 = dfa.newState();
+            dfa.setInitialState(s0);
+            int s1 = dfa.newState();
+            dfa.addTransition(s0, initialNonterminals[i-1], s1);
+            int s2 = dfa.newState();
+            dfa.addTransition(s1, terminalNames.length-1, s2);
+            dfa.setAccepts(s2, true);
+            Prod prod = new Prod("Init", nonterminalNames.length-i, dfa, new TIntByteHashMap());
+            prodMap[prod.lhs].add(prod);
+        }
+        
+        TIntArrayList pos = new TIntArrayList();
+        for(int i=0;i<nonterminalNames.length;++i) {
+            pos.add(prods.size());
+            prods.addAll(prodMap[i]);
+        }
+        pos.add(prods.size());
+        nonterminalPos = pos.toArray();
+        
+        // Basic analysis
+        computeNullableNonterminals();
+    }
+    
+    private static class FrontierItem {
+        public final int production;
+        public final int state;
+        
+        public FrontierItem(int production, int state) {
+            this.production = production;
+            this.state = state;
+        }
+        
+        @Override
+        public int hashCode() {
+            return 31*state + production;
+        }
+        
+        @Override
+        public boolean equals(Object obj) {
+            if(this == obj)
+                return true;
+            if(obj == null || obj.getClass() != getClass())
+                return false;
+            FrontierItem other = (FrontierItem)obj;
+            return other.state == state && other.production == production;
+        }
+    }
+    
+    private void computeNullableNonterminals() {
+        int nonterminalCount = nonterminalNames.length;
+        nullable = new boolean[nonterminalCount];
+        final TIntHashSet[] first = new TIntHashSet[nonterminalCount];
+        final TIntHashSet[] gfirst = new TIntHashSet[nonterminalCount];
+        for(int i=0;i<nonterminalCount;++i) {
+            first[i] = new TIntHashSet();
+            gfirst[i] = new TIntHashSet();
+        }
+        final ArrayList<FrontierItem>[] pendingItems = new ArrayList[nonterminalCount];
+        final ArrayList<FrontierItem> stack = new ArrayList<FrontierItem>();
+        THashSet<FrontierItem> handledItems = new THashSet<FrontierItem>();
+        for(int i=0;i<nonterminalCount;++i)
+            pendingItems[i] = new ArrayList<FrontierItem>();
+        
+        for(int i=0;i<prods.size();++i)
+            stack.add(new FrontierItem(i, prods.get(i).rhs.getInitialState()));
+        while(!stack.isEmpty()) {
+            final FrontierItem cur = stack.remove(stack.size()-1);
+            if(!handledItems.add(cur))
+                continue;
+            final Prod curProd = prods.get(cur.production);
+            if(curProd.rhs.getAccepts(cur.state)) {
+                nullable[curProd.lhs] = true;
+                stack.addAll(pendingItems[curProd.lhs]);
+            }
+            curProd.rhs.forEachTransition(cur.state, new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int targetState) {
+                    if(symbol < 0) {
+                        symbol = ~symbol;
+                        gfirst[symbol].add(curProd.lhs);
+                        FrontierItem item = new FrontierItem(cur.production, targetState);
+                        if(nullable[symbol])
+                            stack.add(item);
+                        else
+                            pendingItems[symbol].add(item);
+                    }
+                    else 
+                        first[curProd.lhs].add(symbol);
+                    return true;
+                }
+            });
+        }
+        
+        gclose(first, gfirst);
+        this.first = new int[first.length][];
+        for(int i=0;i<first.length;++i)
+            this.first[i] = first[i].toArray();
+        
+        /*System.out.println("Nullable nonterminals:");
+        for(int i=0;i<nullable.length;++i) {
+            System.out.print("    " + nonterminalNames[i] + " -> ");
+            if(nullable[i])
+                System.out.print(" NULL");
+            for(int s : this.first[i])
+                System.out.print(" " + terminalNames[s]);
+            System.out.println();
+        }*/
+    }
+    
+    public static void gclose(TIntHashSet[] sets, TIntHashSet[] graph_) {
+        int[][] graph = new int[graph_.length][];
+        for(int i=0;i<graph.length;++i)
+            graph[i] = graph_[i].toArray();
+        
+        final TIntArrayList stack = new TIntArrayList();
+        for(int i=0;i<sets.length;++i) {
+            final int i_ = i;
+            sets[i].forEach(new TIntProcedure() {
+                @Override
+                public boolean execute(int value) {
+                    stack.add(i_);
+                    stack.add(value);
+                    return true;
+                }
+            });
+        }
+                
+        while(!stack.isEmpty()) {
+            int sp = stack.size();
+            int value = stack.removeAt(sp-1);
+            int id = stack.removeAt(sp-2);
+            for(int s : graph[id])
+                if(sets[s].add(value)) {
+                    stack.add(s);
+                    stack.add(value);
+                }
+        }
+    }
+
+    public String getName(int symbolId) {
+        if(symbolId >= 0)
+            return terminalNames[symbolId];
+        else
+            return nonterminalNames[~symbolId];
+    }
+
+    private class AlmostAcceptsProc implements  TIntIntProcedure {
+        DFA rhs;
+        TIntHashSet visited = new TIntHashSet();
+        
+        boolean result;
+        
+        public AlmostAcceptsProc(DFA rhs) {
+            this.rhs = rhs;
+        }
+
+        @Override
+        public boolean execute(int a, int b) {
+            if(a < 0 && nullable[~a])
+                visit(b);
+            return !result;
+        }
+
+        public void visit(int position) {
+            if(visited.add(position)) {
+                if(rhs.getAccepts(position)) {
+                    result = true;
+                    return;
+                }
+                rhs.forEachTransition(position, this);
+            }
+        }
+    }
+    
+    public boolean almostAccepts(DFA rhs, int position) {
+        AlmostAcceptsProc proc = new AlmostAcceptsProc(rhs);
+        proc.visit(position);
+        return proc.result;
+    }    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/Prod.java
new file mode 100644 (file)
index 0000000..6a6afca
--- /dev/null
@@ -0,0 +1,27 @@
+package org.simantics.scl.compiler.parser.generator.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+import gnu.trove.map.hash.TIntByteHashMap;
+
+public class Prod {
+    public final String name;
+    public final int lhs;
+    public final DFA rhs;
+    public final TIntByteHashMap annotations;
+    
+    public Prod(String name, int lhs, DFA rhs, TIntByteHashMap annotations) {
+        this.name = name;
+        this.lhs = lhs;
+        this.rhs = rhs;
+        this.annotations = annotations;
+    }
+
+    public String toString(AnaGrammar grammar) {
+        StringBuilder b = new StringBuilder();
+        b.append(grammar.nonterminalNames[lhs]);
+        b.append(" ::= ");
+        rhs.toRegexp().toString(b, grammar, 0);
+        return b.toString();
+    }
+}
\ No newline at end of file
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateEnum.java
new file mode 100644 (file)
index 0000000..efda07e
--- /dev/null
@@ -0,0 +1,20 @@
+package org.simantics.scl.compiler.parser.generator.java;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+
+public class GenerateEnum {
+
+    public static void generate(File file, String packageName, String className, String[] names) throws IOException {
+        PrintStream out = new PrintStream(file);
+        out.println("package " + packageName + ";");
+        out.println();
+        out.println("public interface " + className + " {");
+        for(int i=0;i<names.length;++i)
+            out.println("    public static final int " + names[i] + " = " + i + ";");
+        out.println("}");
+        out.close();
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/GenerateParser.java
new file mode 100644 (file)
index 0000000..1b7df54
--- /dev/null
@@ -0,0 +1,182 @@
+package org.simantics.scl.compiler.parser.generator.java;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+public class GenerateParser {
+
+    String packageName;
+    String className;
+    String exceptionName;
+    AnaGrammar grammar;
+    CompressedParseTable table;
+    
+    public GenerateParser(String packageName, String className,
+            AnaGrammar grammar, String exceptionName, CompressedParseTable table) {
+        this.packageName = packageName;
+        this.className = className;
+        this.grammar = grammar;
+        this.exceptionName = exceptionName;
+        this.table = table;
+    }
+
+    private String readTemplate() throws IOException {
+        InputStream stream = getClass().getResourceAsStream("Parser.java.template");
+        byte[] buffer = new byte[10000];
+        int pos = 0;
+        while(true) {
+            int c = stream.read(buffer, pos, buffer.length-pos);
+            if(c <= 0)
+                break;
+            pos += c;
+            if(pos == buffer.length)
+                buffer = Arrays.copyOf(buffer, buffer.length*2);
+        }
+        return new String(buffer, 0, pos, "UTF-8");
+    }
+    
+    private static final Pattern PATTERN = Pattern.compile("\\$[A-Za-z0-9]+\\$");
+    
+    public void generate(File file) throws IOException {
+        PrintStream out = new PrintStream(file, "UTF-8");
+        
+        int aPos = 0;
+        String template = readTemplate();
+        Matcher matcher = PATTERN.matcher(template);
+        while(matcher.find()) {
+            int start = matcher.start();
+            int end = matcher.end();
+            out.print(template.substring(aPos, start));
+            String varName = template.substring(start+1, end-1);
+            generateVar(out, varName);
+            aPos = end;
+        }
+        out.print(template.substring(aPos));
+        
+        out.close();
+    }
+    
+    private void generateVar(PrintStream out, String varName) {
+        if("package".equals(varName)) {
+            out.print(packageName);
+        }
+        else if("class".equals(varName)) {
+            out.print(className);
+        }
+        else if("terminalCount".equals(varName)) {
+            out.print(grammar.terminalNames.length);
+        }
+        else if("nonterminalCount".equals(varName)) {
+            out.print(grammar.nonterminalNames.length);
+        }
+        else if("productCount".equals(varName)) {
+            out.print(grammar.prods.size());
+        }
+        else if("stateCount".equals(varName)) {
+            out.print(table.actionTable.rowIndex.length);
+        }
+        else if("parseMethods".equals(varName)) {
+            for(int i=0;i<grammar.initialNonterminals.length;++i) {
+                String ntName = grammar.getName(grammar.initialNonterminals[i]);
+                ntName = ntName.substring(0, 1).toUpperCase() + ntName.substring(1);
+                out.println("    public Object parse" + ntName + "() {");
+                out.println("        return parse(" + table.initialStates[i] + ");");
+                out.println("    }");
+            }
+        }
+        else if("terminalNames".equals(varName)) {
+            for(int i=0;i<grammar.terminalNames.length;++i) {
+                if(i > 0)
+                    out.println(",");
+                out.print("        \"" + grammar.terminalNames[i] + "\"");
+            }
+        }
+        else if("nonterminalNames".equals(varName)) {
+            for(int i=0;i<grammar.nonterminalNames.length;++i) {
+                if(i > 0)
+                    out.println(",");
+                out.print("        \"" + grammar.nonterminalNames[i] + "\"");
+            }
+        }
+        else if("stateDescriptions".equals(varName)) {
+            for(int i=0;i<table.stateDescriptions.length;++i) {
+                if(i > 0)
+                    out.println(",");
+                out.print("        \"" + table.stateDescriptions[i].replace("\n", "\\n") + "\"");
+            }
+        }
+        else if("reduceCases".equals(varName)) {
+            for(int i=0;i<grammar.prods.size();++i) {
+                Prod prod = grammar.prods.get(i);
+                if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+                    continue;
+                out.println("        case " + i + ":");
+                out.println("            return reduce" + prod.name + "();");
+            }
+        }    
+        else if("reduceDelegates".equals(varName)) {
+            for(int i=0;i<grammar.prods.size();++i) {
+                Prod prod = grammar.prods.get(i);
+                if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+                    out.println("        null,");
+                else {
+                    out.println("        new ReduceDelegate() {");
+                    out.println("            public Object reduce(" + className + " parser) {");
+                    out.println("                return parser.reduce" + prod.name + "();");
+                    out.println("            }");
+                    out.println("        },");
+                }
+            }
+        }  
+        else if("reduceMethods".equals(varName)) {
+            THashSet<String> usedNames = new THashSet<String>();
+            for(int i=0;i<grammar.prods.size();++i) {
+                Prod prod = grammar.prods.get(i);
+                if(grammar.nonterminalNames[prod.lhs].startsWith("init$"))
+                    continue;
+                if(usedNames.add(prod.name)) {
+                    out.println("    /**");
+                    out.println("     * " + prod.toString(grammar).replace("*", "&#42;"));
+                    out.println("     */");
+                    out.println("    protected abstract Object reduce" + prod.name + "();");
+                }
+            }
+        }
+        else if("Token".equals(varName)) {
+            out.print("Token");
+        }
+        else if("Symbol".equals(varName)) {
+            out.print("Object");
+        }
+        else if("tokenId".equals(varName)) {
+            out.print("id");
+        }
+        else if("imports".equals(varName)) {
+            out.println("import org.simantics.scl.compiler.internal.parsing.Token;");
+        }
+        else if("actionTableLength".equals(varName)) {
+            out.print(table.actionTable.table.length);
+        }
+        else if("gotoTableLength".equals(varName)) {
+            out.print(table.gotoTable.table.length);
+        }
+        else if("errorTableLength".equals(varName)) {
+            out.print(table.errorTable.length);
+        }
+        else if("Exception".equals(varName)) {
+            out.print(exceptionName);
+        }
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/Parser.java.template b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/java/Parser.java.template
new file mode 100644 (file)
index 0000000..233e7ac
--- /dev/null
@@ -0,0 +1,287 @@
+package $package$;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
+
+$imports$
+public abstract class $class$ {    
+    public static final boolean TRACE = false;
+
+    private static final int INITIAL_CAPACITY = 16;
+    private static final int STATE_COUNT = $stateCount$;
+    private static final int TERMINAL_COUNT = $terminalCount$;
+    private static final int NONTERMINAL_COUNT = $nonterminalCount$;
+    private static final int PRODUCT_COUNT = $productCount$;
+    
+    private static final int[] ACTION_ROW_ID = new int[STATE_COUNT];
+    private static final int[] ACTION_COLUMN_ID = new int[TERMINAL_COUNT];
+    private static final short[] ACTION_TABLE = new short[$actionTableLength$];
+    private static final int[] ERROR_TABLE = new int[$errorTableLength$];
+    private static final int[] GOTO_ROW_ID = new int[STATE_COUNT];
+    private static final int[] GOTO_COLUMN_ID = new int[NONTERMINAL_COUNT];
+    private static final short[] GOTO_TABLE = new short[$gotoTableLength$];
+    private static final int[] PRODUCT_LHS = new int[PRODUCT_COUNT];
+
+    private static final short STATE_MASK = (short)0x0fff;
+    private static final short REDUCE_MASK = (short)0x8000;
+    private static final short POP_MASK = (short)0x4000;
+    private static final short PUSH_MASK = (short)0x2000;
+    private static final short ERROR_ACTION = (short)0xffff;
+    private static final short ACCEPT_ACTION = (short)0xfffe;
+    
+    public static final String[] TERMINAL_NAMES = new String[] {
+$terminalNames$
+    };
+
+    public static final String[] NONTERMINAL_NAMES = new String[] {
+$nonterminalNames$
+    };
+        
+    static {
+        try {
+            DataInputStream input = new DataInputStream($class$.class.getResourceAsStream("$class$.dat"));
+            for(int i=0;i<ACTION_ROW_ID.length;++i)
+                ACTION_ROW_ID[i] = input.readInt();
+            for(int i=0;i<ACTION_COLUMN_ID.length;++i)
+                ACTION_COLUMN_ID[i] = input.readInt();    
+            for(int i=0;i<ACTION_TABLE.length;++i)
+                ACTION_TABLE[i] = input.readShort();
+            for(int i=0;i<ERROR_TABLE.length;++i)
+                ERROR_TABLE[i] = input.readInt();
+            for(int i=0;i<GOTO_ROW_ID.length;++i)
+                GOTO_ROW_ID[i] = input.readInt();
+            for(int i=0;i<GOTO_COLUMN_ID.length;++i)
+                GOTO_COLUMN_ID[i] = input.readInt();    
+            for(int i=0;i<GOTO_TABLE.length;++i)
+                GOTO_TABLE[i] = input.readShort();
+            for(int i=0;i<PRODUCT_LHS.length;++i)
+                PRODUCT_LHS[i] = input.readInt();
+            input.close();
+        } catch(IOException e) {
+            e.printStackTrace();
+        }
+    }
+    
+    private static short getAction(int state, int symbol) {
+        int id = TERMINAL_COUNT*state + symbol;
+        if( ((ERROR_TABLE[id>>5] >> (id&31))&1) != 0 )
+            return ERROR_ACTION;
+        return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]];
+    }
+    
+    private static short getGoto(int state, int symbol) {
+        return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]];
+    }
+    
+    protected abstract $Token$ nextToken();
+    
+    private $Symbol$[] symbolStack = new $Symbol$[INITIAL_CAPACITY];
+    private int symbolStackLength = 0;
+    
+    private int[] stateStack = new int[INITIAL_CAPACITY];
+    private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY];
+    private int stateStackLength = 0;
+    
+    // For reduce
+    private int reductionLength;
+    
+    protected int length() {
+        return reductionLength;
+    }
+    
+    protected $Symbol$ get(int i) {
+        if(i < 0 || i >= reductionLength)
+            throw new IndexOutOfBoundsException();
+        return symbolStack[symbolStackLength+i];
+    }
+    
+    private String parseErrorDescription(int state, $Token$ token, int tokenId) {
+        StringBuilder b = new StringBuilder();
+        b.append("Unexpected token '").append(token)
+         .append("' (").append(TERMINAL_NAMES[tokenId])
+         .append("). Expected one of ");
+        ArrayList<String> possibleTerminals = new ArrayList<String>();
+        for(int i=0;i<TERMINAL_COUNT;++i)
+            if(getAction(state, i) != ERROR_ACTION)
+                possibleTerminals.add(TERMINAL_NAMES[i]);
+        Collections.sort(possibleTerminals);
+        for(int i=0;i<possibleTerminals.size();++i) {
+            if(i > 0)
+                b.append(", ");
+            b.append(possibleTerminals.get(i));
+        }
+        b.append('.');
+        return b.toString();
+    }
+    
+    protected abstract RuntimeException syntaxError($Token$ token, String description);
+    
+    private static String describeAction(boolean isGoto, int action) {
+        if(action == ERROR_ACTION)
+            return "ERROR";
+        if(action == ACCEPT_ACTION)
+            return "ACCEPT";
+        StringBuilder b = new StringBuilder();
+        if(isGoto)
+            b.append("GOTO ");
+        else {
+            if((action & REDUCE_MASK) != 0) {
+                action ^= REDUCE_MASK;
+                b.append("REDUCE");
+            }
+            else
+                b.append("SHIFT");
+        }
+        if((action & POP_MASK) != 0) {
+            action ^= POP_MASK;
+            b.append(" POP");
+        }
+        if((action & PUSH_MASK) != 0) {
+            action ^= PUSH_MASK;
+            b.append(" PUSH");
+        }
+        b.append(' ').append(action);
+        return b.toString();
+    }
+    
+    private void printState(int state) {
+        System.out.print("state=" + state + ":");
+        for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+            Object s = symbolStack[i];
+            if(s instanceof Token)
+                System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+            else if(s == null)
+                System.out.print(" null");
+            else
+                System.out.print(" " + s.getClass().getSimpleName());
+            while(j>=0 && symbolStackPositionStack[j]==i)
+                System.out.print(" (" + stateStack[j--] + ")");
+        }
+        System.out.println();
+    }
+    
+    private $Symbol$ parse(int state) {
+        while(true) {
+            $Token$ token = nextToken();
+            int tokenId = token.$tokenId$;
+            if(TRACE)
+                System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
+            while(true) {
+                if(TRACE)
+                    printState(state);
+                short action = getAction(state, tokenId);
+                if(TRACE)
+                    System.out.println("    -> action=" + describeAction(false, action));
+                //System.out.println(STATE_DESCRIPTIONS[state]);
+                if((action & REDUCE_MASK) != 0) {
+                    if(action == ACCEPT_ACTION)
+                        return symbolStack[symbolStackLength-1];
+                    if(action == ERROR_ACTION)
+                        throw syntaxError(token, parseErrorDescription(state, token, tokenId));
+                    int popAmount = (action >>> 13)&3;
+                    if(TRACE) {
+                        if(popAmount > 0)
+                            System.out.println("    POP " + popAmount);
+                    }
+                    stateStackLength -= popAmount;
+                    action &= STATE_MASK;
+                    
+                    int reductionBegin = symbolStackPositionStack[--stateStackLength];
+                    
+                    reductionLength = symbolStackLength-reductionBegin;
+                    symbolStackLength = reductionBegin;
+                    
+                    if(symbolStackLength == symbolStack.length)
+                        symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+                    $Symbol$ symbol = reduce(action);
+                    postReduce(symbol);
+                    symbolStack[symbolStackLength] = symbol;
+                    
+                    state = stateStack[stateStackLength];
+                    if(TRACE) {
+                        ++symbolStackLength;
+                        printState(state);
+                        --symbolStackLength;
+                        System.out.println("    nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+                    }
+                    action = getGoto(state, PRODUCT_LHS[action]);
+                    if(TRACE)
+                        System.out.println("    -> action=" + describeAction(true, action));
+                        
+                    // Pop state
+                    if((action & POP_MASK) != 0) {
+                        --stateStackLength;
+                    }
+                    // Push state
+                    if((action & PUSH_MASK) != 0) {
+                        if(stateStackLength == stateStack.length) {
+                            stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+                            symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+                        }
+                        symbolStackPositionStack[stateStackLength] = symbolStackLength;
+                        stateStack[stateStackLength++] = state;
+                    }
+                    state = action & STATE_MASK;
+                    ++symbolStackLength;
+                }
+                else {
+                    // Pop state
+                    if((action & POP_MASK) != 0) {
+                        --stateStackLength;
+                    }
+                    // Push state
+                    if((action & PUSH_MASK) != 0) {
+                        if(stateStackLength == stateStack.length) {
+                            stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+                            symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+                        }
+                        symbolStackPositionStack[stateStackLength] = symbolStackLength;
+                        stateStack[stateStackLength++] = state;
+                    }
+                    
+                    // New state
+                    state = action & STATE_MASK;
+                    
+                    // Push symbol
+                    if(symbolStackLength == symbolStack.length)
+                        symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+                    symbolStack[symbolStackLength++] = token;
+                    break;
+                }
+            }
+        }
+    }
+    
+$parseMethods$
+
+    protected $Symbol$ reduce(int productionId) {
+        try {
+        switch(productionId) {
+$reduceCases$
+        default:
+            throw new RuntimeException("Internal parser error.");
+        }
+        } catch($Exception$ e) {
+            StringBuilder b = new StringBuilder();
+            b.append("Failed to reduce");
+            for(int i=0;i<length();++i) {
+                Object obj = get(i);
+                b.append("\n    (").append(i).append(") \"").append(obj).append('\"');
+                if(obj instanceof $Token$)
+                    b.append(" (").append(TERMINAL_NAMES[(($Token$)obj).$tokenId$]).append(")");
+                else
+                    b.append(" [").append(obj.getClass().getSimpleName()).append("]");
+            }
+            throw new RuntimeException(b.toString(), e);
+        } 
+    }
+
+$reduceMethods$
+    protected void postReduce($Symbol$ reduced) {
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/Item.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/Item.java
new file mode 100644 (file)
index 0000000..33d9519
--- /dev/null
@@ -0,0 +1,81 @@
+package org.simantics.scl.compiler.parser.generator.table;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+
+public class Item implements Comparable<Item> {
+    public final int production;
+    public final int position;
+    public int stackPos;
+    
+    public Item(int production, int position, int stackPos) {
+        this.production = production;
+        this.position = position;
+        this.stackPos = stackPos;
+    }
+
+    public int[] nextSymbols(AnaGrammar grammar) {
+        Prod prod = grammar.prods.get(production);
+        return prod.rhs.nextStates(position);
+    }
+
+    public int nextPosition(AnaGrammar grammar, int symbol) {
+        Prod prod = grammar.prods.get(production);
+        return prod.rhs.getTransition(position, symbol);
+    }
+    
+    @Override
+    public int hashCode() {
+        return (production * 31 + position) * 31 + stackPos;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null || getClass() != obj.getClass())
+            return false;
+        Item other = (Item) obj;
+        return production == other.production
+                && position == other.position
+                && stackPos == other.stackPos;
+    }
+
+    @Override
+    public int compareTo(Item o) {
+        if(production < o.production)
+            return -1;
+        if(production > o.production)
+            return 1;
+        if(position < o.position)
+            return -1;
+        if(position > o.position)
+            return 1;
+        return 0;        
+    }
+    
+    public String toString(AnaGrammar grammar) {
+        Prod prod = grammar.prods.get(production);
+        StringBuilder b = new StringBuilder();
+        b.append(grammar.nonterminalNames[prod.lhs]);
+        b.append(" ::= ");
+        /*prod.rhs.toRegexpTo(position).toString(b, grammar, 0);
+        b.append(" . ");
+        prod.rhs.toRegexpFrom(position).toString(b, grammar, 0);*/
+        prod.rhs.toPositionalRegexp(position).toString(b, grammar, 0);
+        if(stackPos >= 0)
+            b.append(" (stack ").append(stackPos).append(')');
+        /*for(int i=0;i<position;++i)
+            b.append(' ').append(grammar.getName(prod.rhs[i]));
+        b.append(" *");
+        for(int i=position;i<prod.rhs.length;++i)
+            b.append(' ').append(grammar.getName(prod.rhs[i]));*/
+        return b.toString();
+    }
+
+    public boolean isReducible(AnaGrammar grammar) {
+        return grammar.prods.get(production).rhs.getAccepts(position);
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ItemSet.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ItemSet.java
new file mode 100644 (file)
index 0000000..0b82409
--- /dev/null
@@ -0,0 +1,61 @@
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+
+public class ItemSet {
+    public final Item[] items;
+    int hash;
+
+    public ItemSet(Item[] items) {
+        this.items = items;
+        Arrays.sort(items);
+    }
+    
+    public ItemSet(Collection<Item> items) {
+        this(items.toArray(new Item[items.size()]));
+    }
+    
+    private static final int PROD = 31*31;
+    
+    @Override
+    public int hashCode() {
+        if(hash == 0) {
+            int h = 1;
+            for(Item item : items)
+                h = PROD*h + item.hashCode();
+            hash = h;
+        }
+        return hash;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        if(this == obj)
+            return true;
+        if(obj == null || obj.getClass() != getClass())
+            return false;
+        ItemSet other = (ItemSet)obj;
+        if(other.items.length != items.length)
+            return false;
+        for(int i=0;i<items.length;++i)
+            if(!items[i].equals(other.items[i]))
+                return false;
+        return true;
+    }
+    
+    public String toString(AnaGrammar grammar) {
+        StringBuilder b = new StringBuilder();
+            boolean first = true;
+        for(Item item : items) {
+            if(first)
+                first = false;
+            else
+                b.append('\n');
+            b.append("    ").append(item.toString(grammar));
+        }
+        return b.toString();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTable.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTable.java
new file mode 100644 (file)
index 0000000..d79e968
--- /dev/null
@@ -0,0 +1,86 @@
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.compression.CompressedParseTable;
+import org.simantics.scl.compiler.parser.generator.compression.ErrorTable;
+import org.simantics.scl.compiler.parser.generator.compression.GCCompress;
+
+public class ParseTable {
+    public final int stateCount;
+    public final int[][] actionTable;
+    public final int[][] gotoTable;
+    public final int[] productionInfo;
+    public final int[] initialStates;
+    public final String[] stateDescriptions;
+    
+    public ParseTable(
+            int stateCount,
+            int[][] actionTable,
+            int[][] gotoTable,
+            int[] productionInfo,
+            int[] initialStates,
+            String[] stateDescriptions) {
+        this.stateCount = stateCount;
+        this.actionTable = actionTable;
+        this.gotoTable = gotoTable;
+        this.productionInfo = productionInfo;
+        this.initialStates = initialStates;
+        this.stateDescriptions = stateDescriptions;
+    }
+    
+    @Override
+    public String toString() {
+        StringBuilder b = new StringBuilder();
+        b.append("productionInfo = ").append(Arrays.toString(productionInfo)).append('\n');
+        b.append("actions:\n");
+        for(int[] actions : actionTable) {
+            b.append("    {");
+            for(int i=0;i<actions.length;++i) {
+                if(i > 0)
+                    b.append(", ");
+                b.append(actions[i]);
+            }
+            b.append("},\n");
+        }
+        b.append("gotos:\n");
+        for(int[] gotos : gotoTable) {
+            b.append("    {");
+            for(int i=0;i<gotos.length;++i) {
+                if(i > 0)
+                    b.append(", ");
+                b.append(gotos[i]);
+            }
+            b.append("},\n");
+        }
+        return b.toString();
+    }
+    
+    public CompressedParseTable compress() {
+        return new CompressedParseTable(
+                GCCompress.compress(actionTable), 
+                ErrorTable.createErrorTable(actionTable),
+                GCCompress.compress(gotoTable),
+                productionInfo,
+                initialStates,
+                stateDescriptions);
+    }
+    
+    public void writeTo(File file) throws IOException {
+        FileOutputStream stream = new FileOutputStream(file);
+        DataOutputStream output = new DataOutputStream(stream);
+        for(int[] row : actionTable)
+            for(int val : row)
+                output.writeInt(val);
+        for(int[] row : gotoTable)
+            for(int val : row)
+                output.writeInt(val);
+        for(int val : productionInfo)
+            output.writeInt(val);
+        output.close();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java
new file mode 100644 (file)
index 0000000..da837fb
--- /dev/null
@@ -0,0 +1,515 @@
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.list.array.TLongArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TLongIntHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntObjectProcedure;
+import gnu.trove.procedure.TObjectIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+import gnu.trove.set.hash.TLongHashSet;
+
+public class ParseTableBuilder {
+    public final static int MAX_STACK_ID = 10;
+    
+    private static final int STATE_MASK = 0x0fff;
+    private static final int REDUCE_MASK = 0x8000;
+    private static final int POP_MASK = 0x4000;
+    private static final int PUSH_MASK = 0x2000;
+    public static final int ERROR_ACTION = 0xffff;
+    private static final int ACCEPT_ACTION = 0xfffe;
+
+    final AnaGrammar grammar;
+    private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
+    private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
+    private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
+    private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
+    private TIntArrayList backTransSymbols = new TIntArrayList();
+    private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
+    int[] initialStates;
+    TIntHashSet finalStates = new TIntHashSet(); 
+
+    private ParseTableBuilder(AnaGrammar grammar) {
+        this.grammar = grammar;
+    }
+        
+    private static boolean isNonterminal(int symbol) {
+        return symbol < 0;
+    }
+    
+    private void close(ArrayList<Item> items) {
+        THashSet<Item> itemSet = new THashSet<Item>(items);
+        for(int i=0;i<items.size();++i) {
+            Item item = items.get(i);
+            for(int nextSymbol : item.nextSymbols(grammar))
+                if(isNonterminal(nextSymbol)) {
+                    nextSymbol = ~nextSymbol;
+                    int pEnd = grammar.nonterminalPos[nextSymbol+1];
+                    for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
+                        Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
+                        if(itemSet.add(newItem))
+                            items.add(newItem);
+                    }
+                }                
+        }
+    }
+    
+    private int getState(int backTransSymbol, ArrayList<Item> items) {
+        // Create state
+        close(items);
+        final ItemSet itemSet = new ItemSet(items);
+        if(states.contains(itemSet))
+            return states.get(itemSet);
+        final int newState = states.size();
+        states.put(itemSet, newState);
+        itemSets.add(itemSet);
+        backTransSymbols.add(backTransSymbol);
+        backLinks.add(new TIntArrayList(2));
+        
+        // Create transitions
+        TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
+        //close(items);
+        for(Item item : items) {
+            for(int s : item.nextSymbols(grammar)) {
+                ArrayList<Item> l = transitionMap.get(s);
+                if(l == null) {
+                    l = new ArrayList<Item>();
+                    transitionMap.put(s, l);
+                }
+                l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
+            }
+        }
+        
+        final TIntIntHashMap trans = new TIntIntHashMap();
+        final TIntIntHashMap stackOpMap = new TIntIntHashMap();
+        transitions.add(trans);
+        stackOps.add(stackOpMap);
+        if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
+            finalStates.add(newState);
+        }
+        transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
+            @Override
+            public boolean execute(int a, ArrayList<Item> b) {
+                boolean stackShift = false;
+                int minStackPos = Integer.MAX_VALUE;
+                for(Item item : b) {
+                    if(item.stackPos == -1)
+                        stackShift = true;
+                    else
+                        minStackPos = Math.min(minStackPos, item.stackPos);
+                }
+                int stackOp = 0;
+                if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
+                    stackOp |= POP_MASK;
+                    //System.out.println("minStackPos = " + minStackPos);
+                    for(Item item : b)
+                        if(item.stackPos >= 0)
+                            --item.stackPos;
+                }
+                boolean stackOverflow = false;
+                if(stackShift) {
+                    stackOp |= PUSH_MASK;
+                    for(Item item : b) {
+                        ++item.stackPos;
+                        if(item.stackPos > MAX_STACK_ID)
+                            stackOverflow = true;
+                    }
+                }
+                stackOpMap.put(a, stackOp);
+                System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
+                
+                if(stackOverflow) {
+                    System.err.println("Stack overflow when following " + grammar.getName(a) + " at");
+                    System.err.println(itemSet.toString(grammar));
+                }
+                else {
+                    int state = getState(a, b);
+                    trans.put(a, state);
+                    backLinks.get(state).add(newState);
+                }
+                return true;
+            }
+            
+        });
+        return newState;
+    }
+
+    TLongArrayList sMap = new TLongArrayList();
+    TLongIntHashMap sMapInv = new TLongIntHashMap();
+    TIntHashSet[] follow;
+        
+    private static int getState(long s) {
+        return (int)(s >> 32);
+    }
+    
+    private static int getSymbol(long s) {
+        return (int)s;
+    }
+    
+    private static long getS(int state, int symbol) {
+        return (((long)state) << 32) | (long)symbol;
+    }
+    
+    private void computeFollow() {
+        for(int i=0;i<itemSets.size();++i) {
+            final int source = i;
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int target) {
+                    if(symbol < 0) {
+                        long s = getS(source, ~symbol);
+                        int id = sMap.size();
+                        sMap.add(s);
+                        sMapInv.put(s, id);
+                    }
+                    return true;
+                }
+            });
+        }
+
+        // initfollow
+        follow = new TIntHashSet[sMap.size()];
+        final TIntHashSet[] gread = new TIntHashSet[follow.length];
+        final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
+        for(int i=0;i<follow.length;++i) {
+            gread[i] = new TIntHashSet();
+            gla[i] = new TIntHashSet();
+        }
+        for(int i=0;i<follow.length;++i) {
+            final int id = i;
+            long s = sMap.get(i);
+            int source = getState(s);
+            int symbol = getSymbol(s);
+            final int target = transitions.get(source).get(~symbol);
+            final TIntHashSet drSet = new TIntHashSet();
+            transitions.get(target).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol2, int target2) {
+                    if(symbol2 >= 0)
+                        drSet.add(symbol2);
+                    else if(grammar.nullable[~symbol2])
+                        gread[sMapInv.get(getS(target, ~symbol2))].add(id);
+                    return true;
+                }
+            });
+            if(finalStates.contains(target))
+                drSet.add(grammar.terminalNames.length-1);
+            follow[i] = drSet;
+            
+            ItemSet set = itemSets.get(target);
+            for(Item targetItem : set.items) {
+                Prod prod = grammar.prods.get(targetItem.production);
+                if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
+                    for(Item sourceItem : itemSets.get(source).items) {
+                        if(sourceItem.production == targetItem.production &&
+                                prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
+                            TLongHashSet visited = new TLongHashSet(); 
+                            traceBack(gla, id, visited, source, sourceItem);
+                        }
+                    }
+                    
+                }
+            }
+        }
+        //System.out.println("follow: " + Arrays.toString(follow));
+        //System.out.println("gread: " + Arrays.toString(gread));
+        //System.out.println("gla: " + Arrays.toString(gla));
+        AnaGrammar.gclose(follow, gread);
+        AnaGrammar.gclose(follow, gla);
+        
+        /*System.out.println("Gla:");
+        for(int i=0;i<gla.length;++i) {
+            int iState = getState(sMap.get(i));
+            int iSymbol = getSymbol(sMap.get(i));
+            for(int j : gla[i].toArray()) {
+                int jState = getState(sMap.get(j));
+                int jSymbol = getSymbol(sMap.get(j));
+                System.out.println("-- from --");
+                System.out.println(itemSets.get(iState).toString(grammar));
+                System.out.println("    symbol: " + grammar.nonterminalNames[iSymbol]);
+                System.out.println("-- to --");
+                System.out.println(itemSets.get(jState).toString(grammar));
+                System.out.println("    symbol: " + grammar.nonterminalNames[jSymbol]);
+            }
+        }*/
+        
+        /*for(int i=0;i<follow.length;++i) {
+            long s = sMap.get(i);
+            int source = getState(s);
+            int symbol = getSymbol(s);
+            System.out.println("------------------------------");
+            System.out.println(itemSets.get(source).toString(grammar));
+            System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
+            System.out.print("Follow:");
+            for(int sym : follow[i].toArray())
+                System.out.print(" " + grammar.terminalNames[sym]);
+            System.out.println();
+        }*/
+    }
+    
+    private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
+        if(visited.add( (((long)state)<<32) | (long)item.position )) {
+            Prod prod = grammar.prods.get(item.production);
+            if(item.stackPos == -1) {
+                int id = sMapInv.get(getS(state, prod.lhs));
+                gla[id].add(initialId);
+            }
+            
+            int backTransSymbol = backTransSymbols.get(state);
+            for(int prevState : backLinks.get(state).toArray())
+                for(Item prevItem : itemSets.get(prevState).items)
+                    if(prevItem.production == item.production &&
+                            prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
+                        traceBack(gla, initialId, visited, prevState, prevItem);
+        }
+    }
+    
+    private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
+        if(visited.add( (((long)state)<<32) | (long)position )) {
+            int backTransSymbol = backTransSymbols.get(state);
+            Prod prod = grammar.prods.get(production);
+            boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
+            for(int prevState : backLinks.get(state).toArray()) {
+                for(Item item : itemSets.get(prevState).items) {
+                    if(item.production == production &&
+                            prod.rhs.getTransition(item.position, backTransSymbol) == position)
+                        lookback(visited, la, production, prevState, item.position);
+                    if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
+                        int id = sMapInv.get(getS(prevState, prod.lhs));
+                        la.addAll(follow[id]);
+                    }
+                }
+            }
+        }
+    }
+
+    private void createReduceActions() {
+        computeFollow();
+        for(int i=0;i<itemSets.size();++i) {
+            TIntIntHashMap trans = transitions.get(i);
+            if(finalStates.contains(i))
+                trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
+            
+            TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
+            TIntIntHashMap stackPosMap = new TIntIntHashMap(); 
+            for(Item item : itemSets.get(i).items) {
+                Prod prod = grammar.prods.get(item.production);
+                if(prod.rhs.getAccepts(item.position)) {
+                    TIntHashSet la = laMap.get(item.production);
+                    if(la == null) {
+                        la = new TIntHashSet();
+                        laMap.put(item.production, la);
+                    }
+                    
+                    TLongHashSet visited = new TLongHashSet();
+                    lookback(visited, la, item.production, i, item.position);
+                    
+                    if(stackPosMap.containsKey(item.production)) {
+                        stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
+                    }
+                    else
+                        stackPosMap.put(item.production, item.stackPos);
+                }
+            }
+            
+            // Create transitions
+            for(int production : laMap.keys()) {
+                int stackPos = 0; //stackPosMap.get(production);
+                TIntHashSet la = laMap.get(production);
+                for(int symbol : la.toArray()) {
+                    if(trans.contains(symbol)) {
+                        int oldAction = trans.get(symbol);
+                        if(oldAction >= 0) {
+                            Prod prod = grammar.prods.get(production);
+                            if(prod.annotations.containsKey(symbol)) {
+                                byte v = prod.annotations.get(symbol);
+                                if(v == 1)
+                                    trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+                            }
+                            else {
+                                System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+                                System.err.println(itemSets.get(i).toString(grammar));
+                            }
+                        }
+                        else {
+                            System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+                            System.err.println(itemSets.get(i).toString(grammar));
+                        }
+                    }
+                    else
+                        trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+                }
+            }
+            
+            // Check stacking conflicts
+            /*trans.forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    if(b >= 0) {
+                        boolean kernelState = false;
+                        boolean nonkernelState = false;
+                        for(Item item : itemSets.get(b).items) {
+                            Prod prod = grammar.prods.get(item.production);
+                            if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
+                                nonkernelState = true;
+                            else if(item.position != prod.rhs.getInitialState())
+                                kernelState = true;
+                        }
+                        
+                        
+                        if(kernelState && nonkernelState) {
+                            System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
+                            System.err.println(itemSets.get(b).toString(grammar));
+                        }
+                    }
+                    return true;
+                }
+            });*/
+        }
+    }
+    
+    public static ParseTable build(AnaGrammar grammar) {
+        ParseTableBuilder builder = new ParseTableBuilder(grammar);
+        
+        builder.initialStates = new int[grammar.initialNonterminals.length];
+        for(int i=0;i<grammar.initialNonterminals.length;++i) {
+            ArrayList<Item> seed = new ArrayList<Item>();
+            int prodId = grammar.prods.size()-i-1;
+            seed.add(new Item(prodId, 
+                    grammar.prods.get(prodId).rhs.getInitialState(), 0));
+            builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
+        }
+        
+        builder.createReduceActions();
+        
+        System.out.println("States: " + builder.itemSets.size());
+        
+        //builder.visualize();
+        
+        builder.printParseTable();
+        return builder.getParseTable();
+    }
+
+    private ParseTable getParseTable() {
+        int[] productionInfo = new int[grammar.prods.size()];
+        for(int i=0;i<productionInfo.length;++i) {
+            Prod prod = grammar.prods.get(i);
+            productionInfo[i] = prod.lhs;
+        }
+        
+        int[][] actionTable = new int[transitions.size()][];
+        int[][] gotoTable = new int[transitions.size()][];
+        for(int i=0;i<transitions.size();++i) {
+            final int[] actions = new int[grammar.terminalNames.length]; 
+            Arrays.fill(actions, ERROR_ACTION);
+            actionTable[i] = actions;
+            final int[] gotos = new int[grammar.nonterminalNames.length];
+            Arrays.fill(gotos, ERROR_ACTION);
+            gotoTable[i] = gotos;
+            final TIntIntHashMap stackOpMap = stackOps.get(i); 
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    int action = b | stackOpMap.get(a);
+                    if(a >= 0)
+                        actions[a] = action;
+                    else
+                        gotos[~a] = action;
+                    return true;
+                }
+            });
+        }
+        
+        String[] stateDescriptions = new String[itemSets.size()];
+        for(int i=0;i<stateDescriptions.length;++i) {
+            final StringBuilder b = new StringBuilder();
+            b.append(itemSets.get(i).toString(grammar));
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int action) {
+                    if(symbol >= 0) {
+                        b.append("\n    ").append(grammar.terminalNames[symbol]).append(" ->");
+                        if((action & REDUCE_MASK) == 0) {
+                            if((action & POP_MASK) != 0)
+                                b.append(" POP");
+                            if((action & PUSH_MASK) != 0)
+                                b.append(" PUSH");
+                            b.append(" SHIFT(").append(action&STATE_MASK).append(")");
+                        }
+                        else {
+                            if(action == 0xfffffffe)
+                                b.append(" ACCEPT");
+                            else
+                                b.append(" REDUCE(").append(action&STATE_MASK).append(")");
+                        }
+                    }
+                    else {
+                        b.append("\n    ").append(grammar.nonterminalNames[~symbol]).append(" ->")
+                         .append(" GOTO(").append(action).append(")");
+                    }
+                    return true;
+                }
+            });
+            stateDescriptions[i] = b.toString();
+        }
+        
+        //printParseTable();
+        return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
+                initialStates, stateDescriptions);
+    }
+
+    private void printParseTable() {
+        final ItemSet[] stateSets = new ItemSet[states.size()];
+        states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
+            @Override
+            public boolean execute(ItemSet a, int b) {
+                stateSets[b] = a;
+                return true;
+            }
+        });
+        for(int i=0;i<stateSets.length;++i) {
+            System.out.println("--- State " + i + " ---");
+            System.out.println(stateSets[i].toString(grammar));
+            final TIntIntHashMap stackOp = stackOps.get(i);
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    int sOp = stackOp.get(a);
+                    System.out.print(grammar.getName(a) + " -> ");
+                    if(sOp != 0) {
+                        System.out.print("[");
+                        if((sOp & PUSH_MASK) != 0) {
+                            sOp ^= PUSH_MASK;
+                            System.out.print("PUSH ");
+                        }
+                        if((sOp & POP_MASK) != 0) {
+                            sOp ^= POP_MASK;
+                            System.out.print("POP ");
+                        }
+                        if(sOp != 0)
+                            System.out.print(sOp);
+                        System.out.print("] ");
+                    }
+                    if((b & REDUCE_MASK) != 0) {
+                        b ^= REDUCE_MASK;
+                        System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar));
+                    }
+                    else {
+                        System.out.println("shift " + b);
+                    }
+                    return true;
+                }
+            });
+        }
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Grammar.java
new file mode 100644 (file)
index 0000000..e80858c
--- /dev/null
@@ -0,0 +1,46 @@
+package org.simantics.scl.compiler.parser.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.Namer;
+
+public class Grammar implements Namer {
+    public final Production[] productions;
+    public final String[] terminalNames;
+    public final String[] nonterminalNames;
+    public final int[] initialNonterminals;
+    
+    public Grammar(Production[] productions, String[] terminalNames,
+            String[] nonterminalNames, int[] initialNonterminals) {
+        this.productions = productions;
+        this.terminalNames = terminalNames;
+        this.nonterminalNames = nonterminalNames;
+        this.initialNonterminals = initialNonterminals;
+    }
+
+    public String getName(int symbolId) {
+        if(symbolId >= 0)
+            return terminalNames[symbolId];
+        else
+            return nonterminalNames[~symbolId];
+    }
+    
+    @Override
+    public String toString() {
+        StringBuilder b = new StringBuilder();
+        for(Production prod : productions) {
+            b.append(prod.toString(this));
+            b.append("\n");
+        }
+        /*for(int i=0;i<terminalNames.length;++i)
+            b.append(terminalNames[i]).append(' ').append(i).append('\n');*/
+        return b.toString();
+    }
+
+    public void check() {
+        int[] prodCount = new int[nonterminalNames.length];
+        for(Production prod : productions)
+            ++prodCount[~prod.lhs];
+        for(int i=0;i<nonterminalNames.length;++i)
+            if(prodCount[i]==0)
+                System.err.println("Nonterminal " + nonterminalNames[i] + " does not have productions.");
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Production.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/Production.java
new file mode 100644 (file)
index 0000000..c8429ad
--- /dev/null
@@ -0,0 +1,28 @@
+package org.simantics.scl.compiler.parser.grammar;
+
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+import gnu.trove.map.hash.TIntByteHashMap;
+
+public class Production {
+    public final String name;
+    public int lhs;
+    public final Regexp rhs;
+    public final TIntByteHashMap annotations;
+    
+    public Production(String name, int lhs, Regexp rhs,
+            TIntByteHashMap annotations) {
+        this.name = name;
+        this.lhs = lhs;
+        this.rhs = rhs;
+        this.annotations = annotations;
+    }
+
+    public String toString(Grammar grammar) {
+        StringBuilder b = new StringBuilder();
+        b.append(grammar.getName(lhs));
+        b.append(" = ");
+        rhs.toString(b, grammar, 0);
+        return b.toString();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GenerateParserParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GenerateParserParser.java
new file mode 100644 (file)
index 0000000..0f4d16e
--- /dev/null
@@ -0,0 +1,19 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.File;
+
+import org.simantics.scl.compiler.parser.generator.ParserGenerator;
+
+public class GenerateParserParser {
+
+    public static void main(String[] args) throws Exception {
+        File plugin = new File(GenerateParserParser.class.getResource(".").getPath().replace("%20", " "))
+        .getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile().getParentFile();
+        System.out.println(plugin);
+        ParserGenerator.createParser(
+                "org.simantics.scl.copiler.parser.grammar.input",
+                "RuntimeException",
+                new File(plugin, "src/org/simantics/scl/compiler/parser/grammar/input/Grammar.grammar")); 
+    }
+    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Grammar.grammar b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Grammar.grammar
new file mode 100644 (file)
index 0000000..1d1715f
--- /dev/null
@@ -0,0 +1,23 @@
+initial file ;
+
+file
+    = declaration+                                    # File
+    ;
+
+declaration
+    = NONTERMINAL EQUALS prod (BAR prod)* SEMICOLON   # Production
+    | INITIAL NONTERMINAL SEMICOLON                   # Initial
+    ;
+
+prod
+    = regexps HASH TERMINAL (COMMA (SHIFT|REDUCE) TERMINAL)* # ProductionRhs
+    ;
+
+regexps 
+    = (regexp (regexp | STAR | PLUS | OPTIONAL)*)?         # Concatenation 
+    ;
+
+regexp
+    = (TERMINAL | NONTERMINAL | SHIFT | REDUCE | INITIAL)  # Terminal
+    | LPAREN regexps (BAR regexps)* RPAREN                 # Union
+    ;
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.flex b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.flex
new file mode 100644 (file)
index 0000000..3d1bd8a
--- /dev/null
@@ -0,0 +1,56 @@
+package org.simantics.parser.grammar.input2;
+
+import org.simantics.parser.grammar.input2.Token;
+
+%%
+
+%public
+%char
+%unicode
+%class GrammarLexer
+%function nextToken
+%type Token
+%yylexthrow RuntimeException
+%scanerror RuntimeException
+%eofval{
+    return sym(GrammarTerminals.EOF);
+%eofval}
+
+%{  
+    private Token sym(int id) {
+        return new Token(id, yychar, yychar+yylength(), yytext());
+    }
+%}
+
+
+idChar         = [a-zA-Z0-9_]
+nonterminal    = [a-z]{idChar}*
+terminal       = [A-Z]{idChar}*
+
+whitespace      = [ \n\r\t]+
+c_comment       = "//" [^\n\r]*
+cpp_comment     = "/*" ~"*/"
+
+%%
+
+<YYINITIAL> {
+  {c_comment}     { }
+  {cpp_comment}   { }
+  "="             { return sym(GrammarTerminals.EQUALS); }
+  "|"             { return sym(GrammarTerminals.BAR); }
+  ";"             { return sym(GrammarTerminals.SEMICOLON); }
+  "#"             { return sym(GrammarTerminals.HASH); }
+  ","             { return sym(GrammarTerminals.COMMA); }
+  "("             { return sym(GrammarTerminals.LPAREN); }
+  ")"             { return sym(GrammarTerminals.RPAREN); }
+  "*"             { return sym(GrammarTerminals.STAR); }
+  "+"             { return sym(GrammarTerminals.PLUS); }
+  "?"             { return sym(GrammarTerminals.OPTIONAL); }
+  initial         { return sym(GrammarTerminals.INITIAL); }
+  shift           { return sym(GrammarTerminals.SHIFT); }
+  reduce          { return sym(GrammarTerminals.REDUCE); }
+  {terminal}      { return sym(GrammarTerminals.TERMINAL); }
+  {nonterminal}   { return sym(GrammarTerminals.NONTERMINAL); }
+  {whitespace}    { }
+  .               { throw new RuntimeException("Illegal character '" + yytext() + "'."); }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarLexer.java
new file mode 100644 (file)
index 0000000..ce4092a
--- /dev/null
@@ -0,0 +1,653 @@
+/* The following code was generated by JFlex 1.6.1 */
+
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import org.simantics.scl.compiler.parser.grammar.input.Token;
+
+
+/**
+ * This class is a scanner generated by 
+ * <a href="http://www.jflex.de/">JFlex</a> 1.6.1
+ * from the specification file <tt>C:/Users/hannu/git/scl/org.simantics.parser/src/org/simantics/parser/grammar/input2/GrammarLexer.flex</tt>
+ */
+public class GrammarLexer {
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+  private static final int ZZ_BUFFERSIZE = 16384;
+
+  /** lexical states */
+  public static final int YYINITIAL = 0;
+
+  /**
+   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+   *                  at the beginning of a line
+   * l is of the form l = 2*k, k a non negative integer
+   */
+  private static final int ZZ_LEXSTATE[] = { 
+     0, 0
+  };
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final String ZZ_CMAP_PACKED = 
+    "\11\0\1\4\1\6\1\36\1\36\1\6\22\0\1\4\2\0\1\13"+
+    "\4\0\1\15\1\16\1\7\1\17\1\14\2\0\1\5\12\1\1\0"+
+    "\1\12\1\0\1\10\1\0\1\20\1\0\32\3\4\0\1\1\1\0"+
+    "\1\24\1\2\1\35\1\33\1\32\1\30\1\2\1\27\1\21\2\2"+
+    "\1\25\1\2\1\22\3\2\1\31\1\26\1\23\1\34\5\2\1\0"+
+    "\1\11\10\0\1\36\u1fa2\0\1\36\1\36\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\udfe6\0";
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+  /** 
+   * Translates DFA states to action switch labels.
+   */
+  private static final int [] ZZ_ACTION = zzUnpackAction();
+
+  private static final String ZZ_ACTION_PACKED_0 =
+    "\1\0\1\1\1\2\1\3\1\4\1\1\1\5\1\6"+
+    "\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
+    "\3\2\1\4\1\0\3\2\1\0\3\2\1\4\4\2"+
+    "\1\17\2\2\1\20\1\21";
+
+  private static int [] zzUnpackAction() {
+    int [] result = new int[38];
+    int offset = 0;
+    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAction(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+
+  /** 
+   * Translates a state to a row index in the transition table
+   */
+  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+  private static final String ZZ_ROWMAP_PACKED_0 =
+    "\0\0\0\37\0\76\0\135\0\174\0\233\0\37\0\37"+
+    "\0\37\0\37\0\37\0\37\0\37\0\37\0\37\0\37"+
+    "\0\272\0\331\0\370\0\u0117\0\u0136\0\u0155\0\u0174\0\u0193"+
+    "\0\u01b2\0\u01d1\0\u01f0\0\u020f\0\37\0\u022e\0\u024d\0\u026c"+
+    "\0\u028b\0\76\0\u02aa\0\u02c9\0\76\0\76";
+
+  private static int [] zzUnpackRowMap() {
+    int [] result = new int[38];
+    int offset = 0;
+    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+    int i = 0;  /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int high = packed.charAt(i++) << 16;
+      result[j++] = high | packed.charAt(i++);
+    }
+    return j;
+  }
+
+  /** 
+   * The transition table of the DFA
+   */
+  private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+  private static final String ZZ_TRANS_PACKED_0 =
+    "\2\2\1\3\1\4\1\5\1\6\1\5\1\7\1\10"+
+    "\1\11\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+
+    "\1\21\4\3\1\22\2\3\1\23\4\3\41\0\3\3"+
+    "\15\0\15\3\2\0\3\4\15\0\15\4\5\0\1\5"+
+    "\1\0\1\5\35\0\1\24\1\0\1\25\30\0\3\3"+
+    "\15\0\1\3\1\26\13\3\2\0\3\3\15\0\6\3"+
+    "\1\27\6\3\2\0\3\3\15\0\11\3\1\30\3\3"+
+    "\1\0\6\24\1\0\30\24\7\25\1\31\27\25\1\0"+
+    "\3\3\15\0\1\32\14\3\2\0\3\3\15\0\1\33"+
+    "\14\3\2\0\3\3\15\0\12\3\1\34\2\3\1\0"+
+    "\5\25\1\35\1\25\1\31\27\25\1\0\3\3\15\0"+
+    "\2\3\1\36\12\3\2\0\3\3\15\0\7\3\1\37"+
+    "\5\3\2\0\3\3\15\0\13\3\1\40\1\3\2\0"+
+    "\3\3\15\0\1\41\14\3\2\0\3\3\15\0\2\3"+
+    "\1\42\12\3\2\0\3\3\15\0\14\3\1\43\2\0"+
+    "\3\3\15\0\3\3\1\44\11\3\2\0\3\3\15\0"+
+    "\11\3\1\45\3\3\2\0\3\3\15\0\4\3\1\46"+
+    "\10\3\1\0";
+
+  private static int [] zzUnpackTrans() {
+    int [] result = new int[744];
+    int offset = 0;
+    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackTrans(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      value--;
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+  /**
+   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+   */
+  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+  private static final String ZZ_ATTRIBUTE_PACKED_0 =
+    "\1\0\1\11\4\1\12\11\4\1\1\0\3\1\1\0"+
+    "\3\1\1\11\11\1";
+
+  private static int [] zzUnpackAttribute() {
+    int [] result = new int[38];
+    int offset = 0;
+    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true <=> the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+  /* user code: */
+    private Token sym(int id) {
+        return new Token(id, yychar, yychar+yylength(), yytext());
+    }
+
+
+  /**
+   * Creates a new scanner
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public GrammarLexer(java.io.Reader in) {
+    this.zzReader = in;
+  }
+
+
+  /** 
+   * Unpacks the compressed character translation table.
+   *
+   * @param packed   the packed character translation table
+   * @return         the unpacked character translation table
+   */
+  private static char [] zzUnpackCMap(String packed) {
+    char [] map = new char[0x110000];
+    int i = 0;  /* index in packed string  */
+    int j = 0;  /* index in unpacked array */
+    while (i < 146) {
+      int  count = packed.charAt(i++);
+      char value = packed.charAt(i++);
+      do map[j++] = value; while (--count > 0);
+    }
+    return map;
+  }
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   * 
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (zzStartRead > 0) {
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) {
+      /* if not: blow it up */
+      char newBuffer[] = new char[zzBuffer.length*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+    }
+
+    /* fill the buffer with new input */
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested to few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
+        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+          --zzEndRead;
+          zzFinalHighSurrogate = 1;
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length, if it has grown.
+   *
+   * @param reader   the new input stream 
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+    if (zzBuffer.length > ZZ_BUFFERSIZE)
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occured while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+  private void zzScanError(int errorCode) throws RuntimeException {
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+    throw new RuntimeException(message);
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by then next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+  public void yypushback(int number)  throws RuntimeException {
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  public Token nextToken() throws java.io.IOException, RuntimeException, RuntimeException {
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+    int [] zzTransL = ZZ_TRANS;
+    int [] zzRowMapL = ZZ_ROWMAP;
+    int [] zzAttrL = ZZ_ATTRIBUTE;
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+      yychar+= zzMarkedPosL-zzStartRead;
+
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+      zzState = ZZ_LEXSTATE[zzLexicalState];
+
+      // set up zzAction for empty match case:
+      int zzAttributes = zzAttrL[zzState];
+      if ( (zzAttributes & 1) == 1 ) {
+        zzAction = zzState;
+      }
+
+
+      zzForAction: {
+        while (true) {
+    
+          if (zzCurrentPosL < zzEndReadL) {
+            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+            zzCurrentPosL += Character.charCount(zzInput);
+          }
+          else if (zzAtEOF) {
+            zzInput = YYEOF;
+            break zzForAction;
+          }
+          else {
+            // store back cached positions
+            zzCurrentPos  = zzCurrentPosL;
+            zzMarkedPos   = zzMarkedPosL;
+            boolean eof = zzRefill();
+            // get translated positions and possibly new buffer
+            zzCurrentPosL  = zzCurrentPos;
+            zzMarkedPosL   = zzMarkedPos;
+            zzBufferL      = zzBuffer;
+            zzEndReadL     = zzEndRead;
+            if (eof) {
+              zzInput = YYEOF;
+              break zzForAction;
+            }
+            else {
+              zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+              zzCurrentPosL += Character.charCount(zzInput);
+            }
+          }
+          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+          if (zzNext == -1) break zzForAction;
+          zzState = zzNext;
+
+          zzAttributes = zzAttrL[zzState];
+          if ( (zzAttributes & 1) == 1 ) {
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+            if ( (zzAttributes & 8) == 8 ) break zzForAction;
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+          {     return sym(GrammarTerminals.EOF);
+ }
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { throw new RuntimeException("Illegal character '" + yytext() + "'.");
+            }
+          case 18: break;
+          case 2: 
+            { return sym(GrammarTerminals.NONTERMINAL);
+            }
+          case 19: break;
+          case 3: 
+            { return sym(GrammarTerminals.TERMINAL);
+            }
+          case 20: break;
+          case 4: 
+            { 
+            }
+          case 21: break;
+          case 5: 
+            { return sym(GrammarTerminals.STAR);
+            }
+          case 22: break;
+          case 6: 
+            { return sym(GrammarTerminals.EQUALS);
+            }
+          case 23: break;
+          case 7: 
+            { return sym(GrammarTerminals.BAR);
+            }
+          case 24: break;
+          case 8: 
+            { return sym(GrammarTerminals.SEMICOLON);
+            }
+          case 25: break;
+          case 9: 
+            { return sym(GrammarTerminals.HASH);
+            }
+          case 26: break;
+          case 10: 
+            { return sym(GrammarTerminals.COMMA);
+            }
+          case 27: break;
+          case 11: 
+            { return sym(GrammarTerminals.LPAREN);
+            }
+          case 28: break;
+          case 12: 
+            { return sym(GrammarTerminals.RPAREN);
+            }
+          case 29: break;
+          case 13: 
+            { return sym(GrammarTerminals.PLUS);
+            }
+          case 30: break;
+          case 14: 
+            { return sym(GrammarTerminals.OPTIONAL);
+            }
+          case 31: break;
+          case 15: 
+            { return sym(GrammarTerminals.SHIFT);
+            }
+          case 32: break;
+          case 16: 
+            { return sym(GrammarTerminals.REDUCE);
+            }
+          case 33: break;
+          case 17: 
+            { return sym(GrammarTerminals.INITIAL);
+            }
+          case 34: break;
+          default:
+            zzScanError(ZZ_NO_MATCH);
+        }
+      }
+    }
+  }
+
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat
new file mode 100644 (file)
index 0000000..f1c4e8a
Binary files /dev/null and b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.dat differ
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParser.java
new file mode 100644 (file)
index 0000000..d8526ca
--- /dev/null
@@ -0,0 +1,350 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+
+public abstract class GrammarParser {    
+    public static final boolean TRACE = true;
+
+    private static final int INITIAL_CAPACITY = 16;
+    private static final int STATE_COUNT = 19;
+    private static final int TERMINAL_COUNT = 16;
+    private static final int NONTERMINAL_COUNT = 6;
+    private static final int PRODUCT_COUNT = 8;
+    
+    private static final int[] ACTION_ROW_ID = new int[STATE_COUNT];
+    private static final int[] ACTION_COLUMN_ID = new int[TERMINAL_COUNT];
+    private static final short[] ACTION_TABLE = new short[56];
+    private static final int[] ERROR_TABLE = new int[10];
+    private static final int[] GOTO_ROW_ID = new int[STATE_COUNT];
+    private static final int[] GOTO_COLUMN_ID = new int[NONTERMINAL_COUNT];
+    private static final short[] GOTO_TABLE = new short[12];
+    private static final int[] PRODUCT_LHS = new int[PRODUCT_COUNT];
+
+    private static final short STATE_MASK = (short)0x0fff;
+    private static final short REDUCE_MASK = (short)0x8000;
+    private static final short POP_MASK = (short)0x4000;
+    private static final short PUSH_MASK = (short)0x2000;
+    private static final short ERROR_ACTION = (short)0xffff;
+    private static final short ACCEPT_ACTION = (short)0xfffe;
+    
+    public static final String[] TERMINAL_NAMES = new String[] {
+        "NONTERMINAL",
+        "EQUALS",
+        "BAR",
+        "SEMICOLON",
+        "INITIAL",
+        "HASH",
+        "TERMINAL",
+        "COMMA",
+        "SHIFT",
+        "REDUCE",
+        "STAR",
+        "PLUS",
+        "OPTIONAL",
+        "LPAREN",
+        "RPAREN",
+        "EOF"
+    };
+
+    public static final String[] NONTERMINAL_NAMES = new String[] {
+        "file",
+        "declaration",
+        "prod",
+        "regexps",
+        "regexp",
+        "init$1"
+    };
+        
+    static {
+        try {
+            DataInputStream input = new DataInputStream(GrammarParser.class.getResourceAsStream("GrammarParser.dat"));
+            for(int i=0;i<ACTION_ROW_ID.length;++i)
+                ACTION_ROW_ID[i] = input.readInt();
+            for(int i=0;i<ACTION_COLUMN_ID.length;++i)
+                ACTION_COLUMN_ID[i] = input.readInt();    
+            for(int i=0;i<ACTION_TABLE.length;++i)
+                ACTION_TABLE[i] = input.readShort();
+            for(int i=0;i<ERROR_TABLE.length;++i)
+                ERROR_TABLE[i] = input.readInt();
+            for(int i=0;i<GOTO_ROW_ID.length;++i)
+                GOTO_ROW_ID[i] = input.readInt();
+            for(int i=0;i<GOTO_COLUMN_ID.length;++i)
+                GOTO_COLUMN_ID[i] = input.readInt();    
+            for(int i=0;i<GOTO_TABLE.length;++i)
+                GOTO_TABLE[i] = input.readShort();
+            for(int i=0;i<PRODUCT_LHS.length;++i)
+                PRODUCT_LHS[i] = input.readInt();
+            input.close();
+        } catch(IOException e) {
+            e.printStackTrace();
+        }
+    }
+    
+    private static short getAction(int state, int symbol) {
+        int id = TERMINAL_COUNT*state + symbol;
+        if( ((ERROR_TABLE[id>>5] >> (id&31))&1) != 0 )
+            return ERROR_ACTION;
+        return ACTION_TABLE[ACTION_ROW_ID[state] + ACTION_COLUMN_ID[symbol]];
+    }
+    
+    private static short getGoto(int state, int symbol) {
+        return GOTO_TABLE[GOTO_ROW_ID[state] + GOTO_COLUMN_ID[symbol]];
+    }
+    
+    protected abstract Token nextToken();
+    
+    private Object[] symbolStack = new Object[INITIAL_CAPACITY];
+    private int symbolStackLength = 0;
+    
+    private int[] stateStack = new int[INITIAL_CAPACITY];
+    private int[] symbolStackPositionStack = new int[INITIAL_CAPACITY];
+    private int stateStackLength = 0;
+    
+    // For reduce
+    private int reductionLength;
+    
+    protected int length() {
+        return reductionLength;
+    }
+    
+    protected Object get(int i) {
+        if(i < 0 || i >= reductionLength)
+            throw new IndexOutOfBoundsException();
+        return symbolStack[symbolStackLength+i];
+    }
+    
+    private String parseErrorDescription(int state, Token token, int tokenId) {
+        StringBuilder b = new StringBuilder();
+        b.append("Unexpected token '").append(token)
+         .append("' (").append(TERMINAL_NAMES[tokenId])
+         .append("). Expected one of ");
+        ArrayList<String> possibleTerminals = new ArrayList<String>();
+        for(int i=0;i<TERMINAL_COUNT;++i)
+            if(getAction(state, i) != ERROR_ACTION)
+                possibleTerminals.add(TERMINAL_NAMES[i]);
+        Collections.sort(possibleTerminals);
+        for(int i=0;i<possibleTerminals.size();++i) {
+            if(i > 0)
+                b.append(", ");
+            b.append(possibleTerminals.get(i));
+        }
+        b.append('.');
+        return b.toString();
+    }
+    
+    protected abstract RuntimeException syntaxError(Token token, String description);
+    
+    private static String describeAction(boolean isGoto, int action) {
+        if(action == ERROR_ACTION)
+            return "ERROR";
+        if(action == ACCEPT_ACTION)
+            return "ACCEPT";
+        StringBuilder b = new StringBuilder();
+        if(isGoto)
+            b.append("GOTO ");
+        else {
+            if((action & REDUCE_MASK) != 0) {
+                action ^= REDUCE_MASK;
+                b.append("REDUCE");
+            }
+            else
+                b.append("SHIFT");
+        }
+        if((action & POP_MASK) != 0) {
+            action ^= POP_MASK;
+            b.append(" POP");
+        }
+        if((action & PUSH_MASK) != 0) {
+            action ^= PUSH_MASK;
+            b.append(" PUSH");
+        }
+        b.append(' ').append(action);
+        return b.toString();
+    }
+    
+    private void printState(int state) {
+        System.out.print("state=" + state + ":");
+        for(int i=symbolStackLength-1,j=stateStackLength-1;i>=0;--i) {
+            Object s = symbolStack[i];
+            if(s instanceof Token)
+                System.out.print(" " + TERMINAL_NAMES[((Token)s).id]);
+            else if(s == null)
+                System.out.print(" null");
+            else
+                System.out.print(" " + s.getClass().getSimpleName());
+            while(j>=0 && symbolStackPositionStack[j]==i)
+                System.out.print(" (" + stateStack[j--] + ")");
+        }
+        System.out.println();
+    }
+    
+    private Object parse(int state) {
+        while(true) {
+            Token token = nextToken();
+            int tokenId = token.id;
+            if(TRACE)
+                System.out.println("---> token " + TERMINAL_NAMES[tokenId] + " \"" + token.text + "\" <---");
+            while(true) {
+                if(TRACE)
+                    printState(state);
+                short action = getAction(state, tokenId);
+                if(TRACE)
+                    System.out.println("    -> action=" + describeAction(false, action));
+                //System.out.println(STATE_DESCRIPTIONS[state]);
+                if((action & REDUCE_MASK) != 0) {
+                    if(action == ACCEPT_ACTION)
+                        return symbolStack[symbolStackLength-1];
+                    if(action == ERROR_ACTION)
+                        throw syntaxError(token, parseErrorDescription(state, token, tokenId));
+                    int popAmount = (action >>> 13)&3;
+                    if(TRACE) {
+                        if(popAmount > 0)
+                            System.out.println("    POP " + popAmount);
+                    }
+                    stateStackLength -= popAmount;
+                    action &= STATE_MASK;
+                    
+                    int reductionBegin = symbolStackPositionStack[--stateStackLength];
+                    
+                    reductionLength = symbolStackLength-reductionBegin;
+                    symbolStackLength = reductionBegin;
+                    
+                    if(symbolStackLength == symbolStack.length)
+                        symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+                    Object symbol = reduce(action);
+                    postReduce(symbol);
+                    symbolStack[symbolStackLength] = symbol;
+                    
+                    state = stateStack[stateStackLength];
+                    if(TRACE) {
+                        ++symbolStackLength;
+                        printState(state);
+                        --symbolStackLength;
+                        System.out.println("    nonterminal=" + NONTERMINAL_NAMES[PRODUCT_LHS[action]]);
+                    }
+                    action = getGoto(state, PRODUCT_LHS[action]);
+                    if(TRACE)
+                        System.out.println("    -> action=" + describeAction(true, action));
+                        
+                    // Pop state
+                    if((action & POP_MASK) != 0) {
+                        --stateStackLength;
+                    }
+                    // Push state
+                    if((action & PUSH_MASK) != 0) {
+                        if(stateStackLength == stateStack.length) {
+                            stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+                            symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+                        }
+                        symbolStackPositionStack[stateStackLength] = symbolStackLength;
+                        stateStack[stateStackLength++] = state;
+                    }
+                    state = action & STATE_MASK;
+                    ++symbolStackLength;
+                }
+                else {
+                    // Pop state
+                    if((action & POP_MASK) != 0) {
+                        --stateStackLength;
+                    }
+                    // Push state
+                    if((action & PUSH_MASK) != 0) {
+                        if(stateStackLength == stateStack.length) {
+                            stateStack = Arrays.copyOf(stateStack, stateStackLength*2);
+                            symbolStackPositionStack = Arrays.copyOf(symbolStackPositionStack, stateStackLength*2);
+                        }
+                        symbolStackPositionStack[stateStackLength] = symbolStackLength;
+                        stateStack[stateStackLength++] = state;
+                    }
+                    
+                    // New state
+                    state = action & STATE_MASK;
+                    
+                    // Push symbol
+                    if(symbolStackLength == symbolStack.length)
+                        symbolStack = Arrays.copyOf(symbolStack, symbolStackLength*2);
+                    symbolStack[symbolStackLength++] = token;
+                    break;
+                }
+            }
+        }
+    }
+    
+    public Object parseFile() {
+        return parse(0);
+    }
+
+
+    protected Object reduce(int productionId) {
+        try {
+        switch(productionId) {
+        case 0:
+            return reduceFile();
+        case 1:
+            return reduceProduction();
+        case 2:
+            return reduceInitial();
+        case 3:
+            return reduceProductionRhs();
+        case 4:
+            return reduceConcatenation();
+        case 5:
+            return reduceTerminal();
+        case 6:
+            return reduceUnion();
+
+        default:
+            throw new RuntimeException("Internal parser error.");
+        }
+        } catch(RuntimeException e) {
+            StringBuilder b = new StringBuilder();
+            b.append("Failed to reduce");
+            for(int i=0;i<length();++i) {
+                Object obj = get(i);
+                b.append("\n    (").append(i).append(") \"").append(obj).append('\"');
+                if(obj instanceof Token)
+                    b.append(" (").append(TERMINAL_NAMES[((Token)obj).id]).append(")");
+                else
+                    b.append(" [").append(obj.getClass().getSimpleName()).append("]");
+            }
+            throw new RuntimeException(b.toString(), e);
+        } 
+    }
+
+    /**
+     * file ::= declaration declaration&#42;
+     */
+    protected abstract Object reduceFile();
+    /**
+     * declaration ::= NONTERMINAL EQUALS prod (BAR prod)&#42; SEMICOLON
+     */
+    protected abstract Object reduceProduction();
+    /**
+     * declaration ::= INITIAL NONTERMINAL SEMICOLON
+     */
+    protected abstract Object reduceInitial();
+    /**
+     * prod ::= regexps HASH (TERMINAL COMMA (SHIFT | REDUCE))&#42; TERMINAL
+     */
+    protected abstract Object reduceProductionRhs();
+    /**
+     * regexps ::= (regexp (regexp | STAR | PLUS | OPTIONAL)&#42;)?
+     */
+    protected abstract Object reduceConcatenation();
+    /**
+     * regexp ::= NONTERMINAL | INITIAL | TERMINAL | SHIFT | REDUCE
+     */
+    protected abstract Object reduceTerminal();
+    /**
+     * regexp ::= LPAREN regexps (BAR regexps)&#42; RPAREN
+     */
+    protected abstract Object reduceUnion();
+
+    protected void postReduce(Object reduced) {
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java
new file mode 100644 (file)
index 0000000..a5d3b8d
--- /dev/null
@@ -0,0 +1,152 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+
+public class GrammarParserImpl extends GrammarParser {
+    private final GrammarLexer lexer;
+    
+    ArrayList<String> terminals = new ArrayList<String>();
+    ArrayList<String> nonterminals = new ArrayList<String>();
+    TObjectIntHashMap<String> symbols = new TObjectIntHashMap<String>();
+    ArrayList<Production> productions = new ArrayList<Production>();
+    TIntArrayList initials = new TIntArrayList();
+
+    public GrammarParserImpl(Reader reader) {
+        lexer = new GrammarLexer(reader);
+    }
+    
+    private int getId(String symbol) {
+        if(symbols.contains(symbol))
+            return symbols.get(symbol);
+        int id;
+        if(Character.isUpperCase(symbol.charAt(0))) {
+            id = terminals.size();
+            terminals.add(symbol);
+        }
+        else {
+            id = ~nonterminals.size();
+            nonterminals.add(symbol);
+        }
+        symbols.put(symbol, id);
+        return id;
+    }
+    
+    @Override
+    protected Token nextToken() {
+        try {
+            Token token = lexer.nextToken();
+            return token;
+        } catch(Exception e) {
+            if(e instanceof RuntimeException)
+                throw (RuntimeException)e;
+            else
+                throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    protected RuntimeException syntaxError(Token token, String description) {
+        return new RuntimeException(description);
+    }
+
+    @Override
+    protected Object reduceFile() {
+        return null;
+    }
+
+    @Override
+    protected Object reduceProduction() {
+        int lhs = getId(((Token)get(0)).text);
+        for(int i=2;i<length();i+=2) {
+            Production prod = (Production)get(i);
+            prod.lhs = lhs;
+            productions.add(prod);
+        }
+        return null;
+    }
+
+    @Override
+    protected Object reduceInitial() {
+        initials.add(getId(((Token)get(1)).text));
+        return null;
+    }
+    
+    @Override
+    protected Object reduceTerminal() {
+        return new RAtom(getId(((Token)get(0)).text));
+    }
+
+    private static Regexp postOp(Regexp regexp, Token op) {
+        switch(op.id) {
+        case GrammarTerminals.STAR: return Regexp.star(regexp);
+        case GrammarTerminals.PLUS: return Regexp.plus(regexp);
+        case GrammarTerminals.OPTIONAL: return Regexp.optional(regexp);
+        default: throw new IllegalStateException();
+        }
+    }
+
+    @Override
+    protected Object reduceConcatenation() {
+        ArrayList<Regexp> regexps = new ArrayList<Regexp>(length());
+        for(int i=0;i<length();++i) {
+            Object obj = get(i);
+            if(obj instanceof Regexp)
+                regexps.add((Regexp)obj);
+            else {
+                Token token = (Token)obj;
+                Regexp regexp = regexps.remove(regexps.size()-1);
+                regexps.add(postOp(regexp, token));
+            }
+        }
+        return Regexp.seq(regexps.toArray(new Regexp[regexps.size()]));
+    }
+    
+    @Override
+    protected Object reduceUnion() {
+        Regexp[] regexps = new Regexp[length()/2];
+        for(int i=1;i<length();i+=2)
+            regexps[i/2] = (Regexp)get(i);
+        return Regexp.or(regexps);
+    }
+    
+    @Override
+    protected Object reduceProductionRhs() {
+        Regexp rhs = (Regexp)get(0);
+        String name = ((Token)get(2)).text;
+        TIntByteHashMap annotations = new TIntByteHashMap();
+        for(int i=4;i<length();i+=3) {
+            Token type = (Token)get(i);
+            int id = getId(((Token)get(i+1)).text);
+            annotations.put(id, (byte)(type.id == GrammarTerminals.SHIFT ? 0 : 1));
+        }
+        return new Production(name, 0, rhs, annotations);
+    }
+
+    public Grammar getGrammar() {
+        return new Grammar(
+                productions.toArray(new Production[productions.size()]),
+                terminals.toArray(new String[terminals.size()]),
+                nonterminals.toArray(new String[nonterminals.size()]),
+                initials.toArray()
+                );
+    }
+
+    public static Grammar read(InputStream inputStream) throws IOException {
+        GrammarParserImpl parser = new GrammarParserImpl(new InputStreamReader(inputStream, "UTF-8"));
+        parser.parseFile();
+        return parser.getGrammar();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarTerminals.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarTerminals.java
new file mode 100644 (file)
index 0000000..7cab82d
--- /dev/null
@@ -0,0 +1,20 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+
+public interface GrammarTerminals {
+    public static final int NONTERMINAL = 0;
+    public static final int EQUALS = 1;
+    public static final int BAR = 2;
+    public static final int SEMICOLON = 3;
+    public static final int INITIAL = 4;
+    public static final int HASH = 5;
+    public static final int TERMINAL = 6;
+    public static final int COMMA = 7;
+    public static final int SHIFT = 8;
+    public static final int REDUCE = 9;
+    public static final int STAR = 10;
+    public static final int PLUS = 11;
+    public static final int OPTIONAL = 12;
+    public static final int LPAREN = 13;
+    public static final int RPAREN = 14;
+    public static final int EOF = 15;
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Token.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/Token.java
new file mode 100644 (file)
index 0000000..e59997c
--- /dev/null
@@ -0,0 +1,17 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+public class Token {
+    public static final Token[] EMPTY_ARRAY = new Token[0];
+    
+    public final String text;
+    public final int id;
+    
+    public Token(int type, int beginPos, int endPos, String text) {
+        this.id = type;
+        this.text = text;
+    }
+    
+    @Override
+    public String toString() {
+        return text;
+    }
+}
\ No newline at end of file
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Namer.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Namer.java
new file mode 100644 (file)
index 0000000..2212815
--- /dev/null
@@ -0,0 +1,7 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+public interface Namer {
+
+    String getName(int symbolId);
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RAtom.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RAtom.java
new file mode 100644 (file)
index 0000000..6faa21a
--- /dev/null
@@ -0,0 +1,55 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class RAtom extends Regexp {
+    public final int symbolId;
+
+    public RAtom(int symbolId) {
+        this.symbolId = symbolId;
+    }
+
+    @Override
+    protected void buildAutomaton(NFA aut, int inState, int outState) {
+        aut.addTransition(inState, symbolId, outState);
+    }
+
+    @Override
+    protected void toString(StringBuilder b, int prec) {
+        b.append((char)symbolId);
+    }
+
+    @Override
+    public void toString(StringBuilder b, Namer grammar, int prec) {
+        if(symbolId == 0x80000000)
+            b.append("#");
+        else
+            b.append(grammar.getName(symbolId));
+    }
+    
+    @Override
+    protected int getTypeId() {
+        return ATOM;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == this)
+            return true;
+        if(obj == null || obj.getClass() != getClass())
+            return false;
+        RAtom other = (RAtom)obj;
+        return symbolId == other.symbolId;
+    }
+    
+    @Override
+    public int hashCode() {
+        return 31*symbolId + 46;
+    }
+
+    @Override
+    public boolean isNullable() {
+        return false;
+    }
+
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROp.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROp.java
new file mode 100644 (file)
index 0000000..5668df2
--- /dev/null
@@ -0,0 +1,98 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class ROp extends Regexp {
+    public final Regexp exp;
+    public final char op;
+    
+    ROp(Regexp exp, char op) {
+        this.exp = exp;
+        this.op = op;
+    }
+    
+    @Override
+    public Regexp simplify() {
+        Regexp exp = this.exp.simplify();
+        if(exp instanceof ROp) {
+            ROp other = (ROp)exp;
+            if(other.op == op || other.op == '*')
+                return other;
+            else
+                return new ROp(other.exp, '*');
+        }
+        else if(exp.isNullable()) {
+            if(op == '?')
+                return exp;
+            if(op == '+')
+                return new ROp(exp, '*');
+        }
+        if(exp == this.exp)
+            return this;
+        else
+            return new ROp(exp, op);
+    }
+
+    @Override
+    protected void buildAutomaton(NFA aut, int inState, int outState) {
+        switch(op) {
+        case '?':
+            exp.buildAutomaton(aut, inState, outState);
+            aut.addEpsilonTransition(inState, outState);
+            break;
+        case '*': {
+            int state = aut.newState();
+            aut.addEpsilonTransition(inState, state);
+            exp.buildAutomaton(aut, state, state);
+            aut.addEpsilonTransition(state, outState);
+        } break;
+        case '+': {
+            int state1 = aut.newState();
+            int state2 = aut.newState();
+            aut.addEpsilonTransition(inState, state1);
+            exp.buildAutomaton(aut, state1, state2);
+            aut.addEpsilonTransition(state2, state1);
+            aut.addEpsilonTransition(state2, outState);
+        } break;
+        default:
+            throw new IllegalStateException("Invalid operation '" + op + "'.");
+        }
+    }
+    
+    @Override
+    protected void toString(StringBuilder b, int prec) {
+        exp.toString(b, 3);
+        b.append(op);
+    }
+    
+    @Override
+    public void toString(StringBuilder b, Namer grammar, int prec) {
+        exp.toString(b, grammar, 3);
+        b.append(op);
+    }
+    
+    @Override
+    protected int getTypeId() {
+        return OP;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == this)
+            return true;
+        if(obj == null || obj.getClass() != getClass())
+            return false;
+        ROp other = (ROp)obj;
+        return op == other.op && exp.equals(other.exp);
+    }
+    
+    @Override
+    public int hashCode() {
+        return 31*exp.hashCode() + op;
+    }
+
+    @Override
+    public boolean isNullable() {
+        return op != '+' || exp.isNullable();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROr.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/ROr.java
new file mode 100644 (file)
index 0000000..e46131e
--- /dev/null
@@ -0,0 +1,188 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class ROr extends Regexp {
+    public final Regexp[] exps;
+
+    ROr(Regexp[] exps) {
+        this.exps = exps;
+    }
+
+    @Override
+    protected void buildAutomaton(NFA aut, int inState, int outState) {
+        for(Regexp exp : exps)
+            exp.buildAutomaton(aut, inState, outState);
+    }
+        
+    @Override
+    protected void toString(StringBuilder b, int prec) {
+        if(prec >= 1)
+            b.append('(');
+        boolean first = true;
+        for(Regexp exp : exps) {
+            if(first)
+                first = false;
+            else
+                b.append(" | ");
+            exp.toString(b, 1);
+        }
+        if(prec >= 1)
+            b.append(')');
+    }
+    
+    @Override
+    public void toString(StringBuilder b, Namer grammar, int prec) {
+        if(prec >= 1)
+            b.append('(');
+        boolean first = true;
+        for(Regexp exp : exps) {
+            if(first)
+                first = false;
+            else
+                b.append(" | ");
+            exp.toString(b, grammar, 1);
+        }
+        if(prec >= 1)
+            b.append(')');
+    }
+    
+    @Override
+    protected int getTypeId() {
+        return OR;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == this)
+            return true;
+        if(obj == null || obj.getClass() != getClass())
+            return false;
+        ROr other = (ROr)obj;
+        return Arrays.equals(exps, other.exps);
+    }
+    
+    @Override
+    public int hashCode() {
+        int r = 34235;
+        for(Regexp exp : exps) {
+            r *= 31;
+            r += exp.hashCode();
+        }
+        return r;
+    }
+    
+    @Override
+    public boolean isNullable() {
+        for(Regexp exp : exps)
+            if(exp.isNullable())
+                return true;
+        return false;
+    }
+    
+    private Regexp simplify(THashSet<Regexp> set) {
+        boolean qm = set.remove(ONE);
+        
+        Regexp exp;
+        simpl: {
+            if(set.size() > 1) {
+                frontLoop: {
+                    Iterator<Regexp> it = set.iterator();
+                    Regexp common = front(it.next());
+                    while(it.hasNext()) {
+                        Regexp temp = front(it.next());
+                        if(!temp.equals(common))
+                            break frontLoop;
+                    }
+                    
+                    THashSet<Regexp> set2 = new THashSet<Regexp>();
+                    for(Regexp e : set)
+                        set2.add(removeFront(e));
+                    
+                    exp = seq(common, simplify(set2)); 
+                    break simpl;
+                }
+            
+                backLoop: {
+                    Iterator<Regexp> it = set.iterator();
+                    Regexp common = back(it.next());
+                    while(it.hasNext()) {
+                        Regexp temp = back(it.next());
+                        if(!temp.equals(common))
+                            break backLoop;
+                    }
+                    
+                    THashSet<Regexp> set2 = new THashSet<Regexp>();
+                    for(Regexp e : set)
+                        set2.add(removeBack(e));
+                    
+                    exp = seq(simplify(set2), common); 
+                    break simpl;
+                }
+            }
+        
+            exp = or(set);
+        }
+        
+        if(qm && !exp.isNullable())
+            exp = new ROp(exp, '?');
+        return exp;
+    }
+    
+    @Override
+    public Regexp simplify() {
+        if(exps.length == 0)
+            return this;
+        THashSet<Regexp> set = new THashSet<Regexp>();
+        for(Regexp exp : exps) {
+            exp = exp.simplify();
+            or(set, exp);
+        }
+        return simplify(set);
+    }
+    
+    private static Regexp front(Regexp exp) {
+        if(exp instanceof RSeq) {
+            Regexp[] exps = ((RSeq)exp).exps;
+            if(exps.length == 0)
+                return null;
+            return exps[0];
+        }
+        else
+            return exp;
+    }
+    
+    private static Regexp removeFront(Regexp exp) {
+        if(exp instanceof RSeq) {
+            Regexp[] exps = ((RSeq)exp).exps;
+            return seq_(Arrays.asList(exps).subList(1, exps.length));
+        }
+        else
+            return ONE;
+    }
+    
+    private static Regexp back(Regexp exp) {
+        if(exp instanceof RSeq) {
+            Regexp[] exps = ((RSeq)exp).exps; 
+            if(exps.length == 0)
+                return null;
+            return exps[exps.length-1];
+        }
+        else
+            return exp;
+    }
+    
+    private static Regexp removeBack(Regexp exp) {
+        if(exp instanceof RSeq) {
+            Regexp[] exps = ((RSeq)exp).exps;
+            return seq_(Arrays.asList(exps).subList(0, exps.length-1));
+        }
+        else
+            return ONE;
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/RSeq.java
new file mode 100644 (file)
index 0000000..354ee5b
--- /dev/null
@@ -0,0 +1,99 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public class RSeq extends Regexp {
+    public final Regexp[] exps;
+
+    RSeq(Regexp[] exps) {
+        this.exps = exps;
+    }
+
+    @Override
+    protected void buildAutomaton(NFA aut, int inState, int outState) {
+        if(exps.length == 0) {
+            aut.addEpsilonTransition(inState, outState);
+            return;
+        }
+        for(int i=exps.length-1;i>0;--i) {
+            int midState = aut.newState();
+            exps[i].buildAutomaton(aut, midState, outState);
+            outState = midState;
+        }
+        exps[0].buildAutomaton(aut, inState, outState);
+    }
+    
+    @Override
+    protected void toString(StringBuilder b, int prec) {
+        if(prec >= 2)
+            b.append('(');
+        for(Regexp exp : exps)
+            exp.toString(b, 2);
+        if(prec >= 2)
+            b.append(')');
+    }
+    
+    @Override
+    public void toString(StringBuilder b, Namer grammar, int prec) {
+        if(prec >= 2)
+            b.append('(');
+        boolean first = true;
+        for(Regexp exp : exps) {
+            if(first)
+                first = false;
+            else
+                b.append(' ');
+            exp.toString(b, grammar, 2);
+        }
+        if(prec >= 2)
+            b.append(')');
+    }
+    
+    @Override
+    protected int getTypeId() {
+        return SEQ;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == this)
+            return true;
+        if(obj == null || obj.getClass() != getClass())
+            return false;
+        RSeq other = (RSeq)obj;
+        return Arrays.equals(exps, other.exps);
+    }
+    
+    @Override
+    public int hashCode() {
+        int r = 31340123;
+        for(Regexp exp : exps) {
+            r *= 31;
+            r += exp.hashCode();
+        }
+        return r;
+    }
+
+    @Override
+    public Regexp simplify() {
+        if(exps.length == 0)
+            return this;
+        ArrayList<Regexp> l = new ArrayList<Regexp>(exps.length);
+        for(Regexp exp : this.exps) {
+            exp = exp.simplify();
+            seq(l, exp);
+        }
+        return seq_(l);
+    }
+    
+    @Override
+    public boolean isNullable() {
+        for(Regexp exp : exps)
+            if(!exp.isNullable())
+                return false;
+        return true;
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java
new file mode 100644 (file)
index 0000000..8211b6a
--- /dev/null
@@ -0,0 +1,244 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import gnu.trove.set.hash.THashSet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.simantics.scl.compiler.parser.regexp.automata.NFA;
+
+public abstract class Regexp implements Comparable<Regexp> {
+    static final int ATOM = 0;
+    static final int OP = 1;
+    static final int OR = 2;
+    static final int SEQ = 3;
+    
+    protected abstract void buildAutomaton(NFA aut, int inState, int outState);
+    
+    public static final Regexp ONE = new RSeq(new Regexp[0]);
+    public static final Regexp ZERO = new ROr(new Regexp[0]);
+
+    Regexp() {
+    }
+    
+    public NFA toAutomaton() {
+        NFA aut = new NFA();
+        int inState = aut.newState();
+        int outState = aut.newState();
+        buildAutomaton(aut, inState, outState);
+        aut.setInitialState(inState);
+        aut.setAccepts(outState, true);
+        return aut;
+    }
+    
+    @Override
+    public String toString() {
+        StringBuilder b = new StringBuilder();
+        toString(b, 0);
+        return b.toString();
+    }
+    
+    protected abstract void toString(StringBuilder b, int prec);
+
+    protected abstract int getTypeId();
+    
+    public abstract boolean isNullable();
+    
+    public Regexp simplify() {
+        return this;
+    }
+    
+    private static int parseRegexp(int pos, String regexp, AtomicReference<Regexp> result) {
+        ArrayList<Regexp> dc = new ArrayList<Regexp>();
+        ArrayList<Regexp> cc = new ArrayList<Regexp>();
+        loop: while(true) {
+            if(pos == regexp.length())
+                break loop;
+            char c = regexp.charAt(pos++);
+            switch(c) {
+            case '(': {
+                AtomicReference<Regexp> child = new AtomicReference<Regexp>();                
+                pos = parseRegexp(pos, regexp, child);
+                cc.add(child.get());
+            } break;
+            case ')':
+                break loop;
+            case '|': {
+                dc.add(seq(cc));
+                cc.clear();
+            } break;                
+            case '*': {
+                if(cc.isEmpty())
+                    throw new IllegalArgumentException("Encountered * that is not front of anything.");
+                int p = cc.size()-1;
+                cc.set(p, Regexp.star(cc.get(p)));
+            } break;
+            case '?': {
+                if(cc.isEmpty())
+                    throw new IllegalArgumentException("Encountered ? that is not front of anything.");
+                int p = cc.size()-1;
+                cc.set(p, Regexp.optional(cc.get(p)));
+            } break;
+            case '+': {
+                if(cc.isEmpty())
+                    throw new IllegalArgumentException("Encountered + that is not front of anything.");
+                int p = cc.size()-1;
+                cc.set(p, Regexp.plus(cc.get(p)));
+            } break;
+            default:
+                cc.add(new RAtom(c));
+            }
+        }
+        dc.add(seq(cc));
+        result.set(or(dc));
+        return pos;
+    }
+    
+    public static Regexp of(String regexp) {
+        AtomicReference<Regexp> result = new AtomicReference<Regexp>(); 
+        int finalPos = parseRegexp(0, regexp, result);
+        if(finalPos < regexp.length())
+            throw new IllegalArgumentException("Extra closing parenteses");
+        return result.get();
+    }
+    
+    static void seq(ArrayList<Regexp> l, Regexp exp) {
+        if(l.size() > 0 && l.get(0) == ZERO)
+            return;
+        if(exp instanceof RSeq)
+            for(Regexp e : ((RSeq)exp).exps)
+                seq(l, e);
+        else if(exp == ZERO) {
+            l.clear();
+            l.add(exp);
+        }
+        else
+            l.add(exp);
+    }
+    
+    static Regexp seq_(List<Regexp> es) {
+        if(es.size() == 0)
+            return ONE;
+        if(es.size() == 1)
+            return es.get(0);
+        Regexp[] eArray = es.toArray(new Regexp[es.size()]);
+        return new RSeq(eArray);
+    }
+    
+    public static Regexp seq(Regexp ... exps) {
+        if(exps.length == 0)
+            return ONE;
+        if(exps.length == 1)
+            return exps[0];
+        ArrayList<Regexp> es = new ArrayList<Regexp>();
+        for(Regexp exp : exps)
+            seq(es, exp);
+        return seq_(es);
+    }
+    
+    public static Regexp seq(Collection<Regexp> exps) {
+        return seq(exps.toArray(new Regexp[exps.size()]));
+    }
+    
+    static void or(THashSet<Regexp> s, Regexp exp) {
+        if(exp instanceof ROr)
+            for(Regexp e : ((ROr)exp).exps)
+                or(s, e);
+        else
+            s.add(exp);
+    }
+    
+    static Regexp or_(THashSet<Regexp> es) {
+        if(es.size() == 0)
+            return ZERO;
+        if(es.size() == 1)
+            return es.iterator().next();
+        Regexp[] eArray = es.toArray(new Regexp[es.size()]);
+        Arrays.sort(eArray);
+        return new ROr(eArray);
+    }
+    
+    public static Regexp or(Regexp ... exps) {
+        if(exps.length == 0)
+            return ZERO;
+        if(exps.length == 1)
+            return exps[0];
+        THashSet<Regexp> es = new THashSet<Regexp>();
+        for(Regexp exp : exps)
+            or(es, exp);
+        return or_(es);
+    }
+    
+    public static Regexp star(Regexp exp) {
+        if(exp == ONE || exp == ZERO)
+            return ONE;
+        return new ROp(exp, '*');
+    }
+    
+    public static Regexp plus(Regexp exp) {
+        return new ROp(exp, '+');
+    }
+    
+    public static Regexp optional(Regexp exp) {
+        return new ROp(exp, '?');
+    }
+    
+    public static Regexp or(Collection<Regexp> exps) {
+        return or(exps.toArray(new Regexp[exps.size()]));
+    }
+    
+    @Override
+    public int compareTo(Regexp o) {
+        int tA = getTypeId();
+        int tB = o.getTypeId();
+        if(tA < tB)
+            return -1;
+        if(tA > tB)
+            return 1;
+        switch(tA) {
+        case ATOM: {
+            int sA = ((RAtom)this).symbolId;
+            int sB = ((RAtom)o).symbolId;
+            if(sA < sB)
+                return -1;
+            if(sA > sB)
+                return 1;
+            return 0;
+        }
+        case OP: {
+            ROp a = (ROp)this;
+            ROp b = (ROp)o;
+            if(a.op < b.op)
+                return -1;
+            if(a.op > b.op)
+                return 1;
+            return a.exp.compareTo(b.exp);
+        }
+        case OR:
+            return compare(((ROr)this).exps, ((ROr)o).exps);
+        case SEQ: {
+            return compare(((RSeq)this).exps, ((RSeq)o).exps);
+        }
+        default:
+            throw new IllegalArgumentException();
+        }
+    }
+    
+    private static int compare(Regexp[] a, Regexp[] b) {
+        if(a.length < b.length)
+            return -1;
+        if(a.length > b.length)
+            return 1;
+        for(int i=0;i<a.length;++i) {
+            int temp = a[i].compareTo(b[i]);
+            if(temp != 0)
+                return temp;
+        }
+        return 0;
+    }
+
+    public abstract void toString(StringBuilder b, Namer grammar, int prec);
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/TestRegexp.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/TestRegexp.java
new file mode 100644 (file)
index 0000000..ad2b166
--- /dev/null
@@ -0,0 +1,20 @@
+package org.simantics.scl.compiler.parser.regexp;
+
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+
+public class TestRegexp {
+
+    public static void main(String[] args) {
+        
+        Regexp exp = Regexp.of("a*b|ac*");
+        
+        System.out.println(exp);
+        DFA aut = exp.toAutomaton().determinize().minimize();
+        System.out.println(aut.toRegexp());
+        
+        //AutomatonVisualizer.show(aut);
+        
+    }
+    
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/Automata.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/Automata.java
new file mode 100644 (file)
index 0000000..7479345
--- /dev/null
@@ -0,0 +1,15 @@
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.procedure.TIntIntProcedure;
+
+
+public interface Automata {
+    int newState();    
+    int size();
+    void addTransition(int sourceId, int symbol, int targetId);
+    void forEachTransition(int source, TIntIntProcedure proc);
+    void setAccepts(int id, boolean accepts);    
+    boolean getAccepts(int id);
+    void setInitialState(int initialState);
+    int getInitialState();
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java
new file mode 100644 (file)
index 0000000..c3ac114
--- /dev/null
@@ -0,0 +1,328 @@
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.impl.Constants;
+import gnu.trove.list.array.TByteArrayList;
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+public class DFA implements Automata {
+
+    private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>(); 
+    private TByteArrayList accepts = new TByteArrayList();
+    private int initialState;
+    
+    public int newState() {
+        int stateId = transitions.size();
+        transitions.add(new TIntIntHashMap(
+                Constants.DEFAULT_CAPACITY,
+                Constants.DEFAULT_LOAD_FACTOR,
+                0, -1));
+        accepts.add((byte)0);
+        return stateId;
+    }
+    
+    public int size() {
+        return transitions.size();
+    }
+    
+    public DFA copy() {
+        DFA copy = new DFA();
+        for(TIntIntHashMap t : transitions)
+            copy.transitions.add(new TIntIntHashMap(t));
+        copy.accepts = new TByteArrayList(accepts);
+        copy.initialState = initialState;
+        return copy;
+    }
+
+    public void addTransition(int sourceId, int symbol, int targetId) {
+        transitions.get(sourceId).put(symbol, targetId);
+    }
+    
+    public int getTransition(int sourceId, int symbol) {
+        return transitions.get(sourceId).get(symbol);
+    }
+    
+    public void forEachTransition(int source, final TIntIntProcedure proc) {        
+        transitions.get(source).forEachEntry(proc);
+    }
+    
+    public int[] nextStates(int id) {
+        return transitions.get(id).keys();
+    }
+    
+    public void setAccepts(int id, boolean accepts) {
+        this.accepts.set(id, accepts ? (byte)1 : (byte)0);
+    }
+    
+    public boolean getAccepts(int id) {
+        return accepts.get(id)==1;
+    }
+
+    public void setInitialState(int initialState) {
+        this.initialState = initialState;
+    }
+    
+    public int getInitialState() {
+        return initialState;
+    }
+
+    public DFA minimize() {
+        // Compute relevant input characters for minimization
+        final TIntIntHashMap symbolMap = new TIntIntHashMap();
+        final int[] symbolArray;
+        {
+            final TIntArrayList l = new TIntArrayList();
+            TIntProcedure proc = new TIntProcedure() {
+                @Override
+                public boolean execute(int value) {
+                    if(!symbolMap.containsKey(value)) {
+                        symbolMap.put(value, l.size());
+                        l.add(value);
+                    }
+                    return true;
+                }
+            };
+            for(TIntIntHashMap tMap : transitions)
+                tMap.forEachKey(proc);
+            symbolArray = l.toArray();
+        }
+        int symbolCount = symbolMap.size();        
+        int stateCount = transitions.size();
+        
+        // Inverse automata
+        final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
+        for(int i=0;i<inverse.length;++i)
+            inverse[i] = new TIntArrayList[symbolCount];
+        for(int sourceId=0;sourceId<stateCount;++sourceId) {
+            TIntIntHashMap tMap = transitions.get(sourceId);
+            for(int j=0;j<symbolCount;++j) {
+                int targetId = tMap.get(symbolArray[j]);
+                if(targetId == -1)
+                    targetId = stateCount;
+                TIntArrayList l = inverse[targetId][j];
+                if(l == null) {
+                    l = new TIntArrayList();
+                    inverse[targetId][j] = l;   
+                }
+                l.add(sourceId);
+            }
+        }
+        
+        /*for(int i=0;i<inverse.length;++i)
+            for(int j=0;j<symbolCount;++j)
+                System.out.println(i + " " + j + " -> " + inverse[i][j]);
+        */
+        
+        // 
+        int[] ids = new int[stateCount+1];
+        final int[] memPartion = new int[stateCount+1];        
+        TIntArrayList partionBegin = new TIntArrayList();
+        TIntArrayList partionEnd = new TIntArrayList();
+        TIntArrayList stack = new TIntArrayList();
+        TIntArrayList scheduled = new TIntArrayList();
+        
+        // Initial partition
+        {
+            int min = 0;
+            int max = stateCount;
+            ids[min++] = stateCount;
+            memPartion[stateCount] = 0;
+            for(int i=0;i<stateCount;++i) {
+                if(accepts.get(i)==1) {
+                    ids[max--] = i;
+                    memPartion[i] = 1;
+                }
+                else {
+                    ids[min++] = i;
+                    memPartion[i] = 0;
+                }
+            }
+            partionBegin.add(0);
+            partionBegin.add(min);
+            partionEnd.add(min);
+            partionEnd.add(ids.length);
+            scheduled.add(0);
+            scheduled.add(0);
+            if(min < ids.length/2) {
+                stack.add(0);
+                scheduled.set(0, 1);
+            }
+            else {
+                stack.add(1);
+                scheduled.set(1, 1);
+            }
+        }
+        
+        // Refinement
+        while(!stack.isEmpty()) {
+            int partionId = stack.removeAt(stack.size()-1);
+            scheduled.set(partionId, 0);
+            int begin = partionBegin.get(partionId);
+            int end = partionEnd.get(partionId);
+            
+            for(int j=0;j<symbolCount;++j) {
+                TIntHashSet invStates = new TIntHashSet();
+                for(int i=begin;i<end;++i) {
+                    int s = ids[i];
+                    TIntArrayList inv = inverse[s][j];
+                    if(inv != null)
+                        invStates.addAll(inv);
+                }
+                int[] invStatesArray = invStates.toArray();
+                
+                TIntHashSet partions = new TIntHashSet();
+                for(int s : invStatesArray)
+                    partions.add(memPartion[s]);
+                int[] partionsArray = partions.toArray();
+                
+                for(int p : partionsArray) {
+                    int pBegin = partionBegin.get(p);
+                    int pEnd = partionEnd.get(p);
+                    boolean splits = false;
+                    for(int k=pBegin;k<pEnd;++k)
+                        if(!invStates.contains(ids[k])) {
+                            splits = true;
+                            break;
+                        }
+                    if(splits) {
+                        int p2 = partionBegin.size();
+                        int p2End = pEnd;
+                        for(int k=pBegin;k<pEnd;++k) {
+                            int s = ids[k];
+                            if(invStates.contains(s)) {
+                                memPartion[s] = p2;
+                                --pEnd;
+                                ids[k] = ids[pEnd];
+                                ids[pEnd] = s;
+                                --k;
+                            }
+                        }
+                        partionEnd.set(p, pEnd);
+                        partionBegin.add(pEnd);
+                        partionEnd.add(p2End);
+                        
+                        if(scheduled.get(p) == 1) {
+                            scheduled.add(1);
+                            stack.add(p2);
+                        }
+                        else {
+                            if(pEnd - pBegin <= p2End - pEnd) {
+                                stack.add(p);
+                                scheduled.add(0);
+                                scheduled.set(p, 1);
+                            }
+                            else {
+                                stack.add(p2);
+                                scheduled.add(1);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        
+        // Print partitions
+        /*System.out.println("Partition count: " + partionBegin.size());
+        for(int i=0;i<memPartion.length;++i)
+            System.out.println("    " + i + " in " + memPartion[i]);
+        */
+        
+        // Put reachable states into new automaton
+        final DFA aut = new DFA();
+        int failurePartion = memPartion[stateCount];
+        final int[] sArray = new int[partionBegin.size()];
+        sArray[failurePartion] = -1;
+        for(int i=0;i<partionBegin.size();++i)
+            if(i != failurePartion)
+                sArray[i] = aut.newState();
+        for(int i=0;i<partionBegin.size();++i)
+            if(i != failurePartion) {
+                final int sourceId = sArray[i];
+                int bId = ids[partionBegin.get(i)];
+                forEachTransition(bId, new TIntIntProcedure() {
+                    @Override
+                    public boolean execute(int a, int b) {
+                        aut.addTransition(sourceId, a, sArray[memPartion[b]]);
+                        return true;
+                    }
+                });
+                aut.setAccepts(sourceId, getAccepts(bId));
+            }
+        aut.setInitialState(sArray[memPartion[initialState]]);
+        return aut;
+    }
+    
+    public Regexp toRegexp(int from, byte[] to) {
+        int stateCount = size();
+        final int[] order = new int[stateCount];
+        for(int i=0;i<stateCount;++i)
+            order[i] = i;
+        order[0] = from;
+        order[from] = 0;
+        
+        final Regexp[][] a = new Regexp[stateCount][stateCount];
+        final Regexp[] b = new Regexp[stateCount];
+        for(int i=0;i<stateCount;++i) {
+            b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
+            for(int j=0;j<stateCount;++j)
+                a[i][j] = Regexp.ZERO;
+            final Regexp[] row = a[i];
+            forEachTransition(order[i], new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int targetId) {
+                    targetId = order[targetId];
+                    row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
+                    return true;
+                }
+            });
+        }
+        
+        for(int n=stateCount-1;n>=0;--n) {
+            Regexp ss = Regexp.star(a[n][n]);
+            b[n] = Regexp.seq(ss, b[n]);
+            for(int j=0;j<stateCount;++j)
+                a[n][j] = Regexp.seq(ss, a[n][j]);
+            for(int i=0;i<n;++i) {
+                b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
+                for(int j=0;j<n;++j)
+                    a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
+            }
+        }
+        
+        return b[0];
+    }
+
+    public Regexp toRegexp() {
+        return toRegexp(initialState, accepts.toArray()).simplify();
+    }
+    
+    public Regexp toRegexpTo(int position) {
+        int stateCount = size();
+        byte[] targetArray = new byte[stateCount];
+        targetArray[position] = 1;
+        return toRegexp(initialState, targetArray).simplify();
+    }
+    
+    public Regexp toRegexpFrom(int position) {
+        return toRegexp(position, accepts.toArray()).simplify();
+    }
+
+    public Regexp toPositionalRegexp(int position) {
+        DFA aut = copy();
+        int s = transitions.size();
+        aut.transitions.add(aut.transitions.get(position));
+        aut.transitions.set(position, new TIntIntHashMap());
+        aut.addTransition(position, 0x80000000, s);
+        aut.accepts.add(accepts.get(position));
+        aut.accepts.set(position, (byte)0);
+        return aut.toRegexp();
+    }
+}
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/NFA.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/NFA.java
new file mode 100644 (file)
index 0000000..6c07752
--- /dev/null
@@ -0,0 +1,176 @@
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntObjectProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+
+public class NFA implements Automata {
+    
+    private static class State {
+        TIntObjectHashMap<TIntArrayList> transitions = new TIntObjectHashMap<TIntArrayList>();
+        TIntArrayList epsilonTransitions = new TIntArrayList();
+        boolean accepts;
+        
+        private void addTransition(int symbol, int target) {
+            TIntArrayList l = transitions.get(symbol);
+            if(l == null) {
+                l = new TIntArrayList();
+                l.add(target);
+                transitions.put(symbol, l);
+            }
+            else if(!l.contains(target))
+                l.add(target);
+        }
+        
+        private void addEpsilonTransition(int target) {
+            if(!epsilonTransitions.contains(target))
+                epsilonTransitions.add(target);
+        }
+    }
+    
+    private ArrayList<State> states = new ArrayList<State>();
+    private int initialState;
+    
+    public int size() {
+        return states.size();
+    }
+    
+    public void setInitialState(int initialState) {
+        this.initialState = initialState;
+    }
+    
+    public int getInitialState() {
+        return initialState;
+    }
+    
+    public int newState() {
+        int id = states.size();
+        states.add(new State());
+        return id;
+    }
+    
+    public void setAccepts(int id, boolean accepts) {
+        states.get(id).accepts = accepts;
+    }
+    
+    public boolean getAccepts(int id) {
+        return states.get(id).accepts;
+    }
+    
+    public void addTransition(int source, int symbol, int target) {
+        states.get(source).addTransition(symbol, target);
+    }
+    
+    public void forEachTransition(int source, final TIntIntProcedure proc) {
+        states.get(source).transitions.forEachEntry(new TIntObjectProcedure<TIntArrayList>() {
+            @Override
+            public boolean execute(int a, TIntArrayList b) {
+                for(int i=0;i<b.size();++i)
+                    if(!proc.execute(a, b.get(i)))
+                        return false;
+                return true;
+            }
+        });
+    }
+    
+    public void addEpsilonTransition(int source, int target) {
+        states.get(source).addEpsilonTransition(target);
+    }
+    
+    public void forEachEpsilonTransition(int source, final TIntProcedure proc) {
+        states.get(source).epsilonTransitions.forEach(proc);
+    }
+    
+    private TIntArrayList closure(final TIntHashSet stateSet) {
+        final TIntArrayList result = new TIntArrayList(stateSet);
+        TIntProcedure proc = new TIntProcedure() {                
+            @Override
+            public boolean execute(int value) {
+                if(stateSet.add(value))
+                    result.add(value);
+                return true;
+            }
+        };
+        for(int i=0;i<result.size();++i)
+            forEachEpsilonTransition(result.get(i), proc);                
+        result.sort();
+        return result;
+    }
+    
+    public DFA determinize() {
+        final DFA aut = new DFA();
+        
+        final ArrayList<TIntArrayList> stack = new ArrayList<TIntArrayList>(); 
+        final TObjectIntHashMap<TIntArrayList> map = new TObjectIntHashMap<TIntArrayList>();
+        {
+            TIntHashSet initialSet = new TIntHashSet(4);
+            initialSet.add(getInitialState());
+            TIntArrayList stateList = closure(initialSet);
+            
+            int stateId = aut.newState();
+            map.put(stateList, stateId);
+            aut.setInitialState(stateId);
+            
+            stack.add(stateList);
+        }
+        
+        while(!stack.isEmpty()) {
+            TIntArrayList curList = stack.remove(stack.size()-1);
+            int[] stateArray = curList.toArray();
+            final int id = map.get(curList);
+            
+            // Transitions
+            final TIntObjectHashMap<TIntHashSet> transitions =
+                    new TIntObjectHashMap<TIntHashSet>();
+            TIntIntProcedure proc = new TIntIntProcedure() {                
+                @Override
+                public boolean execute(int symbol, int b) {
+                    TIntHashSet set = transitions.get(symbol);
+                    if(set == null) {
+                        set = new TIntHashSet();
+                        transitions.put(symbol, set);
+                    }
+                    set.add(b);
+                    return true;
+                }
+            };
+            for(int s : stateArray)
+                forEachTransition(s, proc);
+            
+            // Create transition targets
+            transitions.forEachEntry(new TIntObjectProcedure<TIntHashSet>() {
+                @Override
+                public boolean execute(int symbol, TIntHashSet b) {
+                    TIntArrayList stateList = closure(b);
+                    
+                    if(map.containsKey(stateList))
+                        aut.addTransition(id, symbol, map.get(stateList));
+                    else {
+                        int stateId = aut.newState();
+                        map.put(stateList, stateId);                        
+                        stack.add(stateList);
+                        
+                        aut.addTransition(id, symbol, stateId);
+                    }                    
+                    return true;
+                }
+            });
+            
+            // Accepts
+            for(int s : stateArray)
+                if(getAccepts(s)) {
+                    aut.setAccepts(id, true);
+                    break;
+                }
+        }
+        
+        return aut;
+    }
+    
+}