]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java
Moved SCL parser generator to platform repository.
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / generator / table / ParseTableBuilder.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java
new file mode 100644 (file)
index 0000000..da837fb
--- /dev/null
@@ -0,0 +1,515 @@
+package org.simantics.scl.compiler.parser.generator.table;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
+import org.simantics.scl.compiler.parser.generator.grammar.Prod;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.list.array.TLongArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import gnu.trove.map.hash.TLongIntHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntObjectProcedure;
+import gnu.trove.procedure.TObjectIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+import gnu.trove.set.hash.TLongHashSet;
+
+public class ParseTableBuilder {
+    public final static int MAX_STACK_ID = 10;
+    
+    private static final int STATE_MASK = 0x0fff;
+    private static final int REDUCE_MASK = 0x8000;
+    private static final int POP_MASK = 0x4000;
+    private static final int PUSH_MASK = 0x2000;
+    public static final int ERROR_ACTION = 0xffff;
+    private static final int ACCEPT_ACTION = 0xfffe;
+
+    final AnaGrammar grammar;
+    private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
+    private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
+    private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
+    private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
+    private TIntArrayList backTransSymbols = new TIntArrayList();
+    private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
+    int[] initialStates;
+    TIntHashSet finalStates = new TIntHashSet(); 
+
+    private ParseTableBuilder(AnaGrammar grammar) {
+        this.grammar = grammar;
+    }
+        
+    private static boolean isNonterminal(int symbol) {
+        return symbol < 0;
+    }
+    
+    private void close(ArrayList<Item> items) {
+        THashSet<Item> itemSet = new THashSet<Item>(items);
+        for(int i=0;i<items.size();++i) {
+            Item item = items.get(i);
+            for(int nextSymbol : item.nextSymbols(grammar))
+                if(isNonterminal(nextSymbol)) {
+                    nextSymbol = ~nextSymbol;
+                    int pEnd = grammar.nonterminalPos[nextSymbol+1];
+                    for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
+                        Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
+                        if(itemSet.add(newItem))
+                            items.add(newItem);
+                    }
+                }                
+        }
+    }
+    
+    private int getState(int backTransSymbol, ArrayList<Item> items) {
+        // Create state
+        close(items);
+        final ItemSet itemSet = new ItemSet(items);
+        if(states.contains(itemSet))
+            return states.get(itemSet);
+        final int newState = states.size();
+        states.put(itemSet, newState);
+        itemSets.add(itemSet);
+        backTransSymbols.add(backTransSymbol);
+        backLinks.add(new TIntArrayList(2));
+        
+        // Create transitions
+        TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
+        //close(items);
+        for(Item item : items) {
+            for(int s : item.nextSymbols(grammar)) {
+                ArrayList<Item> l = transitionMap.get(s);
+                if(l == null) {
+                    l = new ArrayList<Item>();
+                    transitionMap.put(s, l);
+                }
+                l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
+            }
+        }
+        
+        final TIntIntHashMap trans = new TIntIntHashMap();
+        final TIntIntHashMap stackOpMap = new TIntIntHashMap();
+        transitions.add(trans);
+        stackOps.add(stackOpMap);
+        if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
+            finalStates.add(newState);
+        }
+        transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
+            @Override
+            public boolean execute(int a, ArrayList<Item> b) {
+                boolean stackShift = false;
+                int minStackPos = Integer.MAX_VALUE;
+                for(Item item : b) {
+                    if(item.stackPos == -1)
+                        stackShift = true;
+                    else
+                        minStackPos = Math.min(minStackPos, item.stackPos);
+                }
+                int stackOp = 0;
+                if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
+                    stackOp |= POP_MASK;
+                    //System.out.println("minStackPos = " + minStackPos);
+                    for(Item item : b)
+                        if(item.stackPos >= 0)
+                            --item.stackPos;
+                }
+                boolean stackOverflow = false;
+                if(stackShift) {
+                    stackOp |= PUSH_MASK;
+                    for(Item item : b) {
+                        ++item.stackPos;
+                        if(item.stackPos > MAX_STACK_ID)
+                            stackOverflow = true;
+                    }
+                }
+                stackOpMap.put(a, stackOp);
+                System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
+                
+                if(stackOverflow) {
+                    System.err.println("Stack overflow when following " + grammar.getName(a) + " at");
+                    System.err.println(itemSet.toString(grammar));
+                }
+                else {
+                    int state = getState(a, b);
+                    trans.put(a, state);
+                    backLinks.get(state).add(newState);
+                }
+                return true;
+            }
+            
+        });
+        return newState;
+    }
+
+    TLongArrayList sMap = new TLongArrayList();
+    TLongIntHashMap sMapInv = new TLongIntHashMap();
+    TIntHashSet[] follow;
+        
+    private static int getState(long s) {
+        return (int)(s >> 32);
+    }
+    
+    private static int getSymbol(long s) {
+        return (int)s;
+    }
+    
+    private static long getS(int state, int symbol) {
+        return (((long)state) << 32) | (long)symbol;
+    }
+    
+    private void computeFollow() {
+        for(int i=0;i<itemSets.size();++i) {
+            final int source = i;
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int target) {
+                    if(symbol < 0) {
+                        long s = getS(source, ~symbol);
+                        int id = sMap.size();
+                        sMap.add(s);
+                        sMapInv.put(s, id);
+                    }
+                    return true;
+                }
+            });
+        }
+
+        // initfollow
+        follow = new TIntHashSet[sMap.size()];
+        final TIntHashSet[] gread = new TIntHashSet[follow.length];
+        final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
+        for(int i=0;i<follow.length;++i) {
+            gread[i] = new TIntHashSet();
+            gla[i] = new TIntHashSet();
+        }
+        for(int i=0;i<follow.length;++i) {
+            final int id = i;
+            long s = sMap.get(i);
+            int source = getState(s);
+            int symbol = getSymbol(s);
+            final int target = transitions.get(source).get(~symbol);
+            final TIntHashSet drSet = new TIntHashSet();
+            transitions.get(target).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol2, int target2) {
+                    if(symbol2 >= 0)
+                        drSet.add(symbol2);
+                    else if(grammar.nullable[~symbol2])
+                        gread[sMapInv.get(getS(target, ~symbol2))].add(id);
+                    return true;
+                }
+            });
+            if(finalStates.contains(target))
+                drSet.add(grammar.terminalNames.length-1);
+            follow[i] = drSet;
+            
+            ItemSet set = itemSets.get(target);
+            for(Item targetItem : set.items) {
+                Prod prod = grammar.prods.get(targetItem.production);
+                if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
+                    for(Item sourceItem : itemSets.get(source).items) {
+                        if(sourceItem.production == targetItem.production &&
+                                prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
+                            TLongHashSet visited = new TLongHashSet(); 
+                            traceBack(gla, id, visited, source, sourceItem);
+                        }
+                    }
+                    
+                }
+            }
+        }
+        //System.out.println("follow: " + Arrays.toString(follow));
+        //System.out.println("gread: " + Arrays.toString(gread));
+        //System.out.println("gla: " + Arrays.toString(gla));
+        AnaGrammar.gclose(follow, gread);
+        AnaGrammar.gclose(follow, gla);
+        
+        /*System.out.println("Gla:");
+        for(int i=0;i<gla.length;++i) {
+            int iState = getState(sMap.get(i));
+            int iSymbol = getSymbol(sMap.get(i));
+            for(int j : gla[i].toArray()) {
+                int jState = getState(sMap.get(j));
+                int jSymbol = getSymbol(sMap.get(j));
+                System.out.println("-- from --");
+                System.out.println(itemSets.get(iState).toString(grammar));
+                System.out.println("    symbol: " + grammar.nonterminalNames[iSymbol]);
+                System.out.println("-- to --");
+                System.out.println(itemSets.get(jState).toString(grammar));
+                System.out.println("    symbol: " + grammar.nonterminalNames[jSymbol]);
+            }
+        }*/
+        
+        /*for(int i=0;i<follow.length;++i) {
+            long s = sMap.get(i);
+            int source = getState(s);
+            int symbol = getSymbol(s);
+            System.out.println("------------------------------");
+            System.out.println(itemSets.get(source).toString(grammar));
+            System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
+            System.out.print("Follow:");
+            for(int sym : follow[i].toArray())
+                System.out.print(" " + grammar.terminalNames[sym]);
+            System.out.println();
+        }*/
+    }
+    
+    private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
+        if(visited.add( (((long)state)<<32) | (long)item.position )) {
+            Prod prod = grammar.prods.get(item.production);
+            if(item.stackPos == -1) {
+                int id = sMapInv.get(getS(state, prod.lhs));
+                gla[id].add(initialId);
+            }
+            
+            int backTransSymbol = backTransSymbols.get(state);
+            for(int prevState : backLinks.get(state).toArray())
+                for(Item prevItem : itemSets.get(prevState).items)
+                    if(prevItem.production == item.production &&
+                            prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
+                        traceBack(gla, initialId, visited, prevState, prevItem);
+        }
+    }
+    
+    private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
+        if(visited.add( (((long)state)<<32) | (long)position )) {
+            int backTransSymbol = backTransSymbols.get(state);
+            Prod prod = grammar.prods.get(production);
+            boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
+            for(int prevState : backLinks.get(state).toArray()) {
+                for(Item item : itemSets.get(prevState).items) {
+                    if(item.production == production &&
+                            prod.rhs.getTransition(item.position, backTransSymbol) == position)
+                        lookback(visited, la, production, prevState, item.position);
+                    if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
+                        int id = sMapInv.get(getS(prevState, prod.lhs));
+                        la.addAll(follow[id]);
+                    }
+                }
+            }
+        }
+    }
+
+    private void createReduceActions() {
+        computeFollow();
+        for(int i=0;i<itemSets.size();++i) {
+            TIntIntHashMap trans = transitions.get(i);
+            if(finalStates.contains(i))
+                trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
+            
+            TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
+            TIntIntHashMap stackPosMap = new TIntIntHashMap(); 
+            for(Item item : itemSets.get(i).items) {
+                Prod prod = grammar.prods.get(item.production);
+                if(prod.rhs.getAccepts(item.position)) {
+                    TIntHashSet la = laMap.get(item.production);
+                    if(la == null) {
+                        la = new TIntHashSet();
+                        laMap.put(item.production, la);
+                    }
+                    
+                    TLongHashSet visited = new TLongHashSet();
+                    lookback(visited, la, item.production, i, item.position);
+                    
+                    if(stackPosMap.containsKey(item.production)) {
+                        stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
+                    }
+                    else
+                        stackPosMap.put(item.production, item.stackPos);
+                }
+            }
+            
+            // Create transitions
+            for(int production : laMap.keys()) {
+                int stackPos = 0; //stackPosMap.get(production);
+                TIntHashSet la = laMap.get(production);
+                for(int symbol : la.toArray()) {
+                    if(trans.contains(symbol)) {
+                        int oldAction = trans.get(symbol);
+                        if(oldAction >= 0) {
+                            Prod prod = grammar.prods.get(production);
+                            if(prod.annotations.containsKey(symbol)) {
+                                byte v = prod.annotations.get(symbol);
+                                if(v == 1)
+                                    trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+                            }
+                            else {
+                                System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+                                System.err.println(itemSets.get(i).toString(grammar));
+                            }
+                        }
+                        else {
+                            System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
+                            System.err.println(itemSets.get(i).toString(grammar));
+                        }
+                    }
+                    else
+                        trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
+                }
+            }
+            
+            // Check stacking conflicts
+            /*trans.forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    if(b >= 0) {
+                        boolean kernelState = false;
+                        boolean nonkernelState = false;
+                        for(Item item : itemSets.get(b).items) {
+                            Prod prod = grammar.prods.get(item.production);
+                            if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
+                                nonkernelState = true;
+                            else if(item.position != prod.rhs.getInitialState())
+                                kernelState = true;
+                        }
+                        
+                        
+                        if(kernelState && nonkernelState) {
+                            System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
+                            System.err.println(itemSets.get(b).toString(grammar));
+                        }
+                    }
+                    return true;
+                }
+            });*/
+        }
+    }
+    
+    public static ParseTable build(AnaGrammar grammar) {
+        ParseTableBuilder builder = new ParseTableBuilder(grammar);
+        
+        builder.initialStates = new int[grammar.initialNonterminals.length];
+        for(int i=0;i<grammar.initialNonterminals.length;++i) {
+            ArrayList<Item> seed = new ArrayList<Item>();
+            int prodId = grammar.prods.size()-i-1;
+            seed.add(new Item(prodId, 
+                    grammar.prods.get(prodId).rhs.getInitialState(), 0));
+            builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
+        }
+        
+        builder.createReduceActions();
+        
+        System.out.println("States: " + builder.itemSets.size());
+        
+        //builder.visualize();
+        
+        builder.printParseTable();
+        return builder.getParseTable();
+    }
+
+    private ParseTable getParseTable() {
+        int[] productionInfo = new int[grammar.prods.size()];
+        for(int i=0;i<productionInfo.length;++i) {
+            Prod prod = grammar.prods.get(i);
+            productionInfo[i] = prod.lhs;
+        }
+        
+        int[][] actionTable = new int[transitions.size()][];
+        int[][] gotoTable = new int[transitions.size()][];
+        for(int i=0;i<transitions.size();++i) {
+            final int[] actions = new int[grammar.terminalNames.length]; 
+            Arrays.fill(actions, ERROR_ACTION);
+            actionTable[i] = actions;
+            final int[] gotos = new int[grammar.nonterminalNames.length];
+            Arrays.fill(gotos, ERROR_ACTION);
+            gotoTable[i] = gotos;
+            final TIntIntHashMap stackOpMap = stackOps.get(i); 
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    int action = b | stackOpMap.get(a);
+                    if(a >= 0)
+                        actions[a] = action;
+                    else
+                        gotos[~a] = action;
+                    return true;
+                }
+            });
+        }
+        
+        String[] stateDescriptions = new String[itemSets.size()];
+        for(int i=0;i<stateDescriptions.length;++i) {
+            final StringBuilder b = new StringBuilder();
+            b.append(itemSets.get(i).toString(grammar));
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int action) {
+                    if(symbol >= 0) {
+                        b.append("\n    ").append(grammar.terminalNames[symbol]).append(" ->");
+                        if((action & REDUCE_MASK) == 0) {
+                            if((action & POP_MASK) != 0)
+                                b.append(" POP");
+                            if((action & PUSH_MASK) != 0)
+                                b.append(" PUSH");
+                            b.append(" SHIFT(").append(action&STATE_MASK).append(")");
+                        }
+                        else {
+                            if(action == 0xfffffffe)
+                                b.append(" ACCEPT");
+                            else
+                                b.append(" REDUCE(").append(action&STATE_MASK).append(")");
+                        }
+                    }
+                    else {
+                        b.append("\n    ").append(grammar.nonterminalNames[~symbol]).append(" ->")
+                         .append(" GOTO(").append(action).append(")");
+                    }
+                    return true;
+                }
+            });
+            stateDescriptions[i] = b.toString();
+        }
+        
+        //printParseTable();
+        return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
+                initialStates, stateDescriptions);
+    }
+
+    private void printParseTable() {
+        final ItemSet[] stateSets = new ItemSet[states.size()];
+        states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
+            @Override
+            public boolean execute(ItemSet a, int b) {
+                stateSets[b] = a;
+                return true;
+            }
+        });
+        for(int i=0;i<stateSets.length;++i) {
+            System.out.println("--- State " + i + " ---");
+            System.out.println(stateSets[i].toString(grammar));
+            final TIntIntHashMap stackOp = stackOps.get(i);
+            transitions.get(i).forEachEntry(new TIntIntProcedure() {
+                @Override
+                public boolean execute(int a, int b) {
+                    int sOp = stackOp.get(a);
+                    System.out.print(grammar.getName(a) + " -> ");
+                    if(sOp != 0) {
+                        System.out.print("[");
+                        if((sOp & PUSH_MASK) != 0) {
+                            sOp ^= PUSH_MASK;
+                            System.out.print("PUSH ");
+                        }
+                        if((sOp & POP_MASK) != 0) {
+                            sOp ^= POP_MASK;
+                            System.out.print("POP ");
+                        }
+                        if(sOp != 0)
+                            System.out.print(sOp);
+                        System.out.print("] ");
+                    }
+                    if((b & REDUCE_MASK) != 0) {
+                        b ^= REDUCE_MASK;
+                        System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar));
+                    }
+                    else {
+                        System.out.println("shift " + b);
+                    }
+                    return true;
+                }
+            });
+        }
+    }
+}