]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java
Moved SCL parser generator to platform repository.
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / generator / grammar / AnaGrammar.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/grammar/AnaGrammar.java
new file mode 100644 (file)
index 0000000..dc79e1e
--- /dev/null
@@ -0,0 +1,234 @@
+package org.simantics.scl.compiler.parser.generator.grammar;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.THashSet;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.Namer;
+import org.simantics.scl.compiler.parser.regexp.automata.DFA;
+
+public class AnaGrammar implements Namer {
+    
+    public ArrayList<Prod> prods = new ArrayList<Prod>(); 
+    public int[] nonterminalPos;
+    
+    public final String[] terminalNames;
+    public final String[] nonterminalNames;
+
+    public final int[] initialNonterminals;
+    
+    public boolean[] nullable;
+    public int[][] first;
+    
+    
+    public AnaGrammar(Grammar grammar) {
+        initialNonterminals = grammar.initialNonterminals;
+        terminalNames = Arrays.copyOf(grammar.terminalNames, grammar.terminalNames.length+1);
+        terminalNames[terminalNames.length-1] = "EOF";
+        nonterminalNames = Arrays.copyOf(grammar.nonterminalNames,
+                grammar.nonterminalNames.length+initialNonterminals.length);
+        for(int i=1;i<=initialNonterminals.length;++i)
+            nonterminalNames[nonterminalNames.length-i] = "init$" + i;
+        
+        // Convert grammar
+        ArrayList<Prod>[] prodMap = new ArrayList[nonterminalNames.length];
+        for(int i=0;i<nonterminalNames.length;++i)
+            prodMap[i] = new ArrayList<Prod>(); 
+        for(Production production : grammar.productions) {
+            Prod prod = new Prod(production.name, ~production.lhs,
+                    production.rhs.toAutomaton().determinize().minimize(),
+                    production.annotations);
+            prodMap[prod.lhs].add(prod);
+        }
+        
+        // Initial production
+        for(int i=1;i<=initialNonterminals.length;++i) {
+            DFA dfa = new DFA();
+            int s0 = dfa.newState();
+            dfa.setInitialState(s0);
+            int s1 = dfa.newState();
+            dfa.addTransition(s0, initialNonterminals[i-1], s1);
+            int s2 = dfa.newState();
+            dfa.addTransition(s1, terminalNames.length-1, s2);
+            dfa.setAccepts(s2, true);
+            Prod prod = new Prod("Init", nonterminalNames.length-i, dfa, new TIntByteHashMap());
+            prodMap[prod.lhs].add(prod);
+        }
+        
+        TIntArrayList pos = new TIntArrayList();
+        for(int i=0;i<nonterminalNames.length;++i) {
+            pos.add(prods.size());
+            prods.addAll(prodMap[i]);
+        }
+        pos.add(prods.size());
+        nonterminalPos = pos.toArray();
+        
+        // Basic analysis
+        computeNullableNonterminals();
+    }
+    
+    private static class FrontierItem {
+        public final int production;
+        public final int state;
+        
+        public FrontierItem(int production, int state) {
+            this.production = production;
+            this.state = state;
+        }
+        
+        @Override
+        public int hashCode() {
+            return 31*state + production;
+        }
+        
+        @Override
+        public boolean equals(Object obj) {
+            if(this == obj)
+                return true;
+            if(obj == null || obj.getClass() != getClass())
+                return false;
+            FrontierItem other = (FrontierItem)obj;
+            return other.state == state && other.production == production;
+        }
+    }
+    
+    private void computeNullableNonterminals() {
+        int nonterminalCount = nonterminalNames.length;
+        nullable = new boolean[nonterminalCount];
+        final TIntHashSet[] first = new TIntHashSet[nonterminalCount];
+        final TIntHashSet[] gfirst = new TIntHashSet[nonterminalCount];
+        for(int i=0;i<nonterminalCount;++i) {
+            first[i] = new TIntHashSet();
+            gfirst[i] = new TIntHashSet();
+        }
+        final ArrayList<FrontierItem>[] pendingItems = new ArrayList[nonterminalCount];
+        final ArrayList<FrontierItem> stack = new ArrayList<FrontierItem>();
+        THashSet<FrontierItem> handledItems = new THashSet<FrontierItem>();
+        for(int i=0;i<nonterminalCount;++i)
+            pendingItems[i] = new ArrayList<FrontierItem>();
+        
+        for(int i=0;i<prods.size();++i)
+            stack.add(new FrontierItem(i, prods.get(i).rhs.getInitialState()));
+        while(!stack.isEmpty()) {
+            final FrontierItem cur = stack.remove(stack.size()-1);
+            if(!handledItems.add(cur))
+                continue;
+            final Prod curProd = prods.get(cur.production);
+            if(curProd.rhs.getAccepts(cur.state)) {
+                nullable[curProd.lhs] = true;
+                stack.addAll(pendingItems[curProd.lhs]);
+            }
+            curProd.rhs.forEachTransition(cur.state, new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int targetState) {
+                    if(symbol < 0) {
+                        symbol = ~symbol;
+                        gfirst[symbol].add(curProd.lhs);
+                        FrontierItem item = new FrontierItem(cur.production, targetState);
+                        if(nullable[symbol])
+                            stack.add(item);
+                        else
+                            pendingItems[symbol].add(item);
+                    }
+                    else 
+                        first[curProd.lhs].add(symbol);
+                    return true;
+                }
+            });
+        }
+        
+        gclose(first, gfirst);
+        this.first = new int[first.length][];
+        for(int i=0;i<first.length;++i)
+            this.first[i] = first[i].toArray();
+        
+        /*System.out.println("Nullable nonterminals:");
+        for(int i=0;i<nullable.length;++i) {
+            System.out.print("    " + nonterminalNames[i] + " -> ");
+            if(nullable[i])
+                System.out.print(" NULL");
+            for(int s : this.first[i])
+                System.out.print(" " + terminalNames[s]);
+            System.out.println();
+        }*/
+    }
+    
+    public static void gclose(TIntHashSet[] sets, TIntHashSet[] graph_) {
+        int[][] graph = new int[graph_.length][];
+        for(int i=0;i<graph.length;++i)
+            graph[i] = graph_[i].toArray();
+        
+        final TIntArrayList stack = new TIntArrayList();
+        for(int i=0;i<sets.length;++i) {
+            final int i_ = i;
+            sets[i].forEach(new TIntProcedure() {
+                @Override
+                public boolean execute(int value) {
+                    stack.add(i_);
+                    stack.add(value);
+                    return true;
+                }
+            });
+        }
+                
+        while(!stack.isEmpty()) {
+            int sp = stack.size();
+            int value = stack.removeAt(sp-1);
+            int id = stack.removeAt(sp-2);
+            for(int s : graph[id])
+                if(sets[s].add(value)) {
+                    stack.add(s);
+                    stack.add(value);
+                }
+        }
+    }
+
+    public String getName(int symbolId) {
+        if(symbolId >= 0)
+            return terminalNames[symbolId];
+        else
+            return nonterminalNames[~symbolId];
+    }
+
+    private class AlmostAcceptsProc implements  TIntIntProcedure {
+        DFA rhs;
+        TIntHashSet visited = new TIntHashSet();
+        
+        boolean result;
+        
+        public AlmostAcceptsProc(DFA rhs) {
+            this.rhs = rhs;
+        }
+
+        @Override
+        public boolean execute(int a, int b) {
+            if(a < 0 && nullable[~a])
+                visit(b);
+            return !result;
+        }
+
+        public void visit(int position) {
+            if(visited.add(position)) {
+                if(rhs.getAccepts(position)) {
+                    result = true;
+                    return;
+                }
+                rhs.forEachTransition(position, this);
+            }
+        }
+    }
+    
+    public boolean almostAccepts(DFA rhs, int position) {
+        AlmostAcceptsProc proc = new AlmostAcceptsProc(rhs);
+        proc.visit(position);
+        return proc.result;
+    }    
+}