]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java
Moved SCL parser generator to platform repository.
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / regexp / automata / DFA.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java
new file mode 100644 (file)
index 0000000..c3ac114
--- /dev/null
@@ -0,0 +1,328 @@
+package org.simantics.scl.compiler.parser.regexp.automata;
+
+import gnu.trove.impl.Constants;
+import gnu.trove.list.array.TByteArrayList;
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntIntHashMap;
+import gnu.trove.procedure.TIntIntProcedure;
+import gnu.trove.procedure.TIntProcedure;
+import gnu.trove.set.hash.TIntHashSet;
+
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+public class DFA implements Automata {
+
+    private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>(); 
+    private TByteArrayList accepts = new TByteArrayList();
+    private int initialState;
+    
+    public int newState() {
+        int stateId = transitions.size();
+        transitions.add(new TIntIntHashMap(
+                Constants.DEFAULT_CAPACITY,
+                Constants.DEFAULT_LOAD_FACTOR,
+                0, -1));
+        accepts.add((byte)0);
+        return stateId;
+    }
+    
+    public int size() {
+        return transitions.size();
+    }
+    
+    public DFA copy() {
+        DFA copy = new DFA();
+        for(TIntIntHashMap t : transitions)
+            copy.transitions.add(new TIntIntHashMap(t));
+        copy.accepts = new TByteArrayList(accepts);
+        copy.initialState = initialState;
+        return copy;
+    }
+
+    public void addTransition(int sourceId, int symbol, int targetId) {
+        transitions.get(sourceId).put(symbol, targetId);
+    }
+    
+    public int getTransition(int sourceId, int symbol) {
+        return transitions.get(sourceId).get(symbol);
+    }
+    
+    public void forEachTransition(int source, final TIntIntProcedure proc) {        
+        transitions.get(source).forEachEntry(proc);
+    }
+    
+    public int[] nextStates(int id) {
+        return transitions.get(id).keys();
+    }
+    
+    public void setAccepts(int id, boolean accepts) {
+        this.accepts.set(id, accepts ? (byte)1 : (byte)0);
+    }
+    
+    public boolean getAccepts(int id) {
+        return accepts.get(id)==1;
+    }
+
+    public void setInitialState(int initialState) {
+        this.initialState = initialState;
+    }
+    
+    public int getInitialState() {
+        return initialState;
+    }
+
+    public DFA minimize() {
+        // Compute relevant input characters for minimization
+        final TIntIntHashMap symbolMap = new TIntIntHashMap();
+        final int[] symbolArray;
+        {
+            final TIntArrayList l = new TIntArrayList();
+            TIntProcedure proc = new TIntProcedure() {
+                @Override
+                public boolean execute(int value) {
+                    if(!symbolMap.containsKey(value)) {
+                        symbolMap.put(value, l.size());
+                        l.add(value);
+                    }
+                    return true;
+                }
+            };
+            for(TIntIntHashMap tMap : transitions)
+                tMap.forEachKey(proc);
+            symbolArray = l.toArray();
+        }
+        int symbolCount = symbolMap.size();        
+        int stateCount = transitions.size();
+        
+        // Inverse automata
+        final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
+        for(int i=0;i<inverse.length;++i)
+            inverse[i] = new TIntArrayList[symbolCount];
+        for(int sourceId=0;sourceId<stateCount;++sourceId) {
+            TIntIntHashMap tMap = transitions.get(sourceId);
+            for(int j=0;j<symbolCount;++j) {
+                int targetId = tMap.get(symbolArray[j]);
+                if(targetId == -1)
+                    targetId = stateCount;
+                TIntArrayList l = inverse[targetId][j];
+                if(l == null) {
+                    l = new TIntArrayList();
+                    inverse[targetId][j] = l;   
+                }
+                l.add(sourceId);
+            }
+        }
+        
+        /*for(int i=0;i<inverse.length;++i)
+            for(int j=0;j<symbolCount;++j)
+                System.out.println(i + " " + j + " -> " + inverse[i][j]);
+        */
+        
+        // 
+        int[] ids = new int[stateCount+1];
+        final int[] memPartion = new int[stateCount+1];        
+        TIntArrayList partionBegin = new TIntArrayList();
+        TIntArrayList partionEnd = new TIntArrayList();
+        TIntArrayList stack = new TIntArrayList();
+        TIntArrayList scheduled = new TIntArrayList();
+        
+        // Initial partition
+        {
+            int min = 0;
+            int max = stateCount;
+            ids[min++] = stateCount;
+            memPartion[stateCount] = 0;
+            for(int i=0;i<stateCount;++i) {
+                if(accepts.get(i)==1) {
+                    ids[max--] = i;
+                    memPartion[i] = 1;
+                }
+                else {
+                    ids[min++] = i;
+                    memPartion[i] = 0;
+                }
+            }
+            partionBegin.add(0);
+            partionBegin.add(min);
+            partionEnd.add(min);
+            partionEnd.add(ids.length);
+            scheduled.add(0);
+            scheduled.add(0);
+            if(min < ids.length/2) {
+                stack.add(0);
+                scheduled.set(0, 1);
+            }
+            else {
+                stack.add(1);
+                scheduled.set(1, 1);
+            }
+        }
+        
+        // Refinement
+        while(!stack.isEmpty()) {
+            int partionId = stack.removeAt(stack.size()-1);
+            scheduled.set(partionId, 0);
+            int begin = partionBegin.get(partionId);
+            int end = partionEnd.get(partionId);
+            
+            for(int j=0;j<symbolCount;++j) {
+                TIntHashSet invStates = new TIntHashSet();
+                for(int i=begin;i<end;++i) {
+                    int s = ids[i];
+                    TIntArrayList inv = inverse[s][j];
+                    if(inv != null)
+                        invStates.addAll(inv);
+                }
+                int[] invStatesArray = invStates.toArray();
+                
+                TIntHashSet partions = new TIntHashSet();
+                for(int s : invStatesArray)
+                    partions.add(memPartion[s]);
+                int[] partionsArray = partions.toArray();
+                
+                for(int p : partionsArray) {
+                    int pBegin = partionBegin.get(p);
+                    int pEnd = partionEnd.get(p);
+                    boolean splits = false;
+                    for(int k=pBegin;k<pEnd;++k)
+                        if(!invStates.contains(ids[k])) {
+                            splits = true;
+                            break;
+                        }
+                    if(splits) {
+                        int p2 = partionBegin.size();
+                        int p2End = pEnd;
+                        for(int k=pBegin;k<pEnd;++k) {
+                            int s = ids[k];
+                            if(invStates.contains(s)) {
+                                memPartion[s] = p2;
+                                --pEnd;
+                                ids[k] = ids[pEnd];
+                                ids[pEnd] = s;
+                                --k;
+                            }
+                        }
+                        partionEnd.set(p, pEnd);
+                        partionBegin.add(pEnd);
+                        partionEnd.add(p2End);
+                        
+                        if(scheduled.get(p) == 1) {
+                            scheduled.add(1);
+                            stack.add(p2);
+                        }
+                        else {
+                            if(pEnd - pBegin <= p2End - pEnd) {
+                                stack.add(p);
+                                scheduled.add(0);
+                                scheduled.set(p, 1);
+                            }
+                            else {
+                                stack.add(p2);
+                                scheduled.add(1);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        
+        // Print partitions
+        /*System.out.println("Partition count: " + partionBegin.size());
+        for(int i=0;i<memPartion.length;++i)
+            System.out.println("    " + i + " in " + memPartion[i]);
+        */
+        
+        // Put reachable states into new automaton
+        final DFA aut = new DFA();
+        int failurePartion = memPartion[stateCount];
+        final int[] sArray = new int[partionBegin.size()];
+        sArray[failurePartion] = -1;
+        for(int i=0;i<partionBegin.size();++i)
+            if(i != failurePartion)
+                sArray[i] = aut.newState();
+        for(int i=0;i<partionBegin.size();++i)
+            if(i != failurePartion) {
+                final int sourceId = sArray[i];
+                int bId = ids[partionBegin.get(i)];
+                forEachTransition(bId, new TIntIntProcedure() {
+                    @Override
+                    public boolean execute(int a, int b) {
+                        aut.addTransition(sourceId, a, sArray[memPartion[b]]);
+                        return true;
+                    }
+                });
+                aut.setAccepts(sourceId, getAccepts(bId));
+            }
+        aut.setInitialState(sArray[memPartion[initialState]]);
+        return aut;
+    }
+    
+    public Regexp toRegexp(int from, byte[] to) {
+        int stateCount = size();
+        final int[] order = new int[stateCount];
+        for(int i=0;i<stateCount;++i)
+            order[i] = i;
+        order[0] = from;
+        order[from] = 0;
+        
+        final Regexp[][] a = new Regexp[stateCount][stateCount];
+        final Regexp[] b = new Regexp[stateCount];
+        for(int i=0;i<stateCount;++i) {
+            b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
+            for(int j=0;j<stateCount;++j)
+                a[i][j] = Regexp.ZERO;
+            final Regexp[] row = a[i];
+            forEachTransition(order[i], new TIntIntProcedure() {
+                @Override
+                public boolean execute(int symbol, int targetId) {
+                    targetId = order[targetId];
+                    row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
+                    return true;
+                }
+            });
+        }
+        
+        for(int n=stateCount-1;n>=0;--n) {
+            Regexp ss = Regexp.star(a[n][n]);
+            b[n] = Regexp.seq(ss, b[n]);
+            for(int j=0;j<stateCount;++j)
+                a[n][j] = Regexp.seq(ss, a[n][j]);
+            for(int i=0;i<n;++i) {
+                b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
+                for(int j=0;j<n;++j)
+                    a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
+            }
+        }
+        
+        return b[0];
+    }
+
+    public Regexp toRegexp() {
+        return toRegexp(initialState, accepts.toArray()).simplify();
+    }
+    
+    public Regexp toRegexpTo(int position) {
+        int stateCount = size();
+        byte[] targetArray = new byte[stateCount];
+        targetArray[position] = 1;
+        return toRegexp(initialState, targetArray).simplify();
+    }
+    
+    public Regexp toRegexpFrom(int position) {
+        return toRegexp(position, accepts.toArray()).simplify();
+    }
+
+    public Regexp toPositionalRegexp(int position) {
+        DFA aut = copy();
+        int s = transitions.size();
+        aut.transitions.add(aut.transitions.get(position));
+        aut.transitions.set(position, new TIntIntHashMap());
+        aut.addTransition(position, 0x80000000, s);
+        aut.accepts.add(accepts.get(position));
+        aut.accepts.set(position, (byte)0);
+        return aut.toRegexp();
+    }
+}