bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java

   1 package org.simantics.scl.compiler.parser.regexp.automata;
   2
   3 import gnu.trove.impl.Constants;
   4 import gnu.trove.list.array.TByteArrayList;
   5 import gnu.trove.list.array.TIntArrayList;
   6 import gnu.trove.map.hash.TIntIntHashMap;
   7 import gnu.trove.procedure.TIntIntProcedure;
   8 import gnu.trove.procedure.TIntProcedure;
   9 import gnu.trove.set.hash.TIntHashSet;
  10
  11 import java.util.ArrayList;
  12
  13 import org.simantics.scl.compiler.parser.regexp.RAtom;
  14 import org.simantics.scl.compiler.parser.regexp.Regexp;
  15
  16 public class DFA implements Automata {
  17
  18     private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
  19     private TByteArrayList accepts = new TByteArrayList();
  20     private int initialState;
  21
  22     public int newState() {
  23         int stateId = transitions.size();
  24         transitions.add(new TIntIntHashMap(
  25                 Constants.DEFAULT_CAPACITY,
  26                 Constants.DEFAULT_LOAD_FACTOR,
  27                 0, -1));
  28         accepts.add((byte)0);
  29         return stateId;
  30     }
  31
  32     public int size() {
  33         return transitions.size();
  34     }
  35
  36     public DFA copy() {
  37         DFA copy = new DFA();
  38         for(TIntIntHashMap t : transitions)
  39             copy.transitions.add(new TIntIntHashMap(t));
  40         copy.accepts = new TByteArrayList(accepts);
  41         copy.initialState = initialState;
  42         return copy;
  43     }
  44
  45     public void addTransition(int sourceId, int symbol, int targetId) {
  46         transitions.get(sourceId).put(symbol, targetId);
  47     }
  48
  49     public int getTransition(int sourceId, int symbol) {
  50         return transitions.get(sourceId).get(symbol);
  51     }
  52
  53     public void forEachTransition(int source, final TIntIntProcedure proc) {
  54         transitions.get(source).forEachEntry(proc);
  55     }
  56
  57     public int[] nextStates(int id) {
  58         return transitions.get(id).keys();
  59     }
  60
  61     public void setAccepts(int id, boolean accepts) {
  62         this.accepts.set(id, accepts ? (byte)1 : (byte)0);
  63     }
  64
  65     public boolean getAccepts(int id) {
  66         return accepts.get(id)==1;
  67     }
  68
  69     public void setInitialState(int initialState) {
  70         this.initialState = initialState;
  71     }
  72
  73     public int getInitialState() {
  74         return initialState;
  75     }
  76
  77     public DFA minimize() {
  78         // Compute relevant input characters for minimization
  79         final TIntIntHashMap symbolMap = new TIntIntHashMap();
  80         final int[] symbolArray;
  81         {
  82             final TIntArrayList l = new TIntArrayList();
  83             TIntProcedure proc = new TIntProcedure() {
  84                 @Override
  85                 public boolean execute(int value) {
  86                     if(!symbolMap.containsKey(value)) {
  87                         symbolMap.put(value, l.size());
  88                         l.add(value);
  89                     }
  90                     return true;
  91                 }
  92             };
  93             for(TIntIntHashMap tMap : transitions)
  94                 tMap.forEachKey(proc);
  95             symbolArray = l.toArray();
  96         }
  97         int symbolCount = symbolMap.size();
  98         int stateCount = transitions.size();
  99
 100         // Inverse automata
 101         final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
 102         for(int i=0;i<inverse.length;++i)
 103             inverse[i] = new TIntArrayList[symbolCount];
 104         for(int sourceId=0;sourceId<stateCount;++sourceId) {
 105             TIntIntHashMap tMap = transitions.get(sourceId);
 106             for(int j=0;j<symbolCount;++j) {
 107                 int targetId = tMap.get(symbolArray[j]);
 108                 if(targetId == -1)
 109                     targetId = stateCount;
 110                 TIntArrayList l = inverse[targetId][j];
 111                 if(l == null) {
 112                     l = new TIntArrayList();
 113                     inverse[targetId][j] = l;
 114                 }
 115                 l.add(sourceId);
 116             }
 117         }
 118
 119         /*for(int i=0;i<inverse.length;++i)
 120             for(int j=0;j<symbolCount;++j)
 121                 System.out.println(i + " " + j + " -> " + inverse[i][j]);
 122         */
 123
 124         //
 125         int[] ids = new int[stateCount+1];
 126         final int[] memPartion = new int[stateCount+1];
 127         TIntArrayList partionBegin = new TIntArrayList();
 128         TIntArrayList partionEnd = new TIntArrayList();
 129         TIntArrayList stack = new TIntArrayList();
 130         TIntArrayList scheduled = new TIntArrayList();
 131
 132         // Initial partition
 133         {
 134             int min = 0;
 135             int max = stateCount;
 136             ids[min++] = stateCount;
 137             memPartion[stateCount] = 0;
 138             for(int i=0;i<stateCount;++i) {
 139                 if(accepts.get(i)==1) {
 140                     ids[max--] = i;
 141                     memPartion[i] = 1;
 142                 }
 143                 else {
 144                     ids[min++] = i;
 145                     memPartion[i] = 0;
 146                 }
 147             }
 148             partionBegin.add(0);
 149             partionBegin.add(min);
 150             partionEnd.add(min);
 151             partionEnd.add(ids.length);
 152             scheduled.add(0);
 153             scheduled.add(0);
 154             if(min < ids.length/2) {
 155                 stack.add(0);
 156                 scheduled.set(0, 1);
 157             }
 158             else {
 159                 stack.add(1);
 160                 scheduled.set(1, 1);
 161             }
 162         }
 163
 164         // Refinement
 165         while(!stack.isEmpty()) {
 166             int partionId = stack.removeAt(stack.size()-1);
 167             scheduled.set(partionId, 0);
 168             int begin = partionBegin.get(partionId);
 169             int end = partionEnd.get(partionId);
 170
 171             for(int j=0;j<symbolCount;++j) {
 172                 TIntHashSet invStates = new TIntHashSet();
 173                 for(int i=begin;i<end;++i) {
 174                     int s = ids[i];
 175                     TIntArrayList inv = inverse[s][j];
 176                     if(inv != null)
 177                         invStates.addAll(inv);
 178                 }
 179                 int[] invStatesArray = invStates.toArray();
 180
 181                 TIntHashSet partions = new TIntHashSet();
 182                 for(int s : invStatesArray)
 183                     partions.add(memPartion[s]);
 184                 int[] partionsArray = partions.toArray();
 185
 186                 for(int p : partionsArray) {
 187                     int pBegin = partionBegin.get(p);
 188                     int pEnd = partionEnd.get(p);
 189                     boolean splits = false;
 190                     for(int k=pBegin;k<pEnd;++k)
 191                         if(!invStates.contains(ids[k])) {
 192                             splits = true;
 193                             break;
 194                         }
 195                     if(splits) {
 196                         int p2 = partionBegin.size();
 197                         int p2End = pEnd;
 198                         for(int k=pBegin;k<pEnd;++k) {
 199                             int s = ids[k];
 200                             if(invStates.contains(s)) {
 201                                 memPartion[s] = p2;
 202                                 --pEnd;
 203                                 ids[k] = ids[pEnd];
 204                                 ids[pEnd] = s;
 205                                 --k;
 206                             }
 207                         }
 208                         partionEnd.set(p, pEnd);
 209                         partionBegin.add(pEnd);
 210                         partionEnd.add(p2End);
 211
 212                         if(scheduled.get(p) == 1) {
 213                             scheduled.add(1);
 214                             stack.add(p2);
 215                         }
 216                         else {
 217                             if(pEnd - pBegin <= p2End - pEnd) {
 218                                 stack.add(p);
 219                                 scheduled.add(0);
 220                                 scheduled.set(p, 1);
 221                             }
 222                             else {
 223                                 stack.add(p2);
 224                                 scheduled.add(1);
 225                             }
 226                         }
 227                     }
 228                 }
 229             }
 230         }
 231
 232         // Print partitions
 233         /*System.out.println("Partition count: " + partionBegin.size());
 234         for(int i=0;i<memPartion.length;++i)
 235             System.out.println("    " + i + " in " + memPartion[i]);
 236         */
 237
 238         // Put reachable states into new automaton
 239         final DFA aut = new DFA();
 240         int failurePartion = memPartion[stateCount];
 241         final int[] sArray = new int[partionBegin.size()];
 242         sArray[failurePartion] = -1;
 243         for(int i=0;i<partionBegin.size();++i)
 244             if(i != failurePartion)
 245                 sArray[i] = aut.newState();
 246         for(int i=0;i<partionBegin.size();++i)
 247             if(i != failurePartion) {
 248                 final int sourceId = sArray[i];
 249                 int bId = ids[partionBegin.get(i)];
 250                 forEachTransition(bId, new TIntIntProcedure() {
 251                     @Override
 252                     public boolean execute(int a, int b) {
 253                         aut.addTransition(sourceId, a, sArray[memPartion[b]]);
 254                         return true;
 255                     }
 256                 });
 257                 aut.setAccepts(sourceId, getAccepts(bId));
 258             }
 259         aut.setInitialState(sArray[memPartion[initialState]]);
 260         return aut;
 261     }
 262
 263     public Regexp toRegexp(int from, byte[] to) {
 264         int stateCount = size();
 265         final int[] order = new int[stateCount];
 266         for(int i=0;i<stateCount;++i)
 267             order[i] = i;
 268         order[0] = from;
 269         order[from] = 0;
 270
 271         final Regexp[][] a = new Regexp[stateCount][stateCount];
 272         final Regexp[] b = new Regexp[stateCount];
 273         for(int i=0;i<stateCount;++i) {
 274             b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
 275             for(int j=0;j<stateCount;++j)
 276                 a[i][j] = Regexp.ZERO;
 277             final Regexp[] row = a[i];
 278             forEachTransition(order[i], new TIntIntProcedure() {
 279                 @Override
 280                 public boolean execute(int symbol, int targetId) {
 281                     targetId = order[targetId];
 282                     row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
 283                     return true;
 284                 }
 285             });
 286         }
 287
 288         for(int n=stateCount-1;n>=0;--n) {
 289             Regexp ss = Regexp.star(a[n][n]);
 290             b[n] = Regexp.seq(ss, b[n]);
 291             for(int j=0;j<stateCount;++j)
 292                 a[n][j] = Regexp.seq(ss, a[n][j]);
 293             for(int i=0;i<n;++i) {
 294                 b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
 295                 for(int j=0;j<n;++j)
 296                     a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
 297             }
 298         }
 299
 300         return b[0];
 301     }
 302
 303     public Regexp toRegexp() {
 304         return toRegexp(initialState, accepts.toArray()).simplify();
 305     }
 306
 307     public Regexp toRegexpTo(int position) {
 308         int stateCount = size();
 309         byte[] targetArray = new byte[stateCount];
 310         targetArray[position] = 1;
 311         return toRegexp(initialState, targetArray).simplify();
 312     }
 313
 314     public Regexp toRegexpFrom(int position) {
 315         return toRegexp(position, accepts.toArray()).simplify();
 316     }
 317
 318     public Regexp toPositionalRegexp(int position) {
 319         DFA aut = copy();
 320         int s = transitions.size();
 321         aut.transitions.add(aut.transitions.get(position));
 322         aut.transitions.set(position, new TIntIntHashMap());
 323         aut.addTransition(position, 0x80000000, s);
 324         aut.accepts.add(accepts.get(position));
 325         aut.accepts.set(position, (byte)0);
 326         return aut.toRegexp();
 327     }
 328 }