]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/automata/DFA.java
Moved SCL parser generator to platform repository.
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / regexp / automata / DFA.java
1 package org.simantics.scl.compiler.parser.regexp.automata;
2
3 import gnu.trove.impl.Constants;
4 import gnu.trove.list.array.TByteArrayList;
5 import gnu.trove.list.array.TIntArrayList;
6 import gnu.trove.map.hash.TIntIntHashMap;
7 import gnu.trove.procedure.TIntIntProcedure;
8 import gnu.trove.procedure.TIntProcedure;
9 import gnu.trove.set.hash.TIntHashSet;
10
11 import java.util.ArrayList;
12
13 import org.simantics.scl.compiler.parser.regexp.RAtom;
14 import org.simantics.scl.compiler.parser.regexp.Regexp;
15
16 public class DFA implements Automata {
17
18     private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>(); 
19     private TByteArrayList accepts = new TByteArrayList();
20     private int initialState;
21     
22     public int newState() {
23         int stateId = transitions.size();
24         transitions.add(new TIntIntHashMap(
25                 Constants.DEFAULT_CAPACITY,
26                 Constants.DEFAULT_LOAD_FACTOR,
27                 0, -1));
28         accepts.add((byte)0);
29         return stateId;
30     }
31     
32     public int size() {
33         return transitions.size();
34     }
35     
36     public DFA copy() {
37         DFA copy = new DFA();
38         for(TIntIntHashMap t : transitions)
39             copy.transitions.add(new TIntIntHashMap(t));
40         copy.accepts = new TByteArrayList(accepts);
41         copy.initialState = initialState;
42         return copy;
43     }
44
45     public void addTransition(int sourceId, int symbol, int targetId) {
46         transitions.get(sourceId).put(symbol, targetId);
47     }
48     
49     public int getTransition(int sourceId, int symbol) {
50         return transitions.get(sourceId).get(symbol);
51     }
52     
53     public void forEachTransition(int source, final TIntIntProcedure proc) {        
54         transitions.get(source).forEachEntry(proc);
55     }
56     
57     public int[] nextStates(int id) {
58         return transitions.get(id).keys();
59     }
60     
61     public void setAccepts(int id, boolean accepts) {
62         this.accepts.set(id, accepts ? (byte)1 : (byte)0);
63     }
64     
65     public boolean getAccepts(int id) {
66         return accepts.get(id)==1;
67     }
68
69     public void setInitialState(int initialState) {
70         this.initialState = initialState;
71     }
72     
73     public int getInitialState() {
74         return initialState;
75     }
76
77     public DFA minimize() {
78         // Compute relevant input characters for minimization
79         final TIntIntHashMap symbolMap = new TIntIntHashMap();
80         final int[] symbolArray;
81         {
82             final TIntArrayList l = new TIntArrayList();
83             TIntProcedure proc = new TIntProcedure() {
84                 @Override
85                 public boolean execute(int value) {
86                     if(!symbolMap.containsKey(value)) {
87                         symbolMap.put(value, l.size());
88                         l.add(value);
89                     }
90                     return true;
91                 }
92             };
93             for(TIntIntHashMap tMap : transitions)
94                 tMap.forEachKey(proc);
95             symbolArray = l.toArray();
96         }
97         int symbolCount = symbolMap.size();        
98         int stateCount = transitions.size();
99         
100         // Inverse automata
101         final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
102         for(int i=0;i<inverse.length;++i)
103             inverse[i] = new TIntArrayList[symbolCount];
104         for(int sourceId=0;sourceId<stateCount;++sourceId) {
105             TIntIntHashMap tMap = transitions.get(sourceId);
106             for(int j=0;j<symbolCount;++j) {
107                 int targetId = tMap.get(symbolArray[j]);
108                 if(targetId == -1)
109                     targetId = stateCount;
110                 TIntArrayList l = inverse[targetId][j];
111                 if(l == null) {
112                     l = new TIntArrayList();
113                     inverse[targetId][j] = l;   
114                 }
115                 l.add(sourceId);
116             }
117         }
118         
119         /*for(int i=0;i<inverse.length;++i)
120             for(int j=0;j<symbolCount;++j)
121                 System.out.println(i + " " + j + " -> " + inverse[i][j]);
122         */
123         
124         // 
125         int[] ids = new int[stateCount+1];
126         final int[] memPartion = new int[stateCount+1];        
127         TIntArrayList partionBegin = new TIntArrayList();
128         TIntArrayList partionEnd = new TIntArrayList();
129         TIntArrayList stack = new TIntArrayList();
130         TIntArrayList scheduled = new TIntArrayList();
131         
132         // Initial partition
133         {
134             int min = 0;
135             int max = stateCount;
136             ids[min++] = stateCount;
137             memPartion[stateCount] = 0;
138             for(int i=0;i<stateCount;++i) {
139                 if(accepts.get(i)==1) {
140                     ids[max--] = i;
141                     memPartion[i] = 1;
142                 }
143                 else {
144                     ids[min++] = i;
145                     memPartion[i] = 0;
146                 }
147             }
148             partionBegin.add(0);
149             partionBegin.add(min);
150             partionEnd.add(min);
151             partionEnd.add(ids.length);
152             scheduled.add(0);
153             scheduled.add(0);
154             if(min < ids.length/2) {
155                 stack.add(0);
156                 scheduled.set(0, 1);
157             }
158             else {
159                 stack.add(1);
160                 scheduled.set(1, 1);
161             }
162         }
163         
164         // Refinement
165         while(!stack.isEmpty()) {
166             int partionId = stack.removeAt(stack.size()-1);
167             scheduled.set(partionId, 0);
168             int begin = partionBegin.get(partionId);
169             int end = partionEnd.get(partionId);
170             
171             for(int j=0;j<symbolCount;++j) {
172                 TIntHashSet invStates = new TIntHashSet();
173                 for(int i=begin;i<end;++i) {
174                     int s = ids[i];
175                     TIntArrayList inv = inverse[s][j];
176                     if(inv != null)
177                         invStates.addAll(inv);
178                 }
179                 int[] invStatesArray = invStates.toArray();
180                 
181                 TIntHashSet partions = new TIntHashSet();
182                 for(int s : invStatesArray)
183                     partions.add(memPartion[s]);
184                 int[] partionsArray = partions.toArray();
185                 
186                 for(int p : partionsArray) {
187                     int pBegin = partionBegin.get(p);
188                     int pEnd = partionEnd.get(p);
189                     boolean splits = false;
190                     for(int k=pBegin;k<pEnd;++k)
191                         if(!invStates.contains(ids[k])) {
192                             splits = true;
193                             break;
194                         }
195                     if(splits) {
196                         int p2 = partionBegin.size();
197                         int p2End = pEnd;
198                         for(int k=pBegin;k<pEnd;++k) {
199                             int s = ids[k];
200                             if(invStates.contains(s)) {
201                                 memPartion[s] = p2;
202                                 --pEnd;
203                                 ids[k] = ids[pEnd];
204                                 ids[pEnd] = s;
205                                 --k;
206                             }
207                         }
208                         partionEnd.set(p, pEnd);
209                         partionBegin.add(pEnd);
210                         partionEnd.add(p2End);
211                         
212                         if(scheduled.get(p) == 1) {
213                             scheduled.add(1);
214                             stack.add(p2);
215                         }
216                         else {
217                             if(pEnd - pBegin <= p2End - pEnd) {
218                                 stack.add(p);
219                                 scheduled.add(0);
220                                 scheduled.set(p, 1);
221                             }
222                             else {
223                                 stack.add(p2);
224                                 scheduled.add(1);
225                             }
226                         }
227                     }
228                 }
229             }
230         }
231         
232         // Print partitions
233         /*System.out.println("Partition count: " + partionBegin.size());
234         for(int i=0;i<memPartion.length;++i)
235             System.out.println("    " + i + " in " + memPartion[i]);
236         */
237         
238         // Put reachable states into new automaton
239         final DFA aut = new DFA();
240         int failurePartion = memPartion[stateCount];
241         final int[] sArray = new int[partionBegin.size()];
242         sArray[failurePartion] = -1;
243         for(int i=0;i<partionBegin.size();++i)
244             if(i != failurePartion)
245                 sArray[i] = aut.newState();
246         for(int i=0;i<partionBegin.size();++i)
247             if(i != failurePartion) {
248                 final int sourceId = sArray[i];
249                 int bId = ids[partionBegin.get(i)];
250                 forEachTransition(bId, new TIntIntProcedure() {
251                     @Override
252                     public boolean execute(int a, int b) {
253                         aut.addTransition(sourceId, a, sArray[memPartion[b]]);
254                         return true;
255                     }
256                 });
257                 aut.setAccepts(sourceId, getAccepts(bId));
258             }
259         aut.setInitialState(sArray[memPartion[initialState]]);
260         return aut;
261     }
262     
263     public Regexp toRegexp(int from, byte[] to) {
264         int stateCount = size();
265         final int[] order = new int[stateCount];
266         for(int i=0;i<stateCount;++i)
267             order[i] = i;
268         order[0] = from;
269         order[from] = 0;
270         
271         final Regexp[][] a = new Regexp[stateCount][stateCount];
272         final Regexp[] b = new Regexp[stateCount];
273         for(int i=0;i<stateCount;++i) {
274             b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
275             for(int j=0;j<stateCount;++j)
276                 a[i][j] = Regexp.ZERO;
277             final Regexp[] row = a[i];
278             forEachTransition(order[i], new TIntIntProcedure() {
279                 @Override
280                 public boolean execute(int symbol, int targetId) {
281                     targetId = order[targetId];
282                     row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
283                     return true;
284                 }
285             });
286         }
287         
288         for(int n=stateCount-1;n>=0;--n) {
289             Regexp ss = Regexp.star(a[n][n]);
290             b[n] = Regexp.seq(ss, b[n]);
291             for(int j=0;j<stateCount;++j)
292                 a[n][j] = Regexp.seq(ss, a[n][j]);
293             for(int i=0;i<n;++i) {
294                 b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
295                 for(int j=0;j<n;++j)
296                     a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
297             }
298         }
299         
300         return b[0];
301     }
302
303     public Regexp toRegexp() {
304         return toRegexp(initialState, accepts.toArray()).simplify();
305     }
306     
307     public Regexp toRegexpTo(int position) {
308         int stateCount = size();
309         byte[] targetArray = new byte[stateCount];
310         targetArray[position] = 1;
311         return toRegexp(initialState, targetArray).simplify();
312     }
313     
314     public Regexp toRegexpFrom(int position) {
315         return toRegexp(position, accepts.toArray()).simplify();
316     }
317
318     public Regexp toPositionalRegexp(int position) {
319         DFA aut = copy();
320         int s = transitions.size();
321         aut.transitions.add(aut.transitions.get(position));
322         aut.transitions.set(position, new TIntIntHashMap());
323         aut.addTransition(position, 0x80000000, s);
324         aut.accepts.add(accepts.get(position));
325         aut.accepts.set(position, (byte)0);
326         return aut.toRegexp();
327     }
328 }