1 package org.simantics.scl.compiler.parser.regexp.automata;
3 import gnu.trove.impl.Constants;
4 import gnu.trove.list.array.TByteArrayList;
5 import gnu.trove.list.array.TIntArrayList;
6 import gnu.trove.map.hash.TIntIntHashMap;
7 import gnu.trove.procedure.TIntIntProcedure;
8 import gnu.trove.procedure.TIntProcedure;
9 import gnu.trove.set.hash.TIntHashSet;
11 import java.util.ArrayList;
13 import org.simantics.scl.compiler.parser.regexp.RAtom;
14 import org.simantics.scl.compiler.parser.regexp.Regexp;
16 public class DFA implements Automata {
18 private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
19 private TByteArrayList accepts = new TByteArrayList();
20 private int initialState;
22 public int newState() {
23 int stateId = transitions.size();
24 transitions.add(new TIntIntHashMap(
25 Constants.DEFAULT_CAPACITY,
26 Constants.DEFAULT_LOAD_FACTOR,
33 return transitions.size();
38 for(TIntIntHashMap t : transitions)
39 copy.transitions.add(new TIntIntHashMap(t));
40 copy.accepts = new TByteArrayList(accepts);
41 copy.initialState = initialState;
45 public void addTransition(int sourceId, int symbol, int targetId) {
46 transitions.get(sourceId).put(symbol, targetId);
49 public int getTransition(int sourceId, int symbol) {
50 return transitions.get(sourceId).get(symbol);
53 public void forEachTransition(int source, final TIntIntProcedure proc) {
54 transitions.get(source).forEachEntry(proc);
57 public int[] nextStates(int id) {
58 return transitions.get(id).keys();
61 public void setAccepts(int id, boolean accepts) {
62 this.accepts.set(id, accepts ? (byte)1 : (byte)0);
65 public boolean getAccepts(int id) {
66 return accepts.get(id)==1;
69 public void setInitialState(int initialState) {
70 this.initialState = initialState;
73 public int getInitialState() {
77 public DFA minimize() {
78 // Compute relevant input characters for minimization
79 final TIntIntHashMap symbolMap = new TIntIntHashMap();
80 final int[] symbolArray;
82 final TIntArrayList l = new TIntArrayList();
83 TIntProcedure proc = new TIntProcedure() {
85 public boolean execute(int value) {
86 if(!symbolMap.containsKey(value)) {
87 symbolMap.put(value, l.size());
93 for(TIntIntHashMap tMap : transitions)
94 tMap.forEachKey(proc);
95 symbolArray = l.toArray();
97 int symbolCount = symbolMap.size();
98 int stateCount = transitions.size();
101 final TIntArrayList[][] inverse = new TIntArrayList[stateCount+1][];
102 for(int i=0;i<inverse.length;++i)
103 inverse[i] = new TIntArrayList[symbolCount];
104 for(int sourceId=0;sourceId<stateCount;++sourceId) {
105 TIntIntHashMap tMap = transitions.get(sourceId);
106 for(int j=0;j<symbolCount;++j) {
107 int targetId = tMap.get(symbolArray[j]);
109 targetId = stateCount;
110 TIntArrayList l = inverse[targetId][j];
112 l = new TIntArrayList();
113 inverse[targetId][j] = l;
119 /*for(int i=0;i<inverse.length;++i)
120 for(int j=0;j<symbolCount;++j)
121 System.out.println(i + " " + j + " -> " + inverse[i][j]);
125 int[] ids = new int[stateCount+1];
126 final int[] memPartion = new int[stateCount+1];
127 TIntArrayList partionBegin = new TIntArrayList();
128 TIntArrayList partionEnd = new TIntArrayList();
129 TIntArrayList stack = new TIntArrayList();
130 TIntArrayList scheduled = new TIntArrayList();
135 int max = stateCount;
136 ids[min++] = stateCount;
137 memPartion[stateCount] = 0;
138 for(int i=0;i<stateCount;++i) {
139 if(accepts.get(i)==1) {
149 partionBegin.add(min);
151 partionEnd.add(ids.length);
154 if(min < ids.length/2) {
165 while(!stack.isEmpty()) {
166 int partionId = stack.removeAt(stack.size()-1);
167 scheduled.set(partionId, 0);
168 int begin = partionBegin.get(partionId);
169 int end = partionEnd.get(partionId);
171 for(int j=0;j<symbolCount;++j) {
172 TIntHashSet invStates = new TIntHashSet();
173 for(int i=begin;i<end;++i) {
175 TIntArrayList inv = inverse[s][j];
177 invStates.addAll(inv);
179 int[] invStatesArray = invStates.toArray();
181 TIntHashSet partions = new TIntHashSet();
182 for(int s : invStatesArray)
183 partions.add(memPartion[s]);
184 int[] partionsArray = partions.toArray();
186 for(int p : partionsArray) {
187 int pBegin = partionBegin.get(p);
188 int pEnd = partionEnd.get(p);
189 boolean splits = false;
190 for(int k=pBegin;k<pEnd;++k)
191 if(!invStates.contains(ids[k])) {
196 int p2 = partionBegin.size();
198 for(int k=pBegin;k<pEnd;++k) {
200 if(invStates.contains(s)) {
208 partionEnd.set(p, pEnd);
209 partionBegin.add(pEnd);
210 partionEnd.add(p2End);
212 if(scheduled.get(p) == 1) {
217 if(pEnd - pBegin <= p2End - pEnd) {
233 /*System.out.println("Partition count: " + partionBegin.size());
234 for(int i=0;i<memPartion.length;++i)
235 System.out.println(" " + i + " in " + memPartion[i]);
238 // Put reachable states into new automaton
239 final DFA aut = new DFA();
240 int failurePartion = memPartion[stateCount];
241 final int[] sArray = new int[partionBegin.size()];
242 sArray[failurePartion] = -1;
243 for(int i=0;i<partionBegin.size();++i)
244 if(i != failurePartion)
245 sArray[i] = aut.newState();
246 for(int i=0;i<partionBegin.size();++i)
247 if(i != failurePartion) {
248 final int sourceId = sArray[i];
249 int bId = ids[partionBegin.get(i)];
250 forEachTransition(bId, new TIntIntProcedure() {
252 public boolean execute(int a, int b) {
253 aut.addTransition(sourceId, a, sArray[memPartion[b]]);
257 aut.setAccepts(sourceId, getAccepts(bId));
259 aut.setInitialState(sArray[memPartion[initialState]]);
263 public Regexp toRegexp(int from, byte[] to) {
264 int stateCount = size();
265 final int[] order = new int[stateCount];
266 for(int i=0;i<stateCount;++i)
271 final Regexp[][] a = new Regexp[stateCount][stateCount];
272 final Regexp[] b = new Regexp[stateCount];
273 for(int i=0;i<stateCount;++i) {
274 b[i] = to[order[i]]==1 ? Regexp.ONE : Regexp.ZERO;
275 for(int j=0;j<stateCount;++j)
276 a[i][j] = Regexp.ZERO;
277 final Regexp[] row = a[i];
278 forEachTransition(order[i], new TIntIntProcedure() {
280 public boolean execute(int symbol, int targetId) {
281 targetId = order[targetId];
282 row[targetId] = Regexp.or(row[targetId], new RAtom(symbol));
288 for(int n=stateCount-1;n>=0;--n) {
289 Regexp ss = Regexp.star(a[n][n]);
290 b[n] = Regexp.seq(ss, b[n]);
291 for(int j=0;j<stateCount;++j)
292 a[n][j] = Regexp.seq(ss, a[n][j]);
293 for(int i=0;i<n;++i) {
294 b[i] = Regexp.or(b[i], Regexp.seq(a[i][n], b[n]));
296 a[i][j] = Regexp.or(a[i][j], Regexp.seq(a[i][n], a[n][j]));
303 public Regexp toRegexp() {
304 return toRegexp(initialState, accepts.toArray()).simplify();
307 public Regexp toRegexpTo(int position) {
308 int stateCount = size();
309 byte[] targetArray = new byte[stateCount];
310 targetArray[position] = 1;
311 return toRegexp(initialState, targetArray).simplify();
314 public Regexp toRegexpFrom(int position) {
315 return toRegexp(position, accepts.toArray()).simplify();
318 public Regexp toPositionalRegexp(int position) {
320 int s = transitions.size();
321 aut.transitions.add(aut.transitions.get(position));
322 aut.transitions.set(position, new TIntIntHashMap());
323 aut.addTransition(position, 0x80000000, s);
324 aut.accepts.add(accepts.get(position));
325 aut.accepts.set(position, (byte)0);
326 return aut.toRegexp();