]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java
fdb01e58d37ce2f1cf37a49dee5fd82eee3531e2
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / generator / table / ParseTableBuilder.java
1 package org.simantics.scl.compiler.parser.generator.table;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
7 import org.simantics.scl.compiler.parser.generator.grammar.Prod;
8
9 import gnu.trove.list.array.TIntArrayList;
10 import gnu.trove.list.array.TLongArrayList;
11 import gnu.trove.map.hash.TIntIntHashMap;
12 import gnu.trove.map.hash.TIntObjectHashMap;
13 import gnu.trove.map.hash.TLongIntHashMap;
14 import gnu.trove.map.hash.TObjectIntHashMap;
15 import gnu.trove.procedure.TIntIntProcedure;
16 import gnu.trove.procedure.TIntObjectProcedure;
17 import gnu.trove.procedure.TObjectIntProcedure;
18 import gnu.trove.set.hash.THashSet;
19 import gnu.trove.set.hash.TIntHashSet;
20 import gnu.trove.set.hash.TLongHashSet;
21
22 public class ParseTableBuilder {
23     public final static int MAX_STACK_ID = 10;
24     
25     private static final int STATE_MASK = 0x0fff;
26     private static final int REDUCE_MASK = 0x8000;
27     private static final int POP_MASK = 0x4000;
28     private static final int PUSH_MASK = 0x2000;
29     public static final int ERROR_ACTION = 0xffff;
30     private static final int ACCEPT_ACTION = 0xfffe;
31
32     final AnaGrammar grammar;
33     private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
34     private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
35     private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
36     private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
37     private TIntArrayList backTransSymbols = new TIntArrayList();
38     private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
39     int[] initialStates;
40     TIntHashSet finalStates = new TIntHashSet(); 
41
42     private ParseTableBuilder(AnaGrammar grammar) {
43         this.grammar = grammar;
44     }
45         
46     private static boolean isNonterminal(int symbol) {
47         return symbol < 0;
48     }
49     
50     private void close(ArrayList<Item> items) {
51         THashSet<Item> itemSet = new THashSet<Item>(items);
52         for(int i=0;i<items.size();++i) {
53             Item item = items.get(i);
54             for(int nextSymbol : item.nextSymbols(grammar))
55                 if(isNonterminal(nextSymbol)) {
56                     nextSymbol = ~nextSymbol;
57                     int pEnd = grammar.nonterminalPos[nextSymbol+1];
58                     for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
59                         Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
60                         if(itemSet.add(newItem))
61                             items.add(newItem);
62                     }
63                 }                
64         }
65     }
66     
67     private int getState(int backTransSymbol, ArrayList<Item> items) {
68         // Create state
69         close(items);
70         final ItemSet itemSet = new ItemSet(items);
71         if(states.contains(itemSet))
72             return states.get(itemSet);
73         final int newState = states.size();
74         states.put(itemSet, newState);
75         itemSets.add(itemSet);
76         backTransSymbols.add(backTransSymbol);
77         backLinks.add(new TIntArrayList(2));
78         
79         // Create transitions
80         TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
81         //close(items);
82         for(Item item : items) {
83             for(int s : item.nextSymbols(grammar)) {
84                 ArrayList<Item> l = transitionMap.get(s);
85                 if(l == null) {
86                     l = new ArrayList<Item>();
87                     transitionMap.put(s, l);
88                 }
89                 l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
90             }
91         }
92         
93         final TIntIntHashMap trans = new TIntIntHashMap();
94         final TIntIntHashMap stackOpMap = new TIntIntHashMap();
95         transitions.add(trans);
96         stackOps.add(stackOpMap);
97         if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
98             finalStates.add(newState);
99         }
100         transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
101             @Override
102             public boolean execute(int a, ArrayList<Item> b) {
103                 boolean stackShift = false;
104                 int minStackPos = Integer.MAX_VALUE;
105                 for(Item item : b) {
106                     if(item.stackPos == -1)
107                         stackShift = true;
108                     else
109                         minStackPos = Math.min(minStackPos, item.stackPos);
110                 }
111                 int stackOp = 0;
112                 if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
113                     stackOp |= POP_MASK;
114                     //System.out.println("minStackPos = " + minStackPos);
115                     for(Item item : b)
116                         if(item.stackPos >= 0)
117                             --item.stackPos;
118                 }
119                 boolean stackOverflow = false;
120                 if(stackShift) {
121                     stackOp |= PUSH_MASK;
122                     for(Item item : b) {
123                         ++item.stackPos;
124                         if(item.stackPos > MAX_STACK_ID)
125                             stackOverflow = true;
126                     }
127                 }
128                 stackOpMap.put(a, stackOp);
129                 //System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
130                 
131                 if(stackOverflow) {
132                     System.err.println("Stack overflow when following " + grammar.getName(a) + " at");
133                     System.err.println(itemSet.toString(grammar));
134                 }
135                 else {
136                     int state = getState(a, b);
137                     trans.put(a, state);
138                     backLinks.get(state).add(newState);
139                 }
140                 return true;
141             }
142             
143         });
144         return newState;
145     }
146
147     TLongArrayList sMap = new TLongArrayList();
148     TLongIntHashMap sMapInv = new TLongIntHashMap();
149     TIntHashSet[] follow;
150         
151     private static int getState(long s) {
152         return (int)(s >> 32);
153     }
154     
155     private static int getSymbol(long s) {
156         return (int)s;
157     }
158     
159     private static long getS(int state, int symbol) {
160         return (((long)state) << 32) | (long)symbol;
161     }
162     
163     private void computeFollow() {
164         for(int i=0;i<itemSets.size();++i) {
165             final int source = i;
166             transitions.get(i).forEachEntry(new TIntIntProcedure() {
167                 @Override
168                 public boolean execute(int symbol, int target) {
169                     if(symbol < 0) {
170                         long s = getS(source, ~symbol);
171                         int id = sMap.size();
172                         sMap.add(s);
173                         sMapInv.put(s, id);
174                     }
175                     return true;
176                 }
177             });
178         }
179
180         // initfollow
181         follow = new TIntHashSet[sMap.size()];
182         final TIntHashSet[] gread = new TIntHashSet[follow.length];
183         final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
184         for(int i=0;i<follow.length;++i) {
185             gread[i] = new TIntHashSet();
186             gla[i] = new TIntHashSet();
187         }
188         for(int i=0;i<follow.length;++i) {
189             final int id = i;
190             long s = sMap.get(i);
191             int source = getState(s);
192             int symbol = getSymbol(s);
193             final int target = transitions.get(source).get(~symbol);
194             final TIntHashSet drSet = new TIntHashSet();
195             transitions.get(target).forEachEntry(new TIntIntProcedure() {
196                 @Override
197                 public boolean execute(int symbol2, int target2) {
198                     if(symbol2 >= 0)
199                         drSet.add(symbol2);
200                     else if(grammar.nullable[~symbol2])
201                         gread[sMapInv.get(getS(target, ~symbol2))].add(id);
202                     return true;
203                 }
204             });
205             if(finalStates.contains(target))
206                 drSet.add(grammar.terminalNames.length-1);
207             follow[i] = drSet;
208             
209             ItemSet set = itemSets.get(target);
210             for(Item targetItem : set.items) {
211                 Prod prod = grammar.prods.get(targetItem.production);
212                 if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
213                     for(Item sourceItem : itemSets.get(source).items) {
214                         if(sourceItem.production == targetItem.production &&
215                                 prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
216                             TLongHashSet visited = new TLongHashSet(); 
217                             traceBack(gla, id, visited, source, sourceItem);
218                         }
219                     }
220                     
221                 }
222             }
223         }
224         //System.out.println("follow: " + Arrays.toString(follow));
225         //System.out.println("gread: " + Arrays.toString(gread));
226         //System.out.println("gla: " + Arrays.toString(gla));
227         AnaGrammar.gclose(follow, gread);
228         AnaGrammar.gclose(follow, gla);
229         
230         /*System.out.println("Gla:");
231         for(int i=0;i<gla.length;++i) {
232             int iState = getState(sMap.get(i));
233             int iSymbol = getSymbol(sMap.get(i));
234             for(int j : gla[i].toArray()) {
235                 int jState = getState(sMap.get(j));
236                 int jSymbol = getSymbol(sMap.get(j));
237                 System.out.println("-- from --");
238                 System.out.println(itemSets.get(iState).toString(grammar));
239                 System.out.println("    symbol: " + grammar.nonterminalNames[iSymbol]);
240                 System.out.println("-- to --");
241                 System.out.println(itemSets.get(jState).toString(grammar));
242                 System.out.println("    symbol: " + grammar.nonterminalNames[jSymbol]);
243             }
244         }*/
245         
246         /*for(int i=0;i<follow.length;++i) {
247             long s = sMap.get(i);
248             int source = getState(s);
249             int symbol = getSymbol(s);
250             System.out.println("------------------------------");
251             System.out.println(itemSets.get(source).toString(grammar));
252             System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
253             System.out.print("Follow:");
254             for(int sym : follow[i].toArray())
255                 System.out.print(" " + grammar.terminalNames[sym]);
256             System.out.println();
257         }*/
258     }
259     
260     private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
261         if(visited.add( (((long)state)<<32) | (long)item.position )) {
262             Prod prod = grammar.prods.get(item.production);
263             if(item.stackPos == -1) {
264                 int id = sMapInv.get(getS(state, prod.lhs));
265                 gla[id].add(initialId);
266             }
267             
268             int backTransSymbol = backTransSymbols.get(state);
269             for(int prevState : backLinks.get(state).toArray())
270                 for(Item prevItem : itemSets.get(prevState).items)
271                     if(prevItem.production == item.production &&
272                             prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
273                         traceBack(gla, initialId, visited, prevState, prevItem);
274         }
275     }
276     
277     private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
278         if(visited.add( (((long)state)<<32) | (long)position )) {
279             int backTransSymbol = backTransSymbols.get(state);
280             Prod prod = grammar.prods.get(production);
281             boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
282             for(int prevState : backLinks.get(state).toArray()) {
283                 for(Item item : itemSets.get(prevState).items) {
284                     if(item.production == production &&
285                             prod.rhs.getTransition(item.position, backTransSymbol) == position)
286                         lookback(visited, la, production, prevState, item.position);
287                     if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
288                         int id = sMapInv.get(getS(prevState, prod.lhs));
289                         la.addAll(follow[id]);
290                     }
291                 }
292             }
293         }
294     }
295
296     private void createReduceActions() {
297         computeFollow();
298         for(int i=0;i<itemSets.size();++i) {
299             TIntIntHashMap trans = transitions.get(i);
300             if(finalStates.contains(i))
301                 trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
302             
303             TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
304             TIntIntHashMap stackPosMap = new TIntIntHashMap(); 
305             for(Item item : itemSets.get(i).items) {
306                 Prod prod = grammar.prods.get(item.production);
307                 if(prod.rhs.getAccepts(item.position)) {
308                     TIntHashSet la = laMap.get(item.production);
309                     if(la == null) {
310                         la = new TIntHashSet();
311                         laMap.put(item.production, la);
312                     }
313                     
314                     TLongHashSet visited = new TLongHashSet();
315                     lookback(visited, la, item.production, i, item.position);
316                     
317                     if(stackPosMap.containsKey(item.production)) {
318                         stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
319                     }
320                     else
321                         stackPosMap.put(item.production, item.stackPos);
322                 }
323             }
324             
325             // Create transitions
326             for(int production : laMap.keys()) {
327                 int stackPos = 0; //stackPosMap.get(production);
328                 TIntHashSet la = laMap.get(production);
329                 for(int symbol : la.toArray()) {
330                     if(trans.contains(symbol)) {
331                         int oldAction = trans.get(symbol);
332                         if(oldAction >= 0) {
333                             Prod prod = grammar.prods.get(production);
334                             if(prod.annotations.containsKey(symbol)) {
335                                 byte v = prod.annotations.get(symbol);
336                                 if(v == 1)
337                                     trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
338                             }
339                             else {
340                                 System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
341                                 System.err.println(itemSets.get(i).toString(grammar));
342                             }
343                         }
344                         else {
345                             System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
346                             System.err.println(itemSets.get(i).toString(grammar));
347                         }
348                     }
349                     else
350                         trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
351                 }
352             }
353             
354             // Check stacking conflicts
355             /*trans.forEachEntry(new TIntIntProcedure() {
356                 @Override
357                 public boolean execute(int a, int b) {
358                     if(b >= 0) {
359                         boolean kernelState = false;
360                         boolean nonkernelState = false;
361                         for(Item item : itemSets.get(b).items) {
362                             Prod prod = grammar.prods.get(item.production);
363                             if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
364                                 nonkernelState = true;
365                             else if(item.position != prod.rhs.getInitialState())
366                                 kernelState = true;
367                         }
368                         
369                         
370                         if(kernelState && nonkernelState) {
371                             System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
372                             System.err.println(itemSets.get(b).toString(grammar));
373                         }
374                     }
375                     return true;
376                 }
377             });*/
378         }
379     }
380     
381     public static ParseTable build(AnaGrammar grammar) {
382         ParseTableBuilder builder = new ParseTableBuilder(grammar);
383         
384         builder.initialStates = new int[grammar.initialNonterminals.length];
385         for(int i=0;i<grammar.initialNonterminals.length;++i) {
386             ArrayList<Item> seed = new ArrayList<Item>();
387             int prodId = grammar.prods.size()-i-1;
388             seed.add(new Item(prodId, 
389                     grammar.prods.get(prodId).rhs.getInitialState(), 0));
390             builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
391         }
392         
393         builder.createReduceActions();
394         
395         System.out.println("States: " + builder.itemSets.size());
396         
397         //builder.visualize();
398         
399         //builder.printParseTable();
400         return builder.getParseTable();
401     }
402
403     private ParseTable getParseTable() {
404         int[] productionInfo = new int[grammar.prods.size()];
405         for(int i=0;i<productionInfo.length;++i) {
406             Prod prod = grammar.prods.get(i);
407             productionInfo[i] = prod.lhs;
408         }
409         
410         int[][] actionTable = new int[transitions.size()][];
411         int[][] gotoTable = new int[transitions.size()][];
412         for(int i=0;i<transitions.size();++i) {
413             final int[] actions = new int[grammar.terminalNames.length]; 
414             Arrays.fill(actions, ERROR_ACTION);
415             actionTable[i] = actions;
416             final int[] gotos = new int[grammar.nonterminalNames.length];
417             Arrays.fill(gotos, ERROR_ACTION);
418             gotoTable[i] = gotos;
419             final TIntIntHashMap stackOpMap = stackOps.get(i); 
420             transitions.get(i).forEachEntry(new TIntIntProcedure() {
421                 @Override
422                 public boolean execute(int a, int b) {
423                     int action = b | stackOpMap.get(a);
424                     if(a >= 0)
425                         actions[a] = action;
426                     else
427                         gotos[~a] = action;
428                     return true;
429                 }
430             });
431         }
432         
433         String[] stateDescriptions = new String[itemSets.size()];
434         for(int i=0;i<stateDescriptions.length;++i) {
435             final StringBuilder b = new StringBuilder();
436             b.append(itemSets.get(i).toString(grammar));
437             transitions.get(i).forEachEntry(new TIntIntProcedure() {
438                 @Override
439                 public boolean execute(int symbol, int action) {
440                     if(symbol >= 0) {
441                         b.append("\n    ").append(grammar.terminalNames[symbol]).append(" ->");
442                         if((action & REDUCE_MASK) == 0) {
443                             if((action & POP_MASK) != 0)
444                                 b.append(" POP");
445                             if((action & PUSH_MASK) != 0)
446                                 b.append(" PUSH");
447                             b.append(" SHIFT(").append(action&STATE_MASK).append(")");
448                         }
449                         else {
450                             if(action == 0xfffffffe)
451                                 b.append(" ACCEPT");
452                             else
453                                 b.append(" REDUCE(").append(action&STATE_MASK).append(")");
454                         }
455                     }
456                     else {
457                         b.append("\n    ").append(grammar.nonterminalNames[~symbol]).append(" ->")
458                          .append(" GOTO(").append(action).append(")");
459                     }
460                     return true;
461                 }
462             });
463             stateDescriptions[i] = b.toString();
464         }
465         
466         //printParseTable();
467         return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
468                 initialStates, stateDescriptions);
469     }
470
471     private void printParseTable() {
472         final ItemSet[] stateSets = new ItemSet[states.size()];
473         states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
474             @Override
475             public boolean execute(ItemSet a, int b) {
476                 stateSets[b] = a;
477                 return true;
478             }
479         });
480         for(int i=0;i<stateSets.length;++i) {
481             System.out.println("--- State " + i + " ---");
482             System.out.println(stateSets[i].toString(grammar));
483             final TIntIntHashMap stackOp = stackOps.get(i);
484             transitions.get(i).forEachEntry(new TIntIntProcedure() {
485                 @Override
486                 public boolean execute(int a, int b) {
487                     int sOp = stackOp.get(a);
488                     System.out.print(grammar.getName(a) + " -> ");
489                     if(sOp != 0) {
490                         System.out.print("[");
491                         if((sOp & PUSH_MASK) != 0) {
492                             sOp ^= PUSH_MASK;
493                             System.out.print("PUSH ");
494                         }
495                         if((sOp & POP_MASK) != 0) {
496                             sOp ^= POP_MASK;
497                             System.out.print("POP ");
498                         }
499                         if(sOp != 0)
500                             System.out.print(sOp);
501                         System.out.print("] ");
502                     }
503                     if((b & REDUCE_MASK) != 0) {
504                         b ^= REDUCE_MASK;
505                         System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar));
506                     }
507                     else {
508                         System.out.println("shift " + b);
509                     }
510                     return true;
511                 }
512             });
513         }
514     }
515 }