]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/generator/table/ParseTableBuilder.java
Fixed multiple issues causing dangling references to discarded queries
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / generator / table / ParseTableBuilder.java
1 package org.simantics.scl.compiler.parser.generator.table;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5
6 import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
7 import org.simantics.scl.compiler.parser.generator.grammar.Prod;
8 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory;
10
11 import gnu.trove.list.array.TIntArrayList;
12 import gnu.trove.list.array.TLongArrayList;
13 import gnu.trove.map.hash.TIntIntHashMap;
14 import gnu.trove.map.hash.TIntObjectHashMap;
15 import gnu.trove.map.hash.TLongIntHashMap;
16 import gnu.trove.map.hash.TObjectIntHashMap;
17 import gnu.trove.procedure.TIntIntProcedure;
18 import gnu.trove.procedure.TIntObjectProcedure;
19 import gnu.trove.procedure.TObjectIntProcedure;
20 import gnu.trove.set.hash.THashSet;
21 import gnu.trove.set.hash.TIntHashSet;
22 import gnu.trove.set.hash.TLongHashSet;
23
24 public class ParseTableBuilder {
25     private static final Logger LOGGER = LoggerFactory.getLogger(ParseTableBuilder.class);
26     public final static int MAX_STACK_ID = 10;
27     
28     private static final int STATE_MASK = 0x0fff;
29     private static final int REDUCE_MASK = 0x8000;
30     private static final int POP_MASK = 0x4000;
31     private static final int PUSH_MASK = 0x2000;
32     public static final int ERROR_ACTION = 0xffff;
33     private static final int ACCEPT_ACTION = 0xfffe;
34
35     final AnaGrammar grammar;
36     private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
37     private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
38     private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
39     private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
40     private TIntArrayList backTransSymbols = new TIntArrayList();
41     private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
42     int[] initialStates;
43     TIntHashSet finalStates = new TIntHashSet(); 
44
45     private ParseTableBuilder(AnaGrammar grammar) {
46         this.grammar = grammar;
47     }
48         
49     private static boolean isNonterminal(int symbol) {
50         return symbol < 0;
51     }
52     
53     private void close(ArrayList<Item> items) {
54         THashSet<Item> itemSet = new THashSet<Item>(items);
55         for(int i=0;i<items.size();++i) {
56             Item item = items.get(i);
57             for(int nextSymbol : item.nextSymbols(grammar))
58                 if(isNonterminal(nextSymbol)) {
59                     nextSymbol = ~nextSymbol;
60                     int pEnd = grammar.nonterminalPos[nextSymbol+1];
61                     for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
62                         Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
63                         if(itemSet.add(newItem))
64                             items.add(newItem);
65                     }
66                 }                
67         }
68     }
69     
70     private int getState(int backTransSymbol, ArrayList<Item> items) {
71         // Create state
72         close(items);
73         final ItemSet itemSet = new ItemSet(items);
74         if(states.contains(itemSet))
75             return states.get(itemSet);
76         final int newState = states.size();
77         states.put(itemSet, newState);
78         itemSets.add(itemSet);
79         backTransSymbols.add(backTransSymbol);
80         backLinks.add(new TIntArrayList(2));
81         
82         // Create transitions
83         TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
84         //close(items);
85         for(Item item : items) {
86             for(int s : item.nextSymbols(grammar)) {
87                 ArrayList<Item> l = transitionMap.get(s);
88                 if(l == null) {
89                     l = new ArrayList<Item>();
90                     transitionMap.put(s, l);
91                 }
92                 l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
93             }
94         }
95         
96         final TIntIntHashMap trans = new TIntIntHashMap();
97         final TIntIntHashMap stackOpMap = new TIntIntHashMap();
98         transitions.add(trans);
99         stackOps.add(stackOpMap);
100         if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
101             finalStates.add(newState);
102         }
103         transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
104             @Override
105             public boolean execute(int a, ArrayList<Item> b) {
106                 boolean stackShift = false;
107                 int minStackPos = Integer.MAX_VALUE;
108                 for(Item item : b) {
109                     if(item.stackPos == -1)
110                         stackShift = true;
111                     else
112                         minStackPos = Math.min(minStackPos, item.stackPos);
113                 }
114                 int stackOp = 0;
115                 if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
116                     stackOp |= POP_MASK;
117                     //System.out.println("minStackPos = " + minStackPos);
118                     for(Item item : b)
119                         if(item.stackPos >= 0)
120                             --item.stackPos;
121                 }
122                 boolean stackOverflow = false;
123                 if(stackShift) {
124                     stackOp |= PUSH_MASK;
125                     for(Item item : b) {
126                         ++item.stackPos;
127                         if(item.stackPos > MAX_STACK_ID)
128                             stackOverflow = true;
129                     }
130                 }
131                 stackOpMap.put(a, stackOp);
132                 //System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
133                 
134                 if(stackOverflow) {
135                     LOGGER.error("Stack overflow when following " + grammar.getName(a) + " at");
136                     LOGGER.error(itemSet.toString(grammar));
137                 }
138                 else {
139                     int state = getState(a, b);
140                     trans.put(a, state);
141                     backLinks.get(state).add(newState);
142                 }
143                 return true;
144             }
145             
146         });
147         return newState;
148     }
149
150     TLongArrayList sMap = new TLongArrayList();
151     TLongIntHashMap sMapInv = new TLongIntHashMap();
152     TIntHashSet[] follow;
153         
154     private static int getState(long s) {
155         return (int)(s >> 32);
156     }
157     
158     private static int getSymbol(long s) {
159         return (int)s;
160     }
161     
162     private static long getS(int state, int symbol) {
163         return (((long)state) << 32) | (long)symbol;
164     }
165     
166     private void computeFollow() {
167         for(int i=0;i<itemSets.size();++i) {
168             final int source = i;
169             transitions.get(i).forEachEntry(new TIntIntProcedure() {
170                 @Override
171                 public boolean execute(int symbol, int target) {
172                     if(symbol < 0) {
173                         long s = getS(source, ~symbol);
174                         int id = sMap.size();
175                         sMap.add(s);
176                         sMapInv.put(s, id);
177                     }
178                     return true;
179                 }
180             });
181         }
182
183         // initfollow
184         follow = new TIntHashSet[sMap.size()];
185         final TIntHashSet[] gread = new TIntHashSet[follow.length];
186         final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
187         for(int i=0;i<follow.length;++i) {
188             gread[i] = new TIntHashSet();
189             gla[i] = new TIntHashSet();
190         }
191         for(int i=0;i<follow.length;++i) {
192             final int id = i;
193             long s = sMap.get(i);
194             int source = getState(s);
195             int symbol = getSymbol(s);
196             final int target = transitions.get(source).get(~symbol);
197             final TIntHashSet drSet = new TIntHashSet();
198             transitions.get(target).forEachEntry(new TIntIntProcedure() {
199                 @Override
200                 public boolean execute(int symbol2, int target2) {
201                     if(symbol2 >= 0)
202                         drSet.add(symbol2);
203                     else if(grammar.nullable[~symbol2])
204                         gread[sMapInv.get(getS(target, ~symbol2))].add(id);
205                     return true;
206                 }
207             });
208             if(finalStates.contains(target))
209                 drSet.add(grammar.terminalNames.length-1);
210             follow[i] = drSet;
211             
212             ItemSet set = itemSets.get(target);
213             for(Item targetItem : set.items) {
214                 Prod prod = grammar.prods.get(targetItem.production);
215                 if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
216                     for(Item sourceItem : itemSets.get(source).items) {
217                         if(sourceItem.production == targetItem.production &&
218                                 prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
219                             TLongHashSet visited = new TLongHashSet(); 
220                             traceBack(gla, id, visited, source, sourceItem);
221                         }
222                     }
223                     
224                 }
225             }
226         }
227         //System.out.println("follow: " + Arrays.toString(follow));
228         //System.out.println("gread: " + Arrays.toString(gread));
229         //System.out.println("gla: " + Arrays.toString(gla));
230         AnaGrammar.gclose(follow, gread);
231         AnaGrammar.gclose(follow, gla);
232         
233         /*System.out.println("Gla:");
234         for(int i=0;i<gla.length;++i) {
235             int iState = getState(sMap.get(i));
236             int iSymbol = getSymbol(sMap.get(i));
237             for(int j : gla[i].toArray()) {
238                 int jState = getState(sMap.get(j));
239                 int jSymbol = getSymbol(sMap.get(j));
240                 System.out.println("-- from --");
241                 System.out.println(itemSets.get(iState).toString(grammar));
242                 System.out.println("    symbol: " + grammar.nonterminalNames[iSymbol]);
243                 System.out.println("-- to --");
244                 System.out.println(itemSets.get(jState).toString(grammar));
245                 System.out.println("    symbol: " + grammar.nonterminalNames[jSymbol]);
246             }
247         }*/
248         
249         /*for(int i=0;i<follow.length;++i) {
250             long s = sMap.get(i);
251             int source = getState(s);
252             int symbol = getSymbol(s);
253             System.out.println("------------------------------");
254             System.out.println(itemSets.get(source).toString(grammar));
255             System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
256             System.out.print("Follow:");
257             for(int sym : follow[i].toArray())
258                 System.out.print(" " + grammar.terminalNames[sym]);
259             System.out.println();
260         }*/
261     }
262     
263     private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
264         if(visited.add( (((long)state)<<32) | (long)item.position )) {
265             Prod prod = grammar.prods.get(item.production);
266             if(item.stackPos == -1) {
267                 int id = sMapInv.get(getS(state, prod.lhs));
268                 gla[id].add(initialId);
269             }
270             
271             int backTransSymbol = backTransSymbols.get(state);
272             for(int prevState : backLinks.get(state).toArray())
273                 for(Item prevItem : itemSets.get(prevState).items)
274                     if(prevItem.production == item.production &&
275                             prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
276                         traceBack(gla, initialId, visited, prevState, prevItem);
277         }
278     }
279     
280     private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
281         if(visited.add( (((long)state)<<32) | (long)position )) {
282             int backTransSymbol = backTransSymbols.get(state);
283             Prod prod = grammar.prods.get(production);
284             boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
285             for(int prevState : backLinks.get(state).toArray()) {
286                 for(Item item : itemSets.get(prevState).items) {
287                     if(item.production == production &&
288                             prod.rhs.getTransition(item.position, backTransSymbol) == position)
289                         lookback(visited, la, production, prevState, item.position);
290                     if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
291                         int id = sMapInv.get(getS(prevState, prod.lhs));
292                         la.addAll(follow[id]);
293                     }
294                 }
295             }
296         }
297     }
298
299     private void createReduceActions() {
300         computeFollow();
301         for(int i=0;i<itemSets.size();++i) {
302             TIntIntHashMap trans = transitions.get(i);
303             if(finalStates.contains(i))
304                 trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
305             
306             TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
307             TIntIntHashMap stackPosMap = new TIntIntHashMap(); 
308             for(Item item : itemSets.get(i).items) {
309                 Prod prod = grammar.prods.get(item.production);
310                 if(prod.rhs.getAccepts(item.position)) {
311                     TIntHashSet la = laMap.get(item.production);
312                     if(la == null) {
313                         la = new TIntHashSet();
314                         laMap.put(item.production, la);
315                     }
316                     
317                     TLongHashSet visited = new TLongHashSet();
318                     lookback(visited, la, item.production, i, item.position);
319                     
320                     if(stackPosMap.containsKey(item.production)) {
321                         stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
322                     }
323                     else
324                         stackPosMap.put(item.production, item.stackPos);
325                 }
326             }
327             
328             // Create transitions
329             for(int production : laMap.keys()) {
330                 int stackPos = 0; //stackPosMap.get(production);
331                 TIntHashSet la = laMap.get(production);
332                 for(int symbol : la.toArray()) {
333                     if(trans.contains(symbol)) {
334                         int oldAction = trans.get(symbol);
335                         if(oldAction >= 0) {
336                             Prod prod = grammar.prods.get(production);
337                             if(prod.annotations.containsKey(symbol)) {
338                                 byte v = prod.annotations.get(symbol);
339                                 if(v == 1)
340                                     trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
341                             }
342                             else {
343                                 LOGGER.error("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
344                                 LOGGER.error(itemSets.get(i).toString(grammar));
345                             }
346                         }
347                         else {
348                             LOGGER.error("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
349                             LOGGER.error(itemSets.get(i).toString(grammar));
350                         }
351                     }
352                     else
353                         trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
354                 }
355             }
356             
357             // Check stacking conflicts
358             /*trans.forEachEntry(new TIntIntProcedure() {
359                 @Override
360                 public boolean execute(int a, int b) {
361                     if(b >= 0) {
362                         boolean kernelState = false;
363                         boolean nonkernelState = false;
364                         for(Item item : itemSets.get(b).items) {
365                             Prod prod = grammar.prods.get(item.production);
366                             if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
367                                 nonkernelState = true;
368                             else if(item.position != prod.rhs.getInitialState())
369                                 kernelState = true;
370                         }
371                         
372                         
373                         if(kernelState && nonkernelState) {
374                             System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
375                             System.err.println(itemSets.get(b).toString(grammar));
376                         }
377                     }
378                     return true;
379                 }
380             });*/
381         }
382     }
383     
384     public static ParseTable build(AnaGrammar grammar) {
385         ParseTableBuilder builder = new ParseTableBuilder(grammar);
386         
387         builder.initialStates = new int[grammar.initialNonterminals.length];
388         for(int i=0;i<grammar.initialNonterminals.length;++i) {
389             ArrayList<Item> seed = new ArrayList<Item>();
390             int prodId = grammar.prods.size()-i-1;
391             seed.add(new Item(prodId, 
392                     grammar.prods.get(prodId).rhs.getInitialState(), 0));
393             builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
394         }
395         
396         builder.createReduceActions();
397         
398         LOGGER.info("States: " + builder.itemSets.size());
399         
400         //builder.visualize();
401         
402         //builder.printParseTable();
403         return builder.getParseTable();
404     }
405
406     private ParseTable getParseTable() {
407         int[] productionInfo = new int[grammar.prods.size()];
408         for(int i=0;i<productionInfo.length;++i) {
409             Prod prod = grammar.prods.get(i);
410             productionInfo[i] = prod.lhs;
411         }
412         
413         int[][] actionTable = new int[transitions.size()][];
414         int[][] gotoTable = new int[transitions.size()][];
415         for(int i=0;i<transitions.size();++i) {
416             final int[] actions = new int[grammar.terminalNames.length]; 
417             Arrays.fill(actions, ERROR_ACTION);
418             actionTable[i] = actions;
419             final int[] gotos = new int[grammar.nonterminalNames.length];
420             Arrays.fill(gotos, ERROR_ACTION);
421             gotoTable[i] = gotos;
422             final TIntIntHashMap stackOpMap = stackOps.get(i); 
423             transitions.get(i).forEachEntry(new TIntIntProcedure() {
424                 @Override
425                 public boolean execute(int a, int b) {
426                     int action = b | stackOpMap.get(a);
427                     if(a >= 0)
428                         actions[a] = action;
429                     else
430                         gotos[~a] = action;
431                     return true;
432                 }
433             });
434         }
435         
436         String[] stateDescriptions = new String[itemSets.size()];
437         for(int i=0;i<stateDescriptions.length;++i) {
438             final StringBuilder b = new StringBuilder();
439             b.append(itemSets.get(i).toString(grammar));
440             transitions.get(i).forEachEntry(new TIntIntProcedure() {
441                 @Override
442                 public boolean execute(int symbol, int action) {
443                     if(symbol >= 0) {
444                         b.append("\n    ").append(grammar.terminalNames[symbol]).append(" ->");
445                         if((action & REDUCE_MASK) == 0) {
446                             if((action & POP_MASK) != 0)
447                                 b.append(" POP");
448                             if((action & PUSH_MASK) != 0)
449                                 b.append(" PUSH");
450                             b.append(" SHIFT(").append(action&STATE_MASK).append(")");
451                         }
452                         else {
453                             if(action == 0xfffffffe)
454                                 b.append(" ACCEPT");
455                             else
456                                 b.append(" REDUCE(").append(action&STATE_MASK).append(")");
457                         }
458                     }
459                     else {
460                         b.append("\n    ").append(grammar.nonterminalNames[~symbol]).append(" ->")
461                          .append(" GOTO(").append(action).append(")");
462                     }
463                     return true;
464                 }
465             });
466             stateDescriptions[i] = b.toString();
467         }
468         
469         //printParseTable();
470         return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
471                 initialStates, stateDescriptions);
472     }
473
474     private void printParseTable() {
475         final ItemSet[] stateSets = new ItemSet[states.size()];
476         states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
477             @Override
478             public boolean execute(ItemSet a, int b) {
479                 stateSets[b] = a;
480                 return true;
481             }
482         });
483         for(int i=0;i<stateSets.length;++i) {
484             LOGGER.info("--- State " + i + " ---");
485             LOGGER.info(stateSets[i].toString(grammar));
486             final TIntIntHashMap stackOp = stackOps.get(i);
487             transitions.get(i).forEachEntry(new TIntIntProcedure() {
488                 @Override
489                 public boolean execute(int a, int b) {
490                     int sOp = stackOp.get(a);
491                     LOGGER.info(grammar.getName(a) + " -> ");
492                     if(sOp != 0) {
493                         LOGGER.info("[");
494                         if((sOp & PUSH_MASK) != 0) {
495                             sOp ^= PUSH_MASK;
496                             LOGGER.info("PUSH ");
497                         }
498                         if((sOp & POP_MASK) != 0) {
499                             sOp ^= POP_MASK;
500                             LOGGER.info("POP ");
501                         }
502                         if(sOp != 0)
503                             LOGGER.info(Integer.toString(sOp));
504                         LOGGER.info("] ");
505                     }
506                     if((b & REDUCE_MASK) != 0) {
507                         b ^= REDUCE_MASK;
508                         LOGGER.info("reduce " + b); // grammar.prods.get(~b).toString(grammar));
509                     }
510                     else {
511                         LOGGER.info("shift " + b);
512                     }
513                     return true;
514                 }
515             });
516         }
517     }
518 }