1 package org.simantics.scl.compiler.parser.generator.table;
3 import java.util.ArrayList;
4 import java.util.Arrays;
6 import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
7 import org.simantics.scl.compiler.parser.generator.grammar.Prod;
9 import gnu.trove.list.array.TIntArrayList;
10 import gnu.trove.list.array.TLongArrayList;
11 import gnu.trove.map.hash.TIntIntHashMap;
12 import gnu.trove.map.hash.TIntObjectHashMap;
13 import gnu.trove.map.hash.TLongIntHashMap;
14 import gnu.trove.map.hash.TObjectIntHashMap;
15 import gnu.trove.procedure.TIntIntProcedure;
16 import gnu.trove.procedure.TIntObjectProcedure;
17 import gnu.trove.procedure.TObjectIntProcedure;
18 import gnu.trove.set.hash.THashSet;
19 import gnu.trove.set.hash.TIntHashSet;
20 import gnu.trove.set.hash.TLongHashSet;
22 public class ParseTableBuilder {
23 public final static int MAX_STACK_ID = 10;
25 private static final int STATE_MASK = 0x0fff;
26 private static final int REDUCE_MASK = 0x8000;
27 private static final int POP_MASK = 0x4000;
28 private static final int PUSH_MASK = 0x2000;
29 public static final int ERROR_ACTION = 0xffff;
30 private static final int ACCEPT_ACTION = 0xfffe;
32 final AnaGrammar grammar;
33 private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
34 private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
35 private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
36 private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
37 private TIntArrayList backTransSymbols = new TIntArrayList();
38 private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
40 TIntHashSet finalStates = new TIntHashSet();
42 private ParseTableBuilder(AnaGrammar grammar) {
43 this.grammar = grammar;
46 private static boolean isNonterminal(int symbol) {
50 private void close(ArrayList<Item> items) {
51 THashSet<Item> itemSet = new THashSet<Item>(items);
52 for(int i=0;i<items.size();++i) {
53 Item item = items.get(i);
54 for(int nextSymbol : item.nextSymbols(grammar))
55 if(isNonterminal(nextSymbol)) {
56 nextSymbol = ~nextSymbol;
57 int pEnd = grammar.nonterminalPos[nextSymbol+1];
58 for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
59 Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
60 if(itemSet.add(newItem))
67 private int getState(int backTransSymbol, ArrayList<Item> items) {
70 final ItemSet itemSet = new ItemSet(items);
71 if(states.contains(itemSet))
72 return states.get(itemSet);
73 final int newState = states.size();
74 states.put(itemSet, newState);
75 itemSets.add(itemSet);
76 backTransSymbols.add(backTransSymbol);
77 backLinks.add(new TIntArrayList(2));
80 TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
82 for(Item item : items) {
83 for(int s : item.nextSymbols(grammar)) {
84 ArrayList<Item> l = transitionMap.get(s);
86 l = new ArrayList<Item>();
87 transitionMap.put(s, l);
89 l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
93 final TIntIntHashMap trans = new TIntIntHashMap();
94 final TIntIntHashMap stackOpMap = new TIntIntHashMap();
95 transitions.add(trans);
96 stackOps.add(stackOpMap);
97 if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
98 finalStates.add(newState);
100 transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
102 public boolean execute(int a, ArrayList<Item> b) {
103 boolean stackShift = false;
104 int minStackPos = Integer.MAX_VALUE;
106 if(item.stackPos == -1)
109 minStackPos = Math.min(minStackPos, item.stackPos);
112 if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
114 //System.out.println("minStackPos = " + minStackPos);
116 if(item.stackPos >= 0)
119 boolean stackOverflow = false;
121 stackOp |= PUSH_MASK;
124 if(item.stackPos > MAX_STACK_ID)
125 stackOverflow = true;
128 stackOpMap.put(a, stackOp);
129 System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
132 System.err.println("Stack overflow when following " + grammar.getName(a) + " at");
133 System.err.println(itemSet.toString(grammar));
136 int state = getState(a, b);
138 backLinks.get(state).add(newState);
147 TLongArrayList sMap = new TLongArrayList();
148 TLongIntHashMap sMapInv = new TLongIntHashMap();
149 TIntHashSet[] follow;
151 private static int getState(long s) {
152 return (int)(s >> 32);
155 private static int getSymbol(long s) {
159 private static long getS(int state, int symbol) {
160 return (((long)state) << 32) | (long)symbol;
163 private void computeFollow() {
164 for(int i=0;i<itemSets.size();++i) {
165 final int source = i;
166 transitions.get(i).forEachEntry(new TIntIntProcedure() {
168 public boolean execute(int symbol, int target) {
170 long s = getS(source, ~symbol);
171 int id = sMap.size();
181 follow = new TIntHashSet[sMap.size()];
182 final TIntHashSet[] gread = new TIntHashSet[follow.length];
183 final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
184 for(int i=0;i<follow.length;++i) {
185 gread[i] = new TIntHashSet();
186 gla[i] = new TIntHashSet();
188 for(int i=0;i<follow.length;++i) {
190 long s = sMap.get(i);
191 int source = getState(s);
192 int symbol = getSymbol(s);
193 final int target = transitions.get(source).get(~symbol);
194 final TIntHashSet drSet = new TIntHashSet();
195 transitions.get(target).forEachEntry(new TIntIntProcedure() {
197 public boolean execute(int symbol2, int target2) {
200 else if(grammar.nullable[~symbol2])
201 gread[sMapInv.get(getS(target, ~symbol2))].add(id);
205 if(finalStates.contains(target))
206 drSet.add(grammar.terminalNames.length-1);
209 ItemSet set = itemSets.get(target);
210 for(Item targetItem : set.items) {
211 Prod prod = grammar.prods.get(targetItem.production);
212 if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
213 for(Item sourceItem : itemSets.get(source).items) {
214 if(sourceItem.production == targetItem.production &&
215 prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
216 TLongHashSet visited = new TLongHashSet();
217 traceBack(gla, id, visited, source, sourceItem);
224 //System.out.println("follow: " + Arrays.toString(follow));
225 //System.out.println("gread: " + Arrays.toString(gread));
226 //System.out.println("gla: " + Arrays.toString(gla));
227 AnaGrammar.gclose(follow, gread);
228 AnaGrammar.gclose(follow, gla);
230 /*System.out.println("Gla:");
231 for(int i=0;i<gla.length;++i) {
232 int iState = getState(sMap.get(i));
233 int iSymbol = getSymbol(sMap.get(i));
234 for(int j : gla[i].toArray()) {
235 int jState = getState(sMap.get(j));
236 int jSymbol = getSymbol(sMap.get(j));
237 System.out.println("-- from --");
238 System.out.println(itemSets.get(iState).toString(grammar));
239 System.out.println(" symbol: " + grammar.nonterminalNames[iSymbol]);
240 System.out.println("-- to --");
241 System.out.println(itemSets.get(jState).toString(grammar));
242 System.out.println(" symbol: " + grammar.nonterminalNames[jSymbol]);
246 /*for(int i=0;i<follow.length;++i) {
247 long s = sMap.get(i);
248 int source = getState(s);
249 int symbol = getSymbol(s);
250 System.out.println("------------------------------");
251 System.out.println(itemSets.get(source).toString(grammar));
252 System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
253 System.out.print("Follow:");
254 for(int sym : follow[i].toArray())
255 System.out.print(" " + grammar.terminalNames[sym]);
256 System.out.println();
260 private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
261 if(visited.add( (((long)state)<<32) | (long)item.position )) {
262 Prod prod = grammar.prods.get(item.production);
263 if(item.stackPos == -1) {
264 int id = sMapInv.get(getS(state, prod.lhs));
265 gla[id].add(initialId);
268 int backTransSymbol = backTransSymbols.get(state);
269 for(int prevState : backLinks.get(state).toArray())
270 for(Item prevItem : itemSets.get(prevState).items)
271 if(prevItem.production == item.production &&
272 prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
273 traceBack(gla, initialId, visited, prevState, prevItem);
277 private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
278 if(visited.add( (((long)state)<<32) | (long)position )) {
279 int backTransSymbol = backTransSymbols.get(state);
280 Prod prod = grammar.prods.get(production);
281 boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
282 for(int prevState : backLinks.get(state).toArray()) {
283 for(Item item : itemSets.get(prevState).items) {
284 if(item.production == production &&
285 prod.rhs.getTransition(item.position, backTransSymbol) == position)
286 lookback(visited, la, production, prevState, item.position);
287 if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
288 int id = sMapInv.get(getS(prevState, prod.lhs));
289 la.addAll(follow[id]);
296 private void createReduceActions() {
298 for(int i=0;i<itemSets.size();++i) {
299 TIntIntHashMap trans = transitions.get(i);
300 if(finalStates.contains(i))
301 trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
303 TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
304 TIntIntHashMap stackPosMap = new TIntIntHashMap();
305 for(Item item : itemSets.get(i).items) {
306 Prod prod = grammar.prods.get(item.production);
307 if(prod.rhs.getAccepts(item.position)) {
308 TIntHashSet la = laMap.get(item.production);
310 la = new TIntHashSet();
311 laMap.put(item.production, la);
314 TLongHashSet visited = new TLongHashSet();
315 lookback(visited, la, item.production, i, item.position);
317 if(stackPosMap.containsKey(item.production)) {
318 stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
321 stackPosMap.put(item.production, item.stackPos);
325 // Create transitions
326 for(int production : laMap.keys()) {
327 int stackPos = 0; //stackPosMap.get(production);
328 TIntHashSet la = laMap.get(production);
329 for(int symbol : la.toArray()) {
330 if(trans.contains(symbol)) {
331 int oldAction = trans.get(symbol);
333 Prod prod = grammar.prods.get(production);
334 if(prod.annotations.containsKey(symbol)) {
335 byte v = prod.annotations.get(symbol);
337 trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
340 System.err.println("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
341 System.err.println(itemSets.get(i).toString(grammar));
345 System.err.println("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
346 System.err.println(itemSets.get(i).toString(grammar));
350 trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
354 // Check stacking conflicts
355 /*trans.forEachEntry(new TIntIntProcedure() {
357 public boolean execute(int a, int b) {
359 boolean kernelState = false;
360 boolean nonkernelState = false;
361 for(Item item : itemSets.get(b).items) {
362 Prod prod = grammar.prods.get(item.production);
363 if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
364 nonkernelState = true;
365 else if(item.position != prod.rhs.getInitialState())
370 if(kernelState && nonkernelState) {
371 System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
372 System.err.println(itemSets.get(b).toString(grammar));
381 public static ParseTable build(AnaGrammar grammar) {
382 ParseTableBuilder builder = new ParseTableBuilder(grammar);
384 builder.initialStates = new int[grammar.initialNonterminals.length];
385 for(int i=0;i<grammar.initialNonterminals.length;++i) {
386 ArrayList<Item> seed = new ArrayList<Item>();
387 int prodId = grammar.prods.size()-i-1;
388 seed.add(new Item(prodId,
389 grammar.prods.get(prodId).rhs.getInitialState(), 0));
390 builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
393 builder.createReduceActions();
395 System.out.println("States: " + builder.itemSets.size());
397 //builder.visualize();
399 builder.printParseTable();
400 return builder.getParseTable();
403 private ParseTable getParseTable() {
404 int[] productionInfo = new int[grammar.prods.size()];
405 for(int i=0;i<productionInfo.length;++i) {
406 Prod prod = grammar.prods.get(i);
407 productionInfo[i] = prod.lhs;
410 int[][] actionTable = new int[transitions.size()][];
411 int[][] gotoTable = new int[transitions.size()][];
412 for(int i=0;i<transitions.size();++i) {
413 final int[] actions = new int[grammar.terminalNames.length];
414 Arrays.fill(actions, ERROR_ACTION);
415 actionTable[i] = actions;
416 final int[] gotos = new int[grammar.nonterminalNames.length];
417 Arrays.fill(gotos, ERROR_ACTION);
418 gotoTable[i] = gotos;
419 final TIntIntHashMap stackOpMap = stackOps.get(i);
420 transitions.get(i).forEachEntry(new TIntIntProcedure() {
422 public boolean execute(int a, int b) {
423 int action = b | stackOpMap.get(a);
433 String[] stateDescriptions = new String[itemSets.size()];
434 for(int i=0;i<stateDescriptions.length;++i) {
435 final StringBuilder b = new StringBuilder();
436 b.append(itemSets.get(i).toString(grammar));
437 transitions.get(i).forEachEntry(new TIntIntProcedure() {
439 public boolean execute(int symbol, int action) {
441 b.append("\n ").append(grammar.terminalNames[symbol]).append(" ->");
442 if((action & REDUCE_MASK) == 0) {
443 if((action & POP_MASK) != 0)
445 if((action & PUSH_MASK) != 0)
447 b.append(" SHIFT(").append(action&STATE_MASK).append(")");
450 if(action == 0xfffffffe)
453 b.append(" REDUCE(").append(action&STATE_MASK).append(")");
457 b.append("\n ").append(grammar.nonterminalNames[~symbol]).append(" ->")
458 .append(" GOTO(").append(action).append(")");
463 stateDescriptions[i] = b.toString();
467 return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
468 initialStates, stateDescriptions);
471 private void printParseTable() {
472 final ItemSet[] stateSets = new ItemSet[states.size()];
473 states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
475 public boolean execute(ItemSet a, int b) {
480 for(int i=0;i<stateSets.length;++i) {
481 System.out.println("--- State " + i + " ---");
482 System.out.println(stateSets[i].toString(grammar));
483 final TIntIntHashMap stackOp = stackOps.get(i);
484 transitions.get(i).forEachEntry(new TIntIntProcedure() {
486 public boolean execute(int a, int b) {
487 int sOp = stackOp.get(a);
488 System.out.print(grammar.getName(a) + " -> ");
490 System.out.print("[");
491 if((sOp & PUSH_MASK) != 0) {
493 System.out.print("PUSH ");
495 if((sOp & POP_MASK) != 0) {
497 System.out.print("POP ");
500 System.out.print(sOp);
501 System.out.print("] ");
503 if((b & REDUCE_MASK) != 0) {
505 System.out.println("reduce " + b); // grammar.prods.get(~b).toString(grammar));
508 System.out.println("shift " + b);