1 package org.simantics.scl.compiler.parser.generator.table;
3 import java.util.ArrayList;
4 import java.util.Arrays;
6 import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar;
7 import org.simantics.scl.compiler.parser.generator.grammar.Prod;
8 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory;
11 import gnu.trove.list.array.TIntArrayList;
12 import gnu.trove.list.array.TLongArrayList;
13 import gnu.trove.map.hash.TIntIntHashMap;
14 import gnu.trove.map.hash.TIntObjectHashMap;
15 import gnu.trove.map.hash.TLongIntHashMap;
16 import gnu.trove.map.hash.TObjectIntHashMap;
17 import gnu.trove.procedure.TIntIntProcedure;
18 import gnu.trove.procedure.TIntObjectProcedure;
19 import gnu.trove.procedure.TObjectIntProcedure;
20 import gnu.trove.set.hash.THashSet;
21 import gnu.trove.set.hash.TIntHashSet;
22 import gnu.trove.set.hash.TLongHashSet;
24 public class ParseTableBuilder {
25 private static final Logger LOGGER = LoggerFactory.getLogger(ParseTableBuilder.class);
26 public final static int MAX_STACK_ID = 10;
28 private static final int STATE_MASK = 0x0fff;
29 private static final int REDUCE_MASK = 0x8000;
30 private static final int POP_MASK = 0x4000;
31 private static final int PUSH_MASK = 0x2000;
32 public static final int ERROR_ACTION = 0xffff;
33 private static final int ACCEPT_ACTION = 0xfffe;
35 final AnaGrammar grammar;
36 private TObjectIntHashMap<ItemSet> states = new TObjectIntHashMap<ItemSet>();
37 private ArrayList<ItemSet> itemSets = new ArrayList<ItemSet>();
38 private ArrayList<TIntIntHashMap> transitions = new ArrayList<TIntIntHashMap>();
39 private ArrayList<TIntIntHashMap> stackOps = new ArrayList<TIntIntHashMap>();
40 private TIntArrayList backTransSymbols = new TIntArrayList();
41 private ArrayList<TIntArrayList> backLinks = new ArrayList<TIntArrayList>();
43 TIntHashSet finalStates = new TIntHashSet();
45 private ParseTableBuilder(AnaGrammar grammar) {
46 this.grammar = grammar;
49 private static boolean isNonterminal(int symbol) {
53 private void close(ArrayList<Item> items) {
54 THashSet<Item> itemSet = new THashSet<Item>(items);
55 for(int i=0;i<items.size();++i) {
56 Item item = items.get(i);
57 for(int nextSymbol : item.nextSymbols(grammar))
58 if(isNonterminal(nextSymbol)) {
59 nextSymbol = ~nextSymbol;
60 int pEnd = grammar.nonterminalPos[nextSymbol+1];
61 for(int pId=grammar.nonterminalPos[nextSymbol];pId<pEnd;++pId) {
62 Item newItem = new Item(pId, grammar.prods.get(pId).rhs.getInitialState(), -1);
63 if(itemSet.add(newItem))
70 private int getState(int backTransSymbol, ArrayList<Item> items) {
73 final ItemSet itemSet = new ItemSet(items);
74 if(states.contains(itemSet))
75 return states.get(itemSet);
76 final int newState = states.size();
77 states.put(itemSet, newState);
78 itemSets.add(itemSet);
79 backTransSymbols.add(backTransSymbol);
80 backLinks.add(new TIntArrayList(2));
83 TIntObjectHashMap<ArrayList<Item>> transitionMap = new TIntObjectHashMap<ArrayList<Item>>();
85 for(Item item : items) {
86 for(int s : item.nextSymbols(grammar)) {
87 ArrayList<Item> l = transitionMap.get(s);
89 l = new ArrayList<Item>();
90 transitionMap.put(s, l);
92 l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos));
96 final TIntIntHashMap trans = new TIntIntHashMap();
97 final TIntIntHashMap stackOpMap = new TIntIntHashMap();
98 transitions.add(trans);
99 stackOps.add(stackOpMap);
100 if(transitionMap.remove(grammar.terminalNames.length-1)!=null) {
101 finalStates.add(newState);
103 transitionMap.forEachEntry(new TIntObjectProcedure<ArrayList<Item>>() {
105 public boolean execute(int a, ArrayList<Item> b) {
106 boolean stackShift = false;
107 int minStackPos = Integer.MAX_VALUE;
109 if(item.stackPos == -1)
112 minStackPos = Math.min(minStackPos, item.stackPos);
115 if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) {
117 //System.out.println("minStackPos = " + minStackPos);
119 if(item.stackPos >= 0)
122 boolean stackOverflow = false;
124 stackOp |= PUSH_MASK;
127 if(item.stackPos > MAX_STACK_ID)
128 stackOverflow = true;
131 stackOpMap.put(a, stackOp);
132 //System.out.println(newState + " " + grammar.getName(a) + " " + stackOp);
135 LOGGER.error("Stack overflow when following " + grammar.getName(a) + " at");
136 LOGGER.error(itemSet.toString(grammar));
139 int state = getState(a, b);
141 backLinks.get(state).add(newState);
150 TLongArrayList sMap = new TLongArrayList();
151 TLongIntHashMap sMapInv = new TLongIntHashMap();
152 TIntHashSet[] follow;
154 private static int getState(long s) {
155 return (int)(s >> 32);
158 private static int getSymbol(long s) {
162 private static long getS(int state, int symbol) {
163 return (((long)state) << 32) | (long)symbol;
166 private void computeFollow() {
167 for(int i=0;i<itemSets.size();++i) {
168 final int source = i;
169 transitions.get(i).forEachEntry(new TIntIntProcedure() {
171 public boolean execute(int symbol, int target) {
173 long s = getS(source, ~symbol);
174 int id = sMap.size();
184 follow = new TIntHashSet[sMap.size()];
185 final TIntHashSet[] gread = new TIntHashSet[follow.length];
186 final TIntHashSet[] gla = new TIntHashSet[sMap.size()];
187 for(int i=0;i<follow.length;++i) {
188 gread[i] = new TIntHashSet();
189 gla[i] = new TIntHashSet();
191 for(int i=0;i<follow.length;++i) {
193 long s = sMap.get(i);
194 int source = getState(s);
195 int symbol = getSymbol(s);
196 final int target = transitions.get(source).get(~symbol);
197 final TIntHashSet drSet = new TIntHashSet();
198 transitions.get(target).forEachEntry(new TIntIntProcedure() {
200 public boolean execute(int symbol2, int target2) {
203 else if(grammar.nullable[~symbol2])
204 gread[sMapInv.get(getS(target, ~symbol2))].add(id);
208 if(finalStates.contains(target))
209 drSet.add(grammar.terminalNames.length-1);
212 ItemSet set = itemSets.get(target);
213 for(Item targetItem : set.items) {
214 Prod prod = grammar.prods.get(targetItem.production);
215 if(grammar.almostAccepts(prod.rhs, targetItem.position)) {
216 for(Item sourceItem : itemSets.get(source).items) {
217 if(sourceItem.production == targetItem.production &&
218 prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) {
219 TLongHashSet visited = new TLongHashSet();
220 traceBack(gla, id, visited, source, sourceItem);
227 //System.out.println("follow: " + Arrays.toString(follow));
228 //System.out.println("gread: " + Arrays.toString(gread));
229 //System.out.println("gla: " + Arrays.toString(gla));
230 AnaGrammar.gclose(follow, gread);
231 AnaGrammar.gclose(follow, gla);
233 /*System.out.println("Gla:");
234 for(int i=0;i<gla.length;++i) {
235 int iState = getState(sMap.get(i));
236 int iSymbol = getSymbol(sMap.get(i));
237 for(int j : gla[i].toArray()) {
238 int jState = getState(sMap.get(j));
239 int jSymbol = getSymbol(sMap.get(j));
240 System.out.println("-- from --");
241 System.out.println(itemSets.get(iState).toString(grammar));
242 System.out.println(" symbol: " + grammar.nonterminalNames[iSymbol]);
243 System.out.println("-- to --");
244 System.out.println(itemSets.get(jState).toString(grammar));
245 System.out.println(" symbol: " + grammar.nonterminalNames[jSymbol]);
249 /*for(int i=0;i<follow.length;++i) {
250 long s = sMap.get(i);
251 int source = getState(s);
252 int symbol = getSymbol(s);
253 System.out.println("------------------------------");
254 System.out.println(itemSets.get(source).toString(grammar));
255 System.out.println("Symbol: " + grammar.nonterminalNames[symbol]);
256 System.out.print("Follow:");
257 for(int sym : follow[i].toArray())
258 System.out.print(" " + grammar.terminalNames[sym]);
259 System.out.println();
263 private void traceBack(TIntHashSet[] gla, int initialId, TLongHashSet visited, int state, Item item) {
264 if(visited.add( (((long)state)<<32) | (long)item.position )) {
265 Prod prod = grammar.prods.get(item.production);
266 if(item.stackPos == -1) {
267 int id = sMapInv.get(getS(state, prod.lhs));
268 gla[id].add(initialId);
271 int backTransSymbol = backTransSymbols.get(state);
272 for(int prevState : backLinks.get(state).toArray())
273 for(Item prevItem : itemSets.get(prevState).items)
274 if(prevItem.production == item.production &&
275 prod.rhs.getTransition(prevItem.position, backTransSymbol) == item.position)
276 traceBack(gla, initialId, visited, prevState, prevItem);
280 private void lookback( TLongHashSet visited, TIntHashSet la, int production, int state, int position) {
281 if(visited.add( (((long)state)<<32) | (long)position )) {
282 int backTransSymbol = backTransSymbols.get(state);
283 Prod prod = grammar.prods.get(production);
284 boolean mightBeInitial = prod.rhs.getTransition(prod.rhs.getInitialState(), backTransSymbol) == position;
285 for(int prevState : backLinks.get(state).toArray()) {
286 for(Item item : itemSets.get(prevState).items) {
287 if(item.production == production &&
288 prod.rhs.getTransition(item.position, backTransSymbol) == position)
289 lookback(visited, la, production, prevState, item.position);
290 if(mightBeInitial && grammar.prods.get(item.production).rhs.getTransition(item.position, ~prod.lhs) >= 0) {
291 int id = sMapInv.get(getS(prevState, prod.lhs));
292 la.addAll(follow[id]);
299 private void createReduceActions() {
301 for(int i=0;i<itemSets.size();++i) {
302 TIntIntHashMap trans = transitions.get(i);
303 if(finalStates.contains(i))
304 trans.put(grammar.terminalNames.length-1, ACCEPT_ACTION);
306 TIntObjectHashMap<TIntHashSet> laMap = new TIntObjectHashMap<TIntHashSet>();
307 TIntIntHashMap stackPosMap = new TIntIntHashMap();
308 for(Item item : itemSets.get(i).items) {
309 Prod prod = grammar.prods.get(item.production);
310 if(prod.rhs.getAccepts(item.position)) {
311 TIntHashSet la = laMap.get(item.production);
313 la = new TIntHashSet();
314 laMap.put(item.production, la);
317 TLongHashSet visited = new TLongHashSet();
318 lookback(visited, la, item.production, i, item.position);
320 if(stackPosMap.containsKey(item.production)) {
321 stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice
324 stackPosMap.put(item.production, item.stackPos);
328 // Create transitions
329 for(int production : laMap.keys()) {
330 int stackPos = 0; //stackPosMap.get(production);
331 TIntHashSet la = laMap.get(production);
332 for(int symbol : la.toArray()) {
333 if(trans.contains(symbol)) {
334 int oldAction = trans.get(symbol);
336 Prod prod = grammar.prods.get(production);
337 if(prod.annotations.containsKey(symbol)) {
338 byte v = prod.annotations.get(symbol);
340 trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
343 LOGGER.error("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
344 LOGGER.error(itemSets.get(i).toString(grammar));
348 LOGGER.error("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context");
349 LOGGER.error(itemSets.get(i).toString(grammar));
353 trans.put(symbol, REDUCE_MASK | production | (stackPos << 13));
357 // Check stacking conflicts
358 /*trans.forEachEntry(new TIntIntProcedure() {
360 public boolean execute(int a, int b) {
362 boolean kernelState = false;
363 boolean nonkernelState = false;
364 for(Item item : itemSets.get(b).items) {
365 Prod prod = grammar.prods.get(item.production);
366 if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a))
367 nonkernelState = true;
368 else if(item.position != prod.rhs.getInitialState())
373 if(kernelState && nonkernelState) {
374 System.err.println("Stacking conflict when following " + grammar.getName(a) + " to");
375 System.err.println(itemSets.get(b).toString(grammar));
384 public static ParseTable build(AnaGrammar grammar) {
385 ParseTableBuilder builder = new ParseTableBuilder(grammar);
387 builder.initialStates = new int[grammar.initialNonterminals.length];
388 for(int i=0;i<grammar.initialNonterminals.length;++i) {
389 ArrayList<Item> seed = new ArrayList<Item>();
390 int prodId = grammar.prods.size()-i-1;
391 seed.add(new Item(prodId,
392 grammar.prods.get(prodId).rhs.getInitialState(), 0));
393 builder.initialStates[i] = builder.getState(REDUCE_MASK, seed);
396 builder.createReduceActions();
398 LOGGER.info("States: " + builder.itemSets.size());
400 //builder.visualize();
402 //builder.printParseTable();
403 return builder.getParseTable();
406 private ParseTable getParseTable() {
407 int[] productionInfo = new int[grammar.prods.size()];
408 for(int i=0;i<productionInfo.length;++i) {
409 Prod prod = grammar.prods.get(i);
410 productionInfo[i] = prod.lhs;
413 int[][] actionTable = new int[transitions.size()][];
414 int[][] gotoTable = new int[transitions.size()][];
415 for(int i=0;i<transitions.size();++i) {
416 final int[] actions = new int[grammar.terminalNames.length];
417 Arrays.fill(actions, ERROR_ACTION);
418 actionTable[i] = actions;
419 final int[] gotos = new int[grammar.nonterminalNames.length];
420 Arrays.fill(gotos, ERROR_ACTION);
421 gotoTable[i] = gotos;
422 final TIntIntHashMap stackOpMap = stackOps.get(i);
423 transitions.get(i).forEachEntry(new TIntIntProcedure() {
425 public boolean execute(int a, int b) {
426 int action = b | stackOpMap.get(a);
436 String[] stateDescriptions = new String[itemSets.size()];
437 for(int i=0;i<stateDescriptions.length;++i) {
438 final StringBuilder b = new StringBuilder();
439 b.append(itemSets.get(i).toString(grammar));
440 transitions.get(i).forEachEntry(new TIntIntProcedure() {
442 public boolean execute(int symbol, int action) {
444 b.append("\n ").append(grammar.terminalNames[symbol]).append(" ->");
445 if((action & REDUCE_MASK) == 0) {
446 if((action & POP_MASK) != 0)
448 if((action & PUSH_MASK) != 0)
450 b.append(" SHIFT(").append(action&STATE_MASK).append(")");
453 if(action == 0xfffffffe)
456 b.append(" REDUCE(").append(action&STATE_MASK).append(")");
460 b.append("\n ").append(grammar.nonterminalNames[~symbol]).append(" ->")
461 .append(" GOTO(").append(action).append(")");
466 stateDescriptions[i] = b.toString();
470 return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo,
471 initialStates, stateDescriptions);
474 private void printParseTable() {
475 final ItemSet[] stateSets = new ItemSet[states.size()];
476 states.forEachEntry(new TObjectIntProcedure<ItemSet>() {
478 public boolean execute(ItemSet a, int b) {
483 for(int i=0;i<stateSets.length;++i) {
484 LOGGER.info("--- State " + i + " ---");
485 LOGGER.info(stateSets[i].toString(grammar));
486 final TIntIntHashMap stackOp = stackOps.get(i);
487 transitions.get(i).forEachEntry(new TIntIntProcedure() {
489 public boolean execute(int a, int b) {
490 int sOp = stackOp.get(a);
491 LOGGER.info(grammar.getName(a) + " -> ");
494 if((sOp & PUSH_MASK) != 0) {
496 LOGGER.info("PUSH ");
498 if((sOp & POP_MASK) != 0) {
503 LOGGER.info(Integer.toString(sOp));
506 if((b & REDUCE_MASK) != 0) {
508 LOGGER.info("reduce " + b); // grammar.prods.get(~b).toString(grammar));
511 LOGGER.info("shift " + b);