package org.simantics.scl.compiler.parser.generator.table; import java.util.ArrayList; import java.util.Arrays; import org.simantics.scl.compiler.parser.generator.grammar.AnaGrammar; import org.simantics.scl.compiler.parser.generator.grammar.Prod; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import gnu.trove.list.array.TIntArrayList; import gnu.trove.list.array.TLongArrayList; import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.map.hash.TLongIntHashMap; import gnu.trove.map.hash.TObjectIntHashMap; import gnu.trove.procedure.TIntIntProcedure; import gnu.trove.procedure.TIntObjectProcedure; import gnu.trove.procedure.TObjectIntProcedure; import gnu.trove.set.hash.THashSet; import gnu.trove.set.hash.TIntHashSet; import gnu.trove.set.hash.TLongHashSet; public class ParseTableBuilder { private static final Logger LOGGER = LoggerFactory.getLogger(ParseTableBuilder.class); public final static int MAX_STACK_ID = 10; private static final int STATE_MASK = 0x0fff; private static final int REDUCE_MASK = 0x8000; private static final int POP_MASK = 0x4000; private static final int PUSH_MASK = 0x2000; public static final int ERROR_ACTION = 0xffff; private static final int ACCEPT_ACTION = 0xfffe; final AnaGrammar grammar; private TObjectIntHashMap states = new TObjectIntHashMap(); private ArrayList itemSets = new ArrayList(); private ArrayList transitions = new ArrayList(); private ArrayList stackOps = new ArrayList(); private TIntArrayList backTransSymbols = new TIntArrayList(); private ArrayList backLinks = new ArrayList(); int[] initialStates; TIntHashSet finalStates = new TIntHashSet(); private ParseTableBuilder(AnaGrammar grammar) { this.grammar = grammar; } private static boolean isNonterminal(int symbol) { return symbol < 0; } private void close(ArrayList items) { THashSet itemSet = new THashSet(items); for(int i=0;i items) { // Create state close(items); final ItemSet itemSet = new ItemSet(items); if(states.contains(itemSet)) return states.get(itemSet); final int newState = states.size(); states.put(itemSet, newState); itemSets.add(itemSet); backTransSymbols.add(backTransSymbol); backLinks.add(new TIntArrayList(2)); // Create transitions TIntObjectHashMap> transitionMap = new TIntObjectHashMap>(); //close(items); for(Item item : items) { for(int s : item.nextSymbols(grammar)) { ArrayList l = transitionMap.get(s); if(l == null) { l = new ArrayList(); transitionMap.put(s, l); } l.add(new Item(item.production, item.nextPosition(grammar, s), item.stackPos)); } } final TIntIntHashMap trans = new TIntIntHashMap(); final TIntIntHashMap stackOpMap = new TIntIntHashMap(); transitions.add(trans); stackOps.add(stackOpMap); if(transitionMap.remove(grammar.terminalNames.length-1)!=null) { finalStates.add(newState); } transitionMap.forEachEntry(new TIntObjectProcedure>() { @Override public boolean execute(int a, ArrayList b) { boolean stackShift = false; int minStackPos = Integer.MAX_VALUE; for(Item item : b) { if(item.stackPos == -1) stackShift = true; else minStackPos = Math.min(minStackPos, item.stackPos); } int stackOp = 0; if(minStackPos > 0 && minStackPos != Integer.MAX_VALUE) { stackOp |= POP_MASK; //System.out.println("minStackPos = " + minStackPos); for(Item item : b) if(item.stackPos >= 0) --item.stackPos; } boolean stackOverflow = false; if(stackShift) { stackOp |= PUSH_MASK; for(Item item : b) { ++item.stackPos; if(item.stackPos > MAX_STACK_ID) stackOverflow = true; } } stackOpMap.put(a, stackOp); //System.out.println(newState + " " + grammar.getName(a) + " " + stackOp); if(stackOverflow) { LOGGER.error("Stack overflow when following " + grammar.getName(a) + " at"); LOGGER.error(itemSet.toString(grammar)); } else { int state = getState(a, b); trans.put(a, state); backLinks.get(state).add(newState); } return true; } }); return newState; } TLongArrayList sMap = new TLongArrayList(); TLongIntHashMap sMapInv = new TLongIntHashMap(); TIntHashSet[] follow; private static int getState(long s) { return (int)(s >> 32); } private static int getSymbol(long s) { return (int)s; } private static long getS(int state, int symbol) { return (((long)state) << 32) | (long)symbol; } private void computeFollow() { for(int i=0;i= 0) drSet.add(symbol2); else if(grammar.nullable[~symbol2]) gread[sMapInv.get(getS(target, ~symbol2))].add(id); return true; } }); if(finalStates.contains(target)) drSet.add(grammar.terminalNames.length-1); follow[i] = drSet; ItemSet set = itemSets.get(target); for(Item targetItem : set.items) { Prod prod = grammar.prods.get(targetItem.production); if(grammar.almostAccepts(prod.rhs, targetItem.position)) { for(Item sourceItem : itemSets.get(source).items) { if(sourceItem.production == targetItem.production && prod.rhs.getTransition(sourceItem.position, ~symbol) == targetItem.position) { TLongHashSet visited = new TLongHashSet(); traceBack(gla, id, visited, source, sourceItem); } } } } } //System.out.println("follow: " + Arrays.toString(follow)); //System.out.println("gread: " + Arrays.toString(gread)); //System.out.println("gla: " + Arrays.toString(gla)); AnaGrammar.gclose(follow, gread); AnaGrammar.gclose(follow, gla); /*System.out.println("Gla:"); for(int i=0;i= 0) { int id = sMapInv.get(getS(prevState, prod.lhs)); la.addAll(follow[id]); } } } } } private void createReduceActions() { computeFollow(); for(int i=0;i laMap = new TIntObjectHashMap(); TIntIntHashMap stackPosMap = new TIntIntHashMap(); for(Item item : itemSets.get(i).items) { Prod prod = grammar.prods.get(item.production); if(prod.rhs.getAccepts(item.position)) { TIntHashSet la = laMap.get(item.production); if(la == null) { la = new TIntHashSet(); laMap.put(item.production, la); } TLongHashSet visited = new TLongHashSet(); lookback(visited, la, item.production, i, item.position); if(stackPosMap.containsKey(item.production)) { stackPosMap.put(item.production, Math.max(item.stackPos, stackPosMap.get(item.production))); // TODO arbitrary choice } else stackPosMap.put(item.production, item.stackPos); } } // Create transitions for(int production : laMap.keys()) { int stackPos = 0; //stackPosMap.get(production); TIntHashSet la = laMap.get(production); for(int symbol : la.toArray()) { if(trans.contains(symbol)) { int oldAction = trans.get(symbol); if(oldAction >= 0) { Prod prod = grammar.prods.get(production); if(prod.annotations.containsKey(symbol)) { byte v = prod.annotations.get(symbol); if(v == 1) trans.put(symbol, REDUCE_MASK | production | (stackPos << 13)); } else { LOGGER.error("Shift/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context"); LOGGER.error(itemSets.get(i).toString(grammar)); } } else { LOGGER.error("Reduce/reduce conflict when encountering " + grammar.terminalNames[symbol] + " in context"); LOGGER.error(itemSets.get(i).toString(grammar)); } } else trans.put(symbol, REDUCE_MASK | production | (stackPos << 13)); } } // Check stacking conflicts /*trans.forEachEntry(new TIntIntProcedure() { @Override public boolean execute(int a, int b) { if(b >= 0) { boolean kernelState = false; boolean nonkernelState = false; for(Item item : itemSets.get(b).items) { Prod prod = grammar.prods.get(item.production); if(item.position == prod.rhs.getTransition(prod.rhs.getInitialState(), a)) nonkernelState = true; else if(item.position != prod.rhs.getInitialState()) kernelState = true; } if(kernelState && nonkernelState) { System.err.println("Stacking conflict when following " + grammar.getName(a) + " to"); System.err.println(itemSets.get(b).toString(grammar)); } } return true; } });*/ } } public static ParseTable build(AnaGrammar grammar) { ParseTableBuilder builder = new ParseTableBuilder(grammar); builder.initialStates = new int[grammar.initialNonterminals.length]; for(int i=0;i seed = new ArrayList(); int prodId = grammar.prods.size()-i-1; seed.add(new Item(prodId, grammar.prods.get(prodId).rhs.getInitialState(), 0)); builder.initialStates[i] = builder.getState(REDUCE_MASK, seed); } builder.createReduceActions(); LOGGER.info("States: " + builder.itemSets.size()); //builder.visualize(); //builder.printParseTable(); return builder.getParseTable(); } private ParseTable getParseTable() { int[] productionInfo = new int[grammar.prods.size()]; for(int i=0;i= 0) actions[a] = action; else gotos[~a] = action; return true; } }); } String[] stateDescriptions = new String[itemSets.size()]; for(int i=0;i= 0) { b.append("\n ").append(grammar.terminalNames[symbol]).append(" ->"); if((action & REDUCE_MASK) == 0) { if((action & POP_MASK) != 0) b.append(" POP"); if((action & PUSH_MASK) != 0) b.append(" PUSH"); b.append(" SHIFT(").append(action&STATE_MASK).append(")"); } else { if(action == 0xfffffffe) b.append(" ACCEPT"); else b.append(" REDUCE(").append(action&STATE_MASK).append(")"); } } else { b.append("\n ").append(grammar.nonterminalNames[~symbol]).append(" ->") .append(" GOTO(").append(action).append(")"); } return true; } }); stateDescriptions[i] = b.toString(); } //printParseTable(); return new ParseTable(itemSets.size(), actionTable, gotoTable, productionInfo, initialStates, stateDescriptions); } private void printParseTable() { final ItemSet[] stateSets = new ItemSet[states.size()]; states.forEachEntry(new TObjectIntProcedure() { @Override public boolean execute(ItemSet a, int b) { stateSets[b] = a; return true; } }); for(int i=0;i "); if(sOp != 0) { LOGGER.info("["); if((sOp & PUSH_MASK) != 0) { sOp ^= PUSH_MASK; LOGGER.info("PUSH "); } if((sOp & POP_MASK) != 0) { sOp ^= POP_MASK; LOGGER.info("POP "); } if(sOp != 0) LOGGER.info(Integer.toString(sOp)); LOGGER.info("] "); } if((b & REDUCE_MASK) != 0) { b ^= REDUCE_MASK; LOGGER.info("reduce " + b); // grammar.prods.get(~b).toString(grammar)); } else { LOGGER.info("shift " + b); } return true; } }); } } }