package org.simantics.scl.compiler.parser.regexp; import gnu.trove.set.hash.THashSet; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.atomic.AtomicReference; import org.simantics.scl.compiler.parser.regexp.automata.NFA; public abstract class Regexp implements Comparable { static final int ATOM = 0; static final int OP = 1; static final int OR = 2; static final int SEQ = 3; protected abstract void buildAutomaton(NFA aut, int inState, int outState); public static final Regexp ONE = new RSeq(new Regexp[0]); public static final Regexp ZERO = new ROr(new Regexp[0]); Regexp() { } public NFA toAutomaton() { NFA aut = new NFA(); int inState = aut.newState(); int outState = aut.newState(); buildAutomaton(aut, inState, outState); aut.setInitialState(inState); aut.setAccepts(outState, true); return aut; } @Override public String toString() { StringBuilder b = new StringBuilder(); toString(b, 0); return b.toString(); } protected abstract void toString(StringBuilder b, int prec); protected abstract int getTypeId(); public abstract boolean isNullable(); public Regexp simplify() { return this; } private static int parseRegexp(int pos, String regexp, AtomicReference result) { ArrayList dc = new ArrayList(); ArrayList cc = new ArrayList(); loop: while(true) { if(pos == regexp.length()) break loop; char c = regexp.charAt(pos++); switch(c) { case '(': { AtomicReference child = new AtomicReference(); pos = parseRegexp(pos, regexp, child); cc.add(child.get()); } break; case ')': break loop; case '|': { dc.add(seq(cc)); cc.clear(); } break; case '*': { if(cc.isEmpty()) throw new IllegalArgumentException("Encountered * that is not front of anything."); int p = cc.size()-1; cc.set(p, Regexp.star(cc.get(p))); } break; case '?': { if(cc.isEmpty()) throw new IllegalArgumentException("Encountered ? that is not front of anything."); int p = cc.size()-1; cc.set(p, Regexp.optional(cc.get(p))); } break; case '+': { if(cc.isEmpty()) throw new IllegalArgumentException("Encountered + that is not front of anything."); int p = cc.size()-1; cc.set(p, Regexp.plus(cc.get(p))); } break; default: cc.add(new RAtom(c)); } } dc.add(seq(cc)); result.set(or(dc)); return pos; } public static Regexp of(String regexp) { AtomicReference result = new AtomicReference(); int finalPos = parseRegexp(0, regexp, result); if(finalPos < regexp.length()) throw new IllegalArgumentException("Extra closing parenteses"); return result.get(); } static void seq(ArrayList l, Regexp exp) { if(l.size() > 0 && l.get(0) == ZERO) return; if(exp instanceof RSeq) for(Regexp e : ((RSeq)exp).exps) seq(l, e); else if(exp == ZERO) { l.clear(); l.add(exp); } else l.add(exp); } static Regexp seq_(List es) { if(es.size() == 0) return ONE; if(es.size() == 1) return es.get(0); Regexp[] eArray = es.toArray(new Regexp[es.size()]); return new RSeq(eArray); } public static Regexp seq(Regexp ... exps) { if(exps.length == 0) return ONE; if(exps.length == 1) return exps[0]; ArrayList es = new ArrayList(); for(Regexp exp : exps) seq(es, exp); return seq_(es); } public static Regexp seq(Collection exps) { return seq(exps.toArray(new Regexp[exps.size()])); } static void or(THashSet s, Regexp exp) { if(exp instanceof ROr) for(Regexp e : ((ROr)exp).exps) or(s, e); else s.add(exp); } static Regexp or_(THashSet es) { if(es.size() == 0) return ZERO; if(es.size() == 1) return es.iterator().next(); Regexp[] eArray = es.toArray(new Regexp[es.size()]); Arrays.sort(eArray); return new ROr(eArray); } public static Regexp or(Regexp ... exps) { if(exps.length == 0) return ZERO; if(exps.length == 1) return exps[0]; THashSet es = new THashSet(); for(Regexp exp : exps) or(es, exp); return or_(es); } public static Regexp star(Regexp exp) { if(exp == ONE || exp == ZERO) return ONE; return new ROp(exp, '*'); } public static Regexp plus(Regexp exp) { return new ROp(exp, '+'); } public static Regexp optional(Regexp exp) { return new ROp(exp, '?'); } public static Regexp or(Collection exps) { return or(exps.toArray(new Regexp[exps.size()])); } @Override public int compareTo(Regexp o) { int tA = getTypeId(); int tB = o.getTypeId(); if(tA < tB) return -1; if(tA > tB) return 1; switch(tA) { case ATOM: { int sA = ((RAtom)this).symbolId; int sB = ((RAtom)o).symbolId; if(sA < sB) return -1; if(sA > sB) return 1; return 0; } case OP: { ROp a = (ROp)this; ROp b = (ROp)o; if(a.op < b.op) return -1; if(a.op > b.op) return 1; return a.exp.compareTo(b.exp); } case OR: return compare(((ROr)this).exps, ((ROr)o).exps); case SEQ: { return compare(((RSeq)this).exps, ((RSeq)o).exps); } default: throw new IllegalArgumentException(); } } private static int compare(Regexp[] a, Regexp[] b) { if(a.length < b.length) return -1; if(a.length > b.length) return 1; for(int i=0;i