X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=bundles%2Forg.simantics.scl.compiler%2Fsrc%2Forg%2Fsimantics%2Fscl%2Fcompiler%2Fparser%2Fregexp%2FRegexp.java;fp=bundles%2Forg.simantics.scl.compiler%2Fsrc%2Forg%2Fsimantics%2Fscl%2Fcompiler%2Fparser%2Fregexp%2FRegexp.java;h=8211b6a740b7aeb07b9e613ebd26367b1d57d242;hb=649890ad306df48440a97893d7d53fb8a6386a4e;hp=0000000000000000000000000000000000000000;hpb=655590362c7017aff657d1ff30e6c63f03b6dd75;p=simantics%2Fplatform.git diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java new file mode 100644 index 000000000..8211b6a74 --- /dev/null +++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/regexp/Regexp.java @@ -0,0 +1,244 @@ +package org.simantics.scl.compiler.parser.regexp; + +import gnu.trove.set.hash.THashSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import org.simantics.scl.compiler.parser.regexp.automata.NFA; + +public abstract class Regexp implements Comparable { + static final int ATOM = 0; + static final int OP = 1; + static final int OR = 2; + static final int SEQ = 3; + + protected abstract void buildAutomaton(NFA aut, int inState, int outState); + + public static final Regexp ONE = new RSeq(new Regexp[0]); + public static final Regexp ZERO = new ROr(new Regexp[0]); + + Regexp() { + } + + public NFA toAutomaton() { + NFA aut = new NFA(); + int inState = aut.newState(); + int outState = aut.newState(); + buildAutomaton(aut, inState, outState); + aut.setInitialState(inState); + aut.setAccepts(outState, true); + return aut; + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + toString(b, 0); + return b.toString(); + } + + protected abstract void toString(StringBuilder b, int prec); + + protected abstract int getTypeId(); + + public abstract boolean isNullable(); + + public Regexp simplify() { + return this; + } + + private static int parseRegexp(int pos, String regexp, AtomicReference result) { + ArrayList dc = new ArrayList(); + ArrayList cc = new ArrayList(); + loop: while(true) { + if(pos == regexp.length()) + break loop; + char c = regexp.charAt(pos++); + switch(c) { + case '(': { + AtomicReference child = new AtomicReference(); + pos = parseRegexp(pos, regexp, child); + cc.add(child.get()); + } break; + case ')': + break loop; + case '|': { + dc.add(seq(cc)); + cc.clear(); + } break; + case '*': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered * that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.star(cc.get(p))); + } break; + case '?': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered ? that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.optional(cc.get(p))); + } break; + case '+': { + if(cc.isEmpty()) + throw new IllegalArgumentException("Encountered + that is not front of anything."); + int p = cc.size()-1; + cc.set(p, Regexp.plus(cc.get(p))); + } break; + default: + cc.add(new RAtom(c)); + } + } + dc.add(seq(cc)); + result.set(or(dc)); + return pos; + } + + public static Regexp of(String regexp) { + AtomicReference result = new AtomicReference(); + int finalPos = parseRegexp(0, regexp, result); + if(finalPos < regexp.length()) + throw new IllegalArgumentException("Extra closing parenteses"); + return result.get(); + } + + static void seq(ArrayList l, Regexp exp) { + if(l.size() > 0 && l.get(0) == ZERO) + return; + if(exp instanceof RSeq) + for(Regexp e : ((RSeq)exp).exps) + seq(l, e); + else if(exp == ZERO) { + l.clear(); + l.add(exp); + } + else + l.add(exp); + } + + static Regexp seq_(List es) { + if(es.size() == 0) + return ONE; + if(es.size() == 1) + return es.get(0); + Regexp[] eArray = es.toArray(new Regexp[es.size()]); + return new RSeq(eArray); + } + + public static Regexp seq(Regexp ... exps) { + if(exps.length == 0) + return ONE; + if(exps.length == 1) + return exps[0]; + ArrayList es = new ArrayList(); + for(Regexp exp : exps) + seq(es, exp); + return seq_(es); + } + + public static Regexp seq(Collection exps) { + return seq(exps.toArray(new Regexp[exps.size()])); + } + + static void or(THashSet s, Regexp exp) { + if(exp instanceof ROr) + for(Regexp e : ((ROr)exp).exps) + or(s, e); + else + s.add(exp); + } + + static Regexp or_(THashSet es) { + if(es.size() == 0) + return ZERO; + if(es.size() == 1) + return es.iterator().next(); + Regexp[] eArray = es.toArray(new Regexp[es.size()]); + Arrays.sort(eArray); + return new ROr(eArray); + } + + public static Regexp or(Regexp ... exps) { + if(exps.length == 0) + return ZERO; + if(exps.length == 1) + return exps[0]; + THashSet es = new THashSet(); + for(Regexp exp : exps) + or(es, exp); + return or_(es); + } + + public static Regexp star(Regexp exp) { + if(exp == ONE || exp == ZERO) + return ONE; + return new ROp(exp, '*'); + } + + public static Regexp plus(Regexp exp) { + return new ROp(exp, '+'); + } + + public static Regexp optional(Regexp exp) { + return new ROp(exp, '?'); + } + + public static Regexp or(Collection exps) { + return or(exps.toArray(new Regexp[exps.size()])); + } + + @Override + public int compareTo(Regexp o) { + int tA = getTypeId(); + int tB = o.getTypeId(); + if(tA < tB) + return -1; + if(tA > tB) + return 1; + switch(tA) { + case ATOM: { + int sA = ((RAtom)this).symbolId; + int sB = ((RAtom)o).symbolId; + if(sA < sB) + return -1; + if(sA > sB) + return 1; + return 0; + } + case OP: { + ROp a = (ROp)this; + ROp b = (ROp)o; + if(a.op < b.op) + return -1; + if(a.op > b.op) + return 1; + return a.exp.compareTo(b.exp); + } + case OR: + return compare(((ROr)this).exps, ((ROr)o).exps); + case SEQ: { + return compare(((RSeq)this).exps, ((RSeq)o).exps); + } + default: + throw new IllegalArgumentException(); + } + } + + private static int compare(Regexp[] a, Regexp[] b) { + if(a.length < b.length) + return -1; + if(a.length > b.length) + return 1; + for(int i=0;i