1 package org.simantics.scl.compiler.parser.regexp;
3 import gnu.trove.set.hash.THashSet;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
9 import java.util.concurrent.atomic.AtomicReference;
11 import org.simantics.scl.compiler.parser.regexp.automata.NFA;
13 public abstract class Regexp implements Comparable<Regexp> {
14 static final int ATOM = 0;
15 static final int OP = 1;
16 static final int OR = 2;
17 static final int SEQ = 3;
19 protected abstract void buildAutomaton(NFA aut, int inState, int outState);
21 public static final Regexp ONE = new RSeq(new Regexp[0]);
22 public static final Regexp ZERO = new ROr(new Regexp[0]);
27 public NFA toAutomaton() {
29 int inState = aut.newState();
30 int outState = aut.newState();
31 buildAutomaton(aut, inState, outState);
32 aut.setInitialState(inState);
33 aut.setAccepts(outState, true);
38 public String toString() {
39 StringBuilder b = new StringBuilder();
44 protected abstract void toString(StringBuilder b, int prec);
46 protected abstract int getTypeId();
48 public abstract boolean isNullable();
50 public Regexp simplify() {
54 private static int parseRegexp(int pos, String regexp, AtomicReference<Regexp> result) {
55 ArrayList<Regexp> dc = new ArrayList<Regexp>();
56 ArrayList<Regexp> cc = new ArrayList<Regexp>();
58 if(pos == regexp.length())
60 char c = regexp.charAt(pos++);
63 AtomicReference<Regexp> child = new AtomicReference<Regexp>();
64 pos = parseRegexp(pos, regexp, child);
75 throw new IllegalArgumentException("Encountered * that is not front of anything.");
77 cc.set(p, Regexp.star(cc.get(p)));
81 throw new IllegalArgumentException("Encountered ? that is not front of anything.");
83 cc.set(p, Regexp.optional(cc.get(p)));
87 throw new IllegalArgumentException("Encountered + that is not front of anything.");
89 cc.set(p, Regexp.plus(cc.get(p)));
100 public static Regexp of(String regexp) {
101 AtomicReference<Regexp> result = new AtomicReference<Regexp>();
102 int finalPos = parseRegexp(0, regexp, result);
103 if(finalPos < regexp.length())
104 throw new IllegalArgumentException("Extra closing parenteses");
108 static void seq(ArrayList<Regexp> l, Regexp exp) {
109 if(l.size() > 0 && l.get(0) == ZERO)
111 if(exp instanceof RSeq)
112 for(Regexp e : ((RSeq)exp).exps)
114 else if(exp == ZERO) {
122 static Regexp seq_(List<Regexp> es) {
127 Regexp[] eArray = es.toArray(new Regexp[es.size()]);
128 return new RSeq(eArray);
131 public static Regexp seq(Regexp ... exps) {
136 ArrayList<Regexp> es = new ArrayList<Regexp>();
137 for(Regexp exp : exps)
142 public static Regexp seq(Collection<Regexp> exps) {
143 return seq(exps.toArray(new Regexp[exps.size()]));
146 static void or(THashSet<Regexp> s, Regexp exp) {
147 if(exp instanceof ROr)
148 for(Regexp e : ((ROr)exp).exps)
154 static Regexp or_(THashSet<Regexp> es) {
158 return es.iterator().next();
159 Regexp[] eArray = es.toArray(new Regexp[es.size()]);
161 return new ROr(eArray);
164 public static Regexp or(Regexp ... exps) {
169 THashSet<Regexp> es = new THashSet<Regexp>();
170 for(Regexp exp : exps)
175 public static Regexp star(Regexp exp) {
176 if(exp == ONE || exp == ZERO)
178 return new ROp(exp, '*');
181 public static Regexp plus(Regexp exp) {
182 return new ROp(exp, '+');
185 public static Regexp optional(Regexp exp) {
186 return new ROp(exp, '?');
189 public static Regexp or(Collection<Regexp> exps) {
190 return or(exps.toArray(new Regexp[exps.size()]));
194 public int compareTo(Regexp o) {
195 int tA = getTypeId();
196 int tB = o.getTypeId();
203 int sA = ((RAtom)this).symbolId;
204 int sB = ((RAtom)o).symbolId;
218 return a.exp.compareTo(b.exp);
221 return compare(((ROr)this).exps, ((ROr)o).exps);
223 return compare(((RSeq)this).exps, ((RSeq)o).exps);
226 throw new IllegalArgumentException();
230 private static int compare(Regexp[] a, Regexp[] b) {
231 if(a.length < b.length)
233 if(a.length > b.length)
235 for(int i=0;i<a.length;++i) {
236 int temp = a[i].compareTo(b[i]);
243 public abstract void toString(StringBuilder b, Namer grammar, int prec);