Moved SCL parser generator to platform repository.

[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / parser / grammar / input / GrammarParserImpl.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java

new file mode 100644 (file)

index 0000000..a5d3b8d
--- /dev/null
+++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/parser/grammar/input/GrammarParserImpl.java
@@ -0,0 +1,152 @@
+package org.simantics.scl.compiler.parser.grammar.input;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+
+import org.simantics.scl.compiler.parser.grammar.Grammar;
+import org.simantics.scl.compiler.parser.grammar.Production;
+import org.simantics.scl.compiler.parser.regexp.RAtom;
+import org.simantics.scl.compiler.parser.regexp.Regexp;
+
+import gnu.trove.list.array.TIntArrayList;
+import gnu.trove.map.hash.TIntByteHashMap;
+import gnu.trove.map.hash.TObjectIntHashMap;
+
+public class GrammarParserImpl extends GrammarParser {
+    private final GrammarLexer lexer;
+    
+    ArrayList<String> terminals = new ArrayList<String>();
+    ArrayList<String> nonterminals = new ArrayList<String>();
+    TObjectIntHashMap<String> symbols = new TObjectIntHashMap<String>();
+    ArrayList<Production> productions = new ArrayList<Production>();
+    TIntArrayList initials = new TIntArrayList();
+
+    public GrammarParserImpl(Reader reader) {
+        lexer = new GrammarLexer(reader);
+    }
+    
+    private int getId(String symbol) {
+        if(symbols.contains(symbol))
+            return symbols.get(symbol);
+        int id;
+        if(Character.isUpperCase(symbol.charAt(0))) {
+            id = terminals.size();
+            terminals.add(symbol);
+        }
+        else {
+            id = ~nonterminals.size();
+            nonterminals.add(symbol);
+        }
+        symbols.put(symbol, id);
+        return id;
+    }
+    
+    @Override
+    protected Token nextToken() {
+        try {
+            Token token = lexer.nextToken();
+            return token;
+        } catch(Exception e) {
+            if(e instanceof RuntimeException)
+                throw (RuntimeException)e;
+            else
+                throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    protected RuntimeException syntaxError(Token token, String description) {
+        return new RuntimeException(description);
+    }
+
+    @Override
+    protected Object reduceFile() {
+        return null;
+    }
+
+    @Override
+    protected Object reduceProduction() {
+        int lhs = getId(((Token)get(0)).text);
+        for(int i=2;i<length();i+=2) {
+            Production prod = (Production)get(i);
+            prod.lhs = lhs;
+            productions.add(prod);
+        }
+        return null;
+    }
+
+    @Override
+    protected Object reduceInitial() {
+        initials.add(getId(((Token)get(1)).text));
+        return null;
+    }
+    
+    @Override
+    protected Object reduceTerminal() {
+        return new RAtom(getId(((Token)get(0)).text));
+    }
+
+    private static Regexp postOp(Regexp regexp, Token op) {
+        switch(op.id) {
+        case GrammarTerminals.STAR: return Regexp.star(regexp);
+        case GrammarTerminals.PLUS: return Regexp.plus(regexp);
+        case GrammarTerminals.OPTIONAL: return Regexp.optional(regexp);
+        default: throw new IllegalStateException();
+        }
+    }
+
+    @Override
+    protected Object reduceConcatenation() {
+        ArrayList<Regexp> regexps = new ArrayList<Regexp>(length());
+        for(int i=0;i<length();++i) {
+            Object obj = get(i);
+            if(obj instanceof Regexp)
+                regexps.add((Regexp)obj);
+            else {
+                Token token = (Token)obj;
+                Regexp regexp = regexps.remove(regexps.size()-1);
+                regexps.add(postOp(regexp, token));
+            }
+        }
+        return Regexp.seq(regexps.toArray(new Regexp[regexps.size()]));
+    }
+    
+    @Override
+    protected Object reduceUnion() {
+        Regexp[] regexps = new Regexp[length()/2];
+        for(int i=1;i<length();i+=2)
+            regexps[i/2] = (Regexp)get(i);
+        return Regexp.or(regexps);
+    }
+    
+    @Override
+    protected Object reduceProductionRhs() {
+        Regexp rhs = (Regexp)get(0);
+        String name = ((Token)get(2)).text;
+        TIntByteHashMap annotations = new TIntByteHashMap();
+        for(int i=4;i<length();i+=3) {
+            Token type = (Token)get(i);
+            int id = getId(((Token)get(i+1)).text);
+            annotations.put(id, (byte)(type.id == GrammarTerminals.SHIFT ? 0 : 1));
+        }
+        return new Production(name, 0, rhs, annotations);
+    }
+
+    public Grammar getGrammar() {
+        return new Grammar(
+                productions.toArray(new Production[productions.size()]),
+                terminals.toArray(new String[terminals.size()]),
+                nonterminals.toArray(new String[nonterminals.size()]),
+                initials.toArray()
+                );
+    }
+
+    public static Grammar read(InputStream inputStream) throws IOException {
+        GrammarParserImpl parser = new GrammarParserImpl(new InputStreamReader(inputStream, "UTF-8"));
+        parser.parseFile();
+        return parser.getGrammar();
+    }
+}