]> gerrit.simantics Code Review - simantics/platform.git/blob
595c082ca4c70151f946cf37a63353d4a078bd2e
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.compilation.CompilationContext;
4 import org.simantics.scl.compiler.internal.parsing.Token;
5 import org.simantics.scl.compiler.errors.Locations;
6 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
7 import gnu.trove.list.array.TIntArrayList;
8
9 %%
10
// Scanner generation options: a public class SCLLexer whose scanning method is
// nextToken(), returning Token and declared to throw SCLSyntaxErrorException.
11 %public
// %char turns on the yychar character offset that the actions use for token positions.
12 %char
13 %unicode
14 %class SCLLexer
15 %function nextToken
16 %type Token
17 %yylexthrow SCLSyntaxErrorException
// Internal scanner errors are reported as RuntimeException.
18 %scanerror RuntimeException
// At end of input an explicit EOF token is returned.
19 %eofval{
20     return sym(SCLTerminals.EOF);
21 %eofval}
22
23 %{
    /** Parser options; the rules read options.supportEq to decide whether "eq" is a keyword. */
24     public SCLParserOptions options = SCLParserOptions.DEFAULT;
    /** Offset (yychar) at which the string segment currently being collected started. */
25     int stringStart;
    /** parenCount values of the enclosing levels, saved when a string is suspended by an embedded expression. */
26     TIntArrayList parenCountStack = new TIntArrayList(2);
    /** Number of "(" tokens not yet closed at the current string-embedding level. */
27     int parenCount = 0;
    /** Lexical states (STRING or LONG_STRING) to resume when an embedded expression is closed. */
28     TIntArrayList stateStack = new TIntArrayList(2);
29
    /** Accumulates the decoded contents of the string segment being lexed. */
30     StringBuffer string = new StringBuffer();
    /** Compilation context that supplies the module header flags; null until setCompilationContext is called. */
31     CompilationContext context;
32     
    /** Creates a token of the given terminal id that spans the current match and carries its text. */
33     private Token sym(int id) {
34         return new Token(id, yychar, yychar+yylength(), yytext());
35     }
    /** Creates a token of the given terminal id that spans the current match but carries the given text. */
36     private Token sym(int id, String text) {
37         return new Token(id, yychar, yychar+yylength(), text);
38     }
    /** Sets the compilation context whose header controls the CHR and EDO keywords. */
39     public void setCompilationContext(CompilationContext context) {
40         this.context = context;
41     }
    /**
     * Tells whether the CHR keywords are enabled.
     * Returns false when no context or no header is available, instead of failing
     * with a NullPointerException on a lexer that was never given a context.
     */
42     public boolean supportCHR() {
43         return context != null && context.header != null && context.header.chr;
44     }
    /**
     * Tells whether the "edo" keyword is enabled.
     * Returns false when no context or no header is available.
     */
45     public boolean supportEDO() {
46         return context != null && context.header != null && context.header.edo;
47     }
48 %}
49
// Character classes and token patterns referenced by the lexical rules.
50 letter          = [a-zA-Z_]
51 digit           = [0-9]
52 hexDigit        = [0-9a-fA-F]
// Identifier continuation characters: letters, digits and the apostrophe.
53 id_char         = {letter} | {digit} | "'"
// Operator characters; "#" is kept separate because a lone "#" is only a symbol
// when it is followed by whitespace (see {symbol}).
54 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
55 symbol_char     = {ord_symbol_char} | "#"
56
// An identifier followed by a dot; {id} accepts any number of these before the final name.
57 prefix          = {letter} {id_char}* "."
58
// "@name". The second alternative (a parenthesised run of commas such as "(,,)") is
// identical to the one in {id}; the {id} rule is listed first in YYINITIAL and wins the tie.
59 annotation_id   = "@" {letter} {id_char}*
60                 | "(" ","+ ")"
// Optionally prefixed name, optionally marked with "?", or a parenthesised run of commas.
61 id              = {prefix}* ("?")? {letter} {id_char}*
62                 | "(" ","+ ")"
// Operator symbols. A single "#" followed by whitespace is also a symbol; the rule
// action trims the captured whitespace.
63 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
// A dot followed by whitespace, as opposed to a dot attached to the next token.
64 separatedDot    = "." [ \n\r]+
// Operator or dot wrapped in parentheses, and identifier wrapped in backticks.
65 escaped_symbol_inner = {symbol_char}+ | "."
66 escaped_symbol  = "(" {escaped_symbol_inner} ")"
67 escaped_id      = "`" {id} "`"
68 queryOp         = "<" [&|!?] ">"
69 integer         = {digit}+
// Decimal and/or exponent form; the exponent accepts an optional "-" but no "+".
70 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
71                 | {digit}+ [eE] ("-")? {digit}+
// Only the space character counts as whitespace; tabs are rejected by a YYINITIAL rule.
72 whitespace      = [ ]+
// NOTE: the names are swapped relative to common usage: c_comment matches "//" line
// comments and cpp_comment matches "/* ... */" block comments.
73 c_comment       = "//" [^\n\r]*
74 cpp_comment     = "/*" ~"*/"
75
76 //xmlName         = {letter} ({letter} | {digit} | [-.])*
// Only referenced from the commented-out XML rules.
77 existentialVar  = "?" {letter} {id_char}*
78
// One character, or a backslash followed by one character, between single quotes;
// U+FFFD is excluded.
79 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
80
// Lexical states: STRING for "..." literals and LONG_STRING for """...""" literals.
// NOTE(review): CHAR is declared but no rule block for it is visible in this file -
// confirm whether it is still needed.
81 %state STRING
82 %state LONG_STRING
83 %state CHAR
84 /*%state XML
85 %state XML_TAG*/
86
87 %%
88
89 <YYINITIAL> {
  // Default state. The longest match wins and ties go to the rule listed first, so the
  // keyword rules must stay above {id} and the order of the rules below is significant.
90   {c_comment}     { return sym(SCLTerminals.COMMENT); }
91   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
92   forall          { return sym(SCLTerminals.FORALL); }
93   if              { return sym(SCLTerminals.IF); }
94   then            { return sym(SCLTerminals.THEN); }
95   else            { return sym(SCLTerminals.ELSE); }
96   where           { return sym(SCLTerminals.WHERE); }
97   when            { return sym(SCLTerminals.WHEN); }
  // Keywords that depend on CHR support: with CHR enabled "ruleset" is a keyword and
  // "rule"/"transformation" are plain identifiers; without it the roles are reversed.
98   ruleset         { return sym(supportCHR() ? SCLTerminals.RULESET : SCLTerminals.ID); }
99   rule            { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.RULE); }
  // Two-word keywords are lexed as a single token.
100   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
101   extends         { return sym(SCLTerminals.EXTENDS); }
102   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
103   transformation  { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.TRANSFORMATION); }
104   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
105   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
106   select          { return sym(supportCHR() ? SCLTerminals.CHR_SELECT : SCLTerminals.SELECT); }
107   enforce         { return sym(SCLTerminals.ENFORCE); }
108   do              { return sym(SCLTerminals.DO); }
  // "eq" and "edo" are keywords only when enabled by the parser options / module header.
109   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
110   mdo             { return sym(SCLTerminals.MDO); }
111   edo             { return sym(supportEDO() ? SCLTerminals.EDO : SCLTerminals.ID); }
112   class           { return sym(SCLTerminals.CLASS); }
113   effect          { return sym(SCLTerminals.EFFECT); }
114   match           { return sym(SCLTerminals.MATCH); }
115   with            { return sym(SCLTerminals.WITH); }
116   let             { return sym(SCLTerminals.LET); }
117   in              { return sym(SCLTerminals.IN); }
118   instance        { return sym(SCLTerminals.INSTANCE); }
119   deriving        { return sym(SCLTerminals.DERIVING); }
120   data            { return sym(SCLTerminals.DATA); }
121   type            { return sym(SCLTerminals.TYPE); }
122   infixl          { return sym(SCLTerminals.INFIXL); }
123   infixr          { return sym(SCLTerminals.INFIXR); }
124   infix           { return sym(SCLTerminals.INFIX); }
125   include         { return sym(SCLTerminals.INCLUDE); }  
126   import          { return sym(SCLTerminals.IMPORT); }
127   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
128   hiding          { return sym(SCLTerminals.HIDING); }
129 //  relation        { return sym(SCLTerminals.RELATION); }
130   as              { return sym(SCLTerminals.AS); }
131   by              { return sym(SCLTerminals.BY); }
132   constraint      { return sym(SCLTerminals.CONSTRAINT); }
133   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
134   "@"             { return sym(SCLTerminals.AT); }
135   "{"             { return sym(SCLTerminals.LBRACE); }
136   "}"             { return sym(SCLTerminals.RBRACE); }
  // Parentheses are counted so that the ")" closing an expression embedded in a string
  // can be told apart from an ordinary ")".
137   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
  // When the count of the current level drops to zero and a level was saved by a string
  // rule, this ")" ends the embedded expression: restore the saved count and string
  // state, start a fresh string segment and emit CONTINUE_STRING instead of RPAREN.
138   ")"             { --parenCount;
139                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
140                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
141                        string.setLength(0);
142                        stringStart=yychar;
143                        yybegin(stateStack.removeAt(stateStack.size()-1));
144                        return sym(SCLTerminals.CONTINUE_STRING);
145                     }
146                     else
147                        return sym(SCLTerminals.RPAREN);
148                   }
149   "["             { return sym(SCLTerminals.LBRACKET); }
150   "]"             { return sym(SCLTerminals.RBRACKET); }
151   "->"            { return sym(SCLTerminals.ARROW); }
152   "<-"            { return sym(SCLTerminals.BINDS); }
153   "=>"            { return sym(SCLTerminals.IMPLIES); }
154   ":-"            { return sym(SCLTerminals.FOLLOWS); }
155   ","             { return sym(SCLTerminals.COMMA); }
  // NOTE(review): the DOTDOT token carries the text "." rather than ".." - confirm this is intended.
156   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
  // A dot followed by whitespace and a dot attached to the next token are distinct
  // terminals; both carry the text ".".
157   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
158   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
159   "-"             { return sym(SCLTerminals.MINUS, "-"); }
160 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
161   "<"             { return sym(SCLTerminals.LESS, "<"); }
162   ">"             { return sym(SCLTerminals.GREATER, ">"); }
163   ";"             { return sym(SCLTerminals.SEMICOLON); }
164   "|"             { return sym(SCLTerminals.BAR); }
165   "="             { return sym(SCLTerminals.EQUALS); }
166   "::"            { return sym(SCLTerminals.HASTYPE); }
167   ":"             { return sym(SCLTerminals.COLON); }
  // NOTE(review): this pattern has no word boundary after "match", so it presumably also
  // fires on a longer name such as "\matches" - confirm against the parser's expectations.
168   "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
169   "\\"            { return sym(SCLTerminals.LAMBDA); }
  // Opening quotes: reset the string buffer, remember where the literal starts and
  // switch to the matching string state.
170   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
171   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
172   {char_literal}  { return sym(SCLTerminals.CHAR); }
173   "_"             { return sym(SCLTerminals.BLANK); }
174   {id}            { return sym(SCLTerminals.ID); }
175   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
  // "(+)" becomes an ID and "`name`" becomes a SYMBOL; the delimiters are stripped from the token text.
176   {escaped_symbol} { String text = yytext();
177                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
178                   }
179   {escaped_id}    { String text = yytext();
180                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
181                   }
  // trim() drops the whitespace captured by the "#"-plus-whitespace alternative of {symbol}.
182   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
183   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
184   {float}         { return sym(SCLTerminals.FLOAT); }
185   {integer}       { return sym(SCLTerminals.INTEGER); }
  // Spaces are skipped; every line terminator yields an EOL token with empty text.
186   {whitespace}    { }
187   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
  // Anything left over is an error; U+FFFD and tab get dedicated messages.
188   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
189   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
190   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
191 }
192
193 <STRING> {
  // Body of a "..." literal. Text is accumulated in the string buffer; a token is
  // returned only when the literal ends or is suspended by an embedded expression.
  // Those tokens span from stringStart to the end of the terminating match.
194   \"              { yybegin(YYINITIAL); 
195                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
196                   }
197   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
  // Escape sequences are decoded into the character they denote.
198   \\t             { string.append('\t'); }
199   \\n             { string.append('\n'); }
200
201   \\r             { string.append('\r'); }
202   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
  // "\(" starts an embedded expression: save the current paren count and this state so
  // that the matching ")" in YYINITIAL can resume the string, then emit the text so far.
  // The match is two characters long, so the token ends at yychar+2.
203   \\\(            { parenCountStack.add(parenCount);
204                     parenCount = 1;
205                     stateStack.add(STRING);
206                     yybegin(YYINITIAL); 
207                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+2, string.toString()); 
208                   }
209   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  // An escaped quote or backslash stands for itself; any other escape is an error.
210   \\[\'\"\\]      { string.append(yytext().substring(1)); }
211   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
  // A single-line string may not cross a line terminator or the end of input.
212   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
213   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
214 }
215
216 <LONG_STRING> {
  // Body of a """...""" literal. Text is accumulated in the string buffer; a token is
  // returned only when the literal ends or is suspended by an embedded expression.
  // Those tokens span from stringStart to the end of the terminating match.
217   "\"\"\""        { yybegin(YYINITIAL); 
218                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
219                   }
  // Line terminators are allowed and are stored as a single '\n'.
220   \R              { string.append('\n'); }
221   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
  // "\(" starts an embedded expression: save the current paren count and this state so
  // that the matching ")" in YYINITIAL can resume the string, then emit the text so far.
  // The match is two characters long, so the token ends at yychar+2.
222   \\\(            { parenCountStack.add(parenCount);
223                     parenCount = 1;
224                     stateStack.add(LONG_STRING);
225                     yybegin(YYINITIAL); 
226                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+2, string.toString()); 
227                   }  
  // No escape decoding here: a lone quote or backslash is kept literally.
228   [\"\\]          { string.append( yytext() ); }
229   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  // NOTE(review): the rules above appear to cover every character, so this catch-all
  // looks unreachable - confirm before relying on it.
230   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
231   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
232 }
233
234 /*
235 <XML> {
236   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
237   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
238   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
239   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
240 }
241
242 <XML_TAG> {
243   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
244   ">"               { return sym(SCLTerminals.XML_TAG_END); }
245   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
246   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
247   "="               { return sym(SCLTerminals.EQUALS); }
248   "\"" [^\"]* "\""  { String text = yytext();
249                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
250   "'"  [^\']* "'"   { String text = yytext();
251                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
252   {whitespace}      { }
253 }
254 */