]> gerrit.simantics Code Review - simantics/platform.git/blob
0f45c2511b297a3e6575c7a953c6cd5e9f752c34
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.compilation.CompilationContext;
4 import org.simantics.scl.compiler.internal.parsing.Token;
5 import org.simantics.scl.compiler.errors.Locations;
6 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
7 import gnu.trove.list.array.TIntArrayList;
8
9 %%
10
// Scanner options: generate a public class SCLLexer whose scanning method
// nextToken() returns a Token and is declared to throw SCLSyntaxErrorException.
// %char enables the yychar character offset that the actions below use for
// token positions, %unicode selects Unicode input, and %scanerror makes
// internal scanner errors surface as RuntimeException.
// At end of input the scanner returns an EOF token.
11 %public
12 %char
13 %unicode
14 %class SCLLexer
15 %function nextToken
16 %type Token
17 %yylexthrow SCLSyntaxErrorException
18 %scanerror RuntimeException
19 %eofval{
20     return sym(SCLTerminals.EOF);
21 %eofval}
22
23 %{
24     public SCLParserOptions options = SCLParserOptions.DEFAULT; // parser options; the "eq" rule consults options.supportEq
25     int stringStart; // yychar offset at which the string segment currently being scanned began
26     TIntArrayList parenCountStack = new TIntArrayList(2); // parenCount values saved each time "\(" suspends a string literal
27     int parenCount = 0; // incremented on "(" and decremented on ")"; reset to 1 when "\(" opens an interpolation
28     TIntArrayList stateStack = new TIntArrayList(2); // string states (STRING or LONG_STRING) to resume when an interpolation closes
29
30     StringBuffer string = new StringBuffer(); // collects the decoded text of the current string segment
31     CompilationContext context; // assigned by setCompilationContext and read by supportCHR()
32     
33     private Token sym(int id) { // token for the current match whose text is the matched input itself
34         return sym(id, yytext());
35     }
36     private Token sym(int id, String text) { // token for the current match with caller-supplied text
37         return new Token(id, yychar, yychar + yylength(), text); // span is [yychar, yychar + match length)
38     }
39     public void setCompilationContext(CompilationContext context) { // stores the context whose header supportCHR() inspects
40         this.context = context;
41     }
42     public boolean supportCHR() { // true only when the compilation context's header enables CHR
43         return context != null && context.header != null && context.header.chr; // a missing context or header means "no CHR" instead of a NullPointerException
44     }
45 %}
46
// Basic character classes. id_char is what may follow the first letter of an
// identifier (letters, digits and the prime character). symbol_char is
// ord_symbol_char plus "#".
47 letter          = [a-zA-Z_]
48 digit           = [0-9]
49 hexDigit        = [0-9a-fA-F]
50 id_char         = {letter} | {digit} | "'"
51 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
52 symbol_char     = {ord_symbol_char} | "#"
53
// One dotted qualifier segment in front of an identifier, e.g. the "Foo." in "Foo.bar".
54 prefix          = {letter} {id_char}* "."
55
// Token-level patterns.
// - id: optional qualifiers, an optional "?" and a name, or a tuple constructor such as "(,)".
// - annotation_id repeats the tuple-constructor alternative of id. Because the {id} rule
//   is listed before {annotation_id} in the default state and both match the same length,
//   such input is tokenized as ID. NOTE(review): that alternative looks like dead code.
// - symbol: a lone "#" counts as a symbol only when followed by a space or line break;
//   the rule trims that trailing whitespace from the token text.
// - float: the exponent accepts an optional "-" but no "+".
// - NOTE(review): c_comment matches "//" line comments and cpp_comment matches block
//   comments, i.e. the names are the reverse of the usual convention.
56 annotation_id   = "@" {letter} {id_char}*
57                 | "(" ","+ ")"
58 id              = {prefix}* ("?")? {letter} {id_char}*
59                 | "(" ","+ ")"
60 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
61 separatedDot    = "." [ \n\r]+
62 escaped_symbol_inner = {symbol_char}+ | "."
63 escaped_symbol  = "(" {escaped_symbol_inner} ")"
64 escaped_id      = "`" {id} "`"
65 queryOp         = "<" [&|!?] ">"
66 integer         = {digit}+
67 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
68                 | {digit}+ [eE] ("-")? {digit}+
69 whitespace      = [ ]+
70 c_comment       = "//" [^\n\r]*
71 cpp_comment     = "/*" ~"*/"
72
// existentialVar appears to be referenced only by the commented-out XML rules.
73 //xmlName         = {letter} ({letter} | {digit} | [-.])*
74 existentialVar  = "?" {letter} {id_char}*
75
// A character literal: one plain character or one backslash escape between single
// quotes; U+FFFD (the marker for undecodable input) is excluded.
76 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
77
78 %state STRING
79 %state LONG_STRING
80 %state CHAR
81 /*%state XML
82 %state XML_TAG*/
83
84 %%
85
// Default state: ordinary SCL source outside string literals.
// JFlex takes the longest match and, for equal lengths, the rule listed first,
// so the keyword rules must stay above {id} and the catch-all "." must stay last.
86 <YYINITIAL> {
87   {c_comment}     { return sym(SCLTerminals.COMMENT); }
88   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
89   forall          { return sym(SCLTerminals.FORALL); }
90   if              { return sym(SCLTerminals.IF); }
91   then            { return sym(SCLTerminals.THEN); }
92   else            { return sym(SCLTerminals.ELSE); }
93   where           { return sym(SCLTerminals.WHERE); }
94   when            { return sym(SCLTerminals.WHEN); }
// With CHR support "ruleset" is a keyword while "rule" and "transformation" are
// plain identifiers; without CHR support it is the other way round.
95   ruleset         { return sym(supportCHR() ? SCLTerminals.RULESET : SCLTerminals.ID); }
96   rule            { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.RULE); }
97   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
98   extends         { return sym(SCLTerminals.EXTENDS); }
99   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
100   transformation  { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.TRANSFORMATION); }
101   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
102   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
103   select          { return sym(SCLTerminals.SELECT); }
104   enforce         { return sym(SCLTerminals.ENFORCE); }
105   do              { return sym(SCLTerminals.DO); }
// "eq" is a keyword only when options.supportEq is set.
106   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
107   mdo             { return sym(SCLTerminals.MDO); }
108   class           { return sym(SCLTerminals.CLASS); }
109   effect          { return sym(SCLTerminals.EFFECT); }
110   match           { return sym(SCLTerminals.MATCH); }
111   with            { return sym(SCLTerminals.WITH); }
112   let             { return sym(SCLTerminals.LET); }
113   in              { return sym(SCLTerminals.IN); }
114   instance        { return sym(SCLTerminals.INSTANCE); }
115   deriving        { return sym(SCLTerminals.DERIVING); }
116   data            { return sym(SCLTerminals.DATA); }
117   type            { return sym(SCLTerminals.TYPE); }
118   infixl          { return sym(SCLTerminals.INFIXL); }
119   infixr          { return sym(SCLTerminals.INFIXR); }
120   infix           { return sym(SCLTerminals.INFIX); }
121   include         { return sym(SCLTerminals.INCLUDE); }  
122   import          { return sym(SCLTerminals.IMPORT); }
123   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
124   hiding          { return sym(SCLTerminals.HIDING); }
125 //  relation        { return sym(SCLTerminals.RELATION); }
126   as              { return sym(SCLTerminals.AS); }
127   by              { return sym(SCLTerminals.BY); }
128   constraint      { return sym(SCLTerminals.CONSTRAINT); }
129   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
130   "@"             { return sym(SCLTerminals.AT); }
131   "{"             { return sym(SCLTerminals.LBRACE); }
132   "}"             { return sym(SCLTerminals.RBRACE); }
// parenCount tracks parenthesis nesting so that the ")" closing a "\(" string
// interpolation can be told apart from an ordinary ")". When the count returns to
// zero and a suspended string is pending, the saved count and string state are
// restored and CONTINUE_STRING is emitted instead of RPAREN.
133   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
134   ")"             { --parenCount;
135                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
136                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
137                        string.setLength(0);
138                        stringStart=yychar;
139                        yybegin(stateStack.removeAt(stateStack.size()-1));
140                        return sym(SCLTerminals.CONTINUE_STRING);
141                     }
142                     else
143                        return sym(SCLTerminals.RPAREN);
144                   }
145   "["             { return sym(SCLTerminals.LBRACKET); }
146   "]"             { return sym(SCLTerminals.RBRACKET); }
147   "->"            { return sym(SCLTerminals.ARROW); }
148   "<-"            { return sym(SCLTerminals.BINDS); }
149   "=>"            { return sym(SCLTerminals.IMPLIES); }
150   ":-"            { return sym(SCLTerminals.FOLLOWS); }
151   ","             { return sym(SCLTerminals.COMMA); }
// NOTE(review): the DOTDOT token is given the text "." rather than ".." - confirm this is intended.
152   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
153   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
154   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
155   "-"             { return sym(SCLTerminals.MINUS, "-"); }
156 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
157   "<"             { return sym(SCLTerminals.LESS, "<"); }
158   ">"             { return sym(SCLTerminals.GREATER, ">"); }
159   ";"             { return sym(SCLTerminals.SEMICOLON); }
160   "|"             { return sym(SCLTerminals.BAR); }
161   "="             { return sym(SCLTerminals.EQUALS); }
162   "::"            { return sym(SCLTerminals.HASTYPE); }
163   ":"             { return sym(SCLTerminals.COLON); }
164   "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
165   "\\"            { return sym(SCLTerminals.LAMBDA); }
// Opening quotes reset the string buffer, remember where the literal starts and
// switch to the matching string state.
166   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
167   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
168   {char_literal}  { return sym(SCLTerminals.CHAR); }
169   "_"             { return sym(SCLTerminals.BLANK); }
170   {id}            { return sym(SCLTerminals.ID); }
171   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
// A parenthesized operator becomes an ID and a back-quoted identifier becomes a
// SYMBOL; in both cases the surrounding delimiters are stripped from the token text.
172   {escaped_symbol} { String text = yytext();
173                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
174                   }
175   {escaped_id}    { String text = yytext();
176                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
177                   }
178   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
179   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
180   {float}         { return sym(SCLTerminals.FLOAT); }
181   {integer}       { return sym(SCLTerminals.INTEGER); }
// Spaces are skipped, but every line break yields an EOL token with empty text.
182   {whitespace}    { }
183   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
// Everything that reaches this point is a lexical error reported at the offending character.
184   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
185   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
186   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
187 }
188
// Inside a single-line "..." literal. Decoded text is collected in the string
// buffer; this state emits only END_STRING (closing quote) and SUSPEND_STRING
// (start of a "\(" interpolation). A line break or end of input before the
// closing quote is an error.
189 <STRING> {
190   \"              { yybegin(YYINITIAL); 
191                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
192                   }
193   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
194   \\t             { string.append('\t'); }
195   \\n             { string.append('\n'); }
196
197   \\r             { string.append('\r'); }
198   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
// "\(" suspends the literal: the current paren count and this state are saved so
// that the matching ")" in the default state can resume the string.
199   \\\(            { parenCountStack.add(parenCount);
200                     parenCount = 1;
201                     stateStack.add(STRING);
202                     yybegin(YYINITIAL); 
203                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+2, string.toString()); // the match "\(" is two characters, so the exclusive end offset is yychar+2
204                   }
205   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
206   \\[\'\"\\]      { string.append(yytext().substring(1)); }
207   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
208   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
209   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
210 }
211
// Inside a triple-quoted literal. Line breaks are kept (normalized to '\n') and
// backslashes are taken literally except for "\(", which starts an interpolation.
// Only END_STRING and SUSPEND_STRING are emitted from this state.
212 <LONG_STRING> {
213   "\"\"\""        { yybegin(YYINITIAL); 
214                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
215                   }
216   \R              { string.append('\n'); }
217   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
// "\(" suspends the literal: the current paren count and this state are saved so
// that the matching ")" in the default state can resume the string.
218   \\\(            { parenCountStack.add(parenCount);
219                     parenCount = 1;
220                     stateStack.add(LONG_STRING);
221                     yybegin(YYINITIAL); 
222                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+2, string.toString()); // the match "\(" is two characters, so the exclusive end offset is yychar+2
223                   }  
// A lone quote or backslash that is not part of the closing delimiter or of "\(" is ordinary text.
224   [\"\\]          { string.append( yytext() ); }
225   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
226   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
227   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
228 }
229
230 /*
231 <XML> {
232   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
233   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
234   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
235   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
236 }
237
238 <XML_TAG> {
239   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
240   ">"               { return sym(SCLTerminals.XML_TAG_END); }
241   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
242   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
243   "="               { return sym(SCLTerminals.EQUALS); }
244   "\"" [^\"]* "\""  { String text = yytext();
245                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
246   "'"  [^\']* "'"   { String text = yytext();
247                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
248   {whitespace}      { }
249 }
250 */