]> gerrit.simantics Code Review - simantics/platform.git/blob
e673b294b94b5e8170e6960778a0176b1c37dcb9
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.internal.parsing.Token;
4 import org.simantics.scl.compiler.errors.Locations;
5 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
6 import gnu.trove.list.array.TIntArrayList;
7
8 %%
9
// Scanner configuration: the generated class is the public SCLLexer, and its
// scanning method is nextToken(), which returns a Token and is declared to
// throw SCLSyntaxErrorException.
10 %public
// %char keeps yychar (offset of the current match) up to date; the sym()
// helpers and the error locations in the rules are built from it.
11 %char
12 %unicode
13 %class SCLLexer
14 %function nextToken
15 %type Token
16 %yylexthrow SCLSyntaxErrorException
// Input that no rule matches is reported as a RuntimeException.
17 %scanerror RuntimeException
// At end of input the scanner hands out an explicit EOF token.
18 %eofval{
19     return sym(SCLTerminals.EOF);
20 %eofval}
21
22 %{
    /** Parser options read by the rules (options.supportEq decides whether "eq" is a keyword). */
23     public SCLParserOptions options = SCLParserOptions.DEFAULT;

    /** Offset at which the string segment currently being collected began. */
24     int stringStart;
    /** Text of the string segment collected so far; reset whenever a segment starts. */
25     StringBuffer string = new StringBuffer();
26

    /** Number of currently open parentheses at the present interpolation level. */
27     int parenCount = 0;
    /** Paren counts saved when "\(" suspends a string; restored by the matching ")". */
28     TIntArrayList parenCountStack = new TIntArrayList(2);
    /** Lexical state (STRING or LONG_STRING) to resume after each pending interpolation. */
29     TIntArrayList stateStack = new TIntArrayList(2);
30     

    /** Builds a token of kind {@code id} that spans the current match and carries {@code text}. */
31     private Token sym(int id, String text) {
32         return new Token(id, yychar, yychar + yylength(), text);
33     }

    /** Builds a token of kind {@code id} that spans the current match and carries the matched text. */
34     private Token sym(int id) {
35         return sym(id, yytext());
36     }
37 %}
38
// Character classes. Note that {letter} includes the underscore.
39 letter          = [a-zA-Z_]
40 digit           = [0-9]
41 hexDigit        = [0-9a-fA-F]
// Characters allowed after the first letter of an identifier.
42 id_char         = {letter} | {digit} | "'"
// Operator characters; {symbol_char} additionally allows '#'.
43 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
44 symbol_char     = {ord_symbol_char} | "#"
45
// One qualifier segment of a dotted name, including its trailing dot.
46 prefix          = {letter} {id_char}* "."
47
// "@name". NOTE(review): the second alternative is the same tuple form that
// {id} accepts; since the {id} rule is listed before {annotation_id} in
// YYINITIAL, it looks like this alternative can never produce ANNOTATION_ID.
48 annotation_id   = "@" {letter} {id_char}*
49                 | "(" ","+ ")"
// Optionally qualified name, optionally marked with '?' before the last
// segment, or a tuple constructor such as "(,)" or "(,,)".
50 id              = {prefix}* ("?")? {letter} {id_char}*
51                 | "(" ","+ ")"
// Operators: a run of ordinary symbol characters, a run of two or more symbol
// characters that may contain '#', or a '#' followed by spaces/line breaks
// (the {symbol} rule trims that trailing whitespace from the token text).
52 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
// A dot followed by spaces or line breaks.
53 separatedDot    = "." [ \n\r]+
// An operator (or a single dot) wrapped in parentheses, and an identifier
// wrapped in backticks.
54 escaped_symbol_inner = {symbol_char}+ | "."
55 escaped_symbol  = "(" {escaped_symbol_inner} ")"
56 escaped_id      = "`" {id} "`"
// One of "<&>", "<|>", "<!>", "<?>".
57 queryOp         = "<" [&|!?] ">"
58 integer         = {digit}+
// Digits with a fraction and/or an exponent; the exponent sign can only be '-'.
59 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
60                 | {digit}+ [eE] ("-")? {digit}+
// Spaces only; tabs and line breaks are handled by separate rules.
61 whitespace      = [ ]+
// Despite the names, c_comment is the "//" line comment and cpp_comment is
// the "/* ... */" block comment (~"*/" runs up to and including the first "*/").
62 c_comment       = "//" [^\n\r]*
63 cpp_comment     = "/*" ~"*/"
64
65 //xmlName         = {letter} ({letter} | {digit} | [-.])*
// Only referenced from the commented-out XML rules in the visible source.
66 existentialVar  = "?" {letter} {id_char}*
67
// A quoted single character, or a backslash followed by any character;
// U+FFFD is excluded in both positions.
68 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
69
// Additional lexical states: STRING for "..." literals and LONG_STRING for
// """...""" literals. No rule in the visible source uses CHAR.
70 %state STRING
71 %state LONG_STRING
72 %state CHAR
73 /*%state XML
74 %state XML_TAG*/
75
76 %%
77
// Default lexical state: ordinary SCL source outside string literals.
// The scanner takes the longest match and, for equal lengths, the rule listed
// first, so the keyword rules must stay above {id}.
78 <YYINITIAL> {
79   {c_comment}     { return sym(SCLTerminals.COMMENT); }
80   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
81   forall          { return sym(SCLTerminals.FORALL); }
82   if              { return sym(SCLTerminals.IF); }
83   then            { return sym(SCLTerminals.THEN); }
84   else            { return sym(SCLTerminals.ELSE); }
85   where           { return sym(SCLTerminals.WHERE); }
86   when            { return sym(SCLTerminals.WHEN); }
87   ruleset         { return sym(SCLTerminals.RULESET); }
88   rule            { return sym(SCLTerminals.RULE); }
// Multi-word keywords (here and below, including "\match") carry a
// trailing-context check: the character after the last word must not be an
// identifier character. Without it the longest-match rule would cut
// identifiers apart, e.g. "select firstName" became SELECT_FIRST + "Name" and
// "\matches" became LAMBDA_MATCH + "es". The checked character is not
// consumed. The check cannot succeed at end of input, so a file that ends
// exactly in one of these phrases yields the single-word tokens instead.
89   abstract{whitespace}rule / [^a-zA-Z0-9_\'] { return sym(SCLTerminals.ABSTRACT_RULE); }
90   extends         { return sym(SCLTerminals.EXTENDS); }
91   mapping{whitespace}relation / [^a-zA-Z0-9_\'] { return sym(SCLTerminals.MAPPING_RELATION); }
92   transformation  { return sym(SCLTerminals.TRANSFORMATION); }
93   select{whitespace}first / [^a-zA-Z0-9_\'] { return sym(SCLTerminals.SELECT_FIRST); }
94   select{whitespace}distinct / [^a-zA-Z0-9_\'] { return sym(SCLTerminals.SELECT_DISTINCT); }
95   select          { return sym(SCLTerminals.SELECT); }
96   enforce         { return sym(SCLTerminals.ENFORCE); }
97   do              { return sym(SCLTerminals.DO); }
// "eq" is a keyword only when the parser options ask for it.
98   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
99   mdo             { return sym(SCLTerminals.MDO); }
100   class           { return sym(SCLTerminals.CLASS); }
101   effect          { return sym(SCLTerminals.EFFECT); }
102   match           { return sym(SCLTerminals.MATCH); }
103   with            { return sym(SCLTerminals.WITH); }
104   let             { return sym(SCLTerminals.LET); }
105   in              { return sym(SCLTerminals.IN); }
106   instance        { return sym(SCLTerminals.INSTANCE); }
107   deriving        { return sym(SCLTerminals.DERIVING); }
108   data            { return sym(SCLTerminals.DATA); }
109   type            { return sym(SCLTerminals.TYPE); }
110   infixl          { return sym(SCLTerminals.INFIXL); }
111   infixr          { return sym(SCLTerminals.INFIXR); }
112   infix           { return sym(SCLTerminals.INFIX); }
113   include         { return sym(SCLTerminals.INCLUDE); }
114   import          { return sym(SCLTerminals.IMPORT); }
115   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
116   hiding          { return sym(SCLTerminals.HIDING); }
117 //  relation        { return sym(SCLTerminals.RELATION); }
118   as              { return sym(SCLTerminals.AS); }
119   by              { return sym(SCLTerminals.BY); }
120   constraint      { return sym(SCLTerminals.CONSTRAINT); }
121   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
122   "@"             { return sym(SCLTerminals.AT); }
123   "{"             { return sym(SCLTerminals.LBRACE); }
124   "}"             { return sym(SCLTerminals.RBRACE); }
// Parentheses are counted so that the ")" closing a string interpolation
// (opened by "\(" in STRING / LONG_STRING, which sets parenCount to 1) can be
// told apart from an ordinary ")". When that ")" is reached, the outer paren
// count and the suspended string state are restored and a fresh string
// segment starts at this position.
125   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
126   ")"             { --parenCount;
127                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
128                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
129                        string.setLength(0);
130                        stringStart=yychar;
131                        yybegin(stateStack.removeAt(stateStack.size()-1));
132                        return sym(SCLTerminals.CONTINUE_STRING);
133                     }
134                     else
135                        return sym(SCLTerminals.RPAREN);
136                   }
137   "["             { return sym(SCLTerminals.LBRACKET); }
138   "]"             { return sym(SCLTerminals.RBRACKET); }
139   "->"            { return sym(SCLTerminals.ARROW); }
140   "<-"            { return sym(SCLTerminals.BINDS); }
141   "=>"            { return sym(SCLTerminals.IMPLIES); }
142   ":-"            { return sym(SCLTerminals.FOLLOWS); }
143   ","             { return sym(SCLTerminals.COMMA); }
// A dot followed by whitespace is SEPARATED_DOT, any other single dot is
// ATTACHED_DOT; all dot tokens, including "..", carry "." as their text.
144   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
145   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
146   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
147   "-"             { return sym(SCLTerminals.MINUS, "-"); }
148 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
149   "<"             { return sym(SCLTerminals.LESS, "<"); }
150   ">"             { return sym(SCLTerminals.GREATER, ">"); }
151   ";"             { return sym(SCLTerminals.SEMICOLON); }
152   "|"             { return sym(SCLTerminals.BAR); }
153   "="             { return sym(SCLTerminals.EQUALS); }
154   "::"            { return sym(SCLTerminals.HASTYPE); }
155   ":"             { return sym(SCLTerminals.COLON); }
156   "\\" " "* match / [^a-zA-Z0-9_\'] { return sym(SCLTerminals.LAMBDA_MATCH); }
157   "\\"            { return sym(SCLTerminals.LAMBDA); }
// Opening quotes: reset the text buffer, remember where the literal starts
// and switch to the matching string state.
158   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
159   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
160   {char_literal}  { return sym(SCLTerminals.CHAR); }
161   "_"             { return sym(SCLTerminals.BLANK); }
162   {id}            { return sym(SCLTerminals.ID); }
163   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
// An operator in parentheses is returned as an ID and an identifier in
// backticks as a SYMBOL; in both cases the delimiters are stripped from the
// token text.
164   {escaped_symbol} { String text = yytext();
165                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
166                   }
167   {escaped_id}    { String text = yytext();
168                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
169                   }
// {symbol} may include trailing whitespace after '#', hence the trim().
170   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
171   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
172   {float}         { return sym(SCLTerminals.FLOAT); }
173   {integer}       { return sym(SCLTerminals.INTEGER); }
// Spaces are skipped; line breaks become EOL tokens with empty text.
174   {whitespace}    { }
175   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
// Everything else is a syntax error reported at the offending character.
176   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
177   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
178   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
179 }
180
// Single-line string literal ("..."). Decoded text is collected in `string`;
// tokens produced here start at stringStart, the offset where the current
// segment began.
181 <STRING> {
// Closing quote: return to normal code and deliver the collected text.
182   \"              { yybegin(YYINITIAL); 
183                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
184                   }
185   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
// Escape sequences.
186   \\t             { string.append('\t'); }
187   \\n             { string.append('\n'); }
188
189   \\r             { string.append('\r'); }
190   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
// "\(" starts an interpolated expression: save the current paren count and
// this state, then lex ordinary code until the matching ")" is seen.
// NOTE(review): the token end is yychar+1 although the match "\(" is two
// characters long - confirm whether the "(" is meant to be excluded.
191   \\\(            { parenCountStack.add(parenCount);
192                     parenCount = 1;
193                     stateStack.add(STRING);
194                     yybegin(YYINITIAL); 
195                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
196                   }
197   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
198   \\[\'\"\\]      { string.append(yytext().substring(1)); }
199   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }
// A backslash that no two-character rule above can extend is followed by a
// line break or by the end of input. Without this rule nothing would match
// and the scanner would fail with the generic %scanerror RuntimeException
// instead of a syntax error.
  \\              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
200   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
201   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
202 }
203
// Triple-quoted string literal ("""..."""). Text is collected into `string`
// as written: no backslash escapes are decoded here, only "\(" is special.
204 <LONG_STRING> {
// Closing delimiter: return to normal code and deliver the collected text.
205   "\"\"\""        { yybegin(YYINITIAL); 
206                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
207                   }
// A line break matched by \R is stored as a single '\n'.
208   \R              { string.append('\n'); }
209   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
// "\(" starts an interpolated expression: save the current paren count and
// this state, then lex ordinary code until the matching ")" is seen.
210   \\\(            { parenCountStack.add(parenCount);
211                     parenCount = 1;
212                     stateStack.add(LONG_STRING);
213                     yybegin(YYINITIAL); 
214                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
215                   }  
// A quote or backslash that does not begin one of the sequences above is
// kept literally.
216   [\"\\]          { string.append( yytext() ); }
217   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
// NOTE(review): the rules above appear to cover every character, so this
// fallback looks unreachable - confirm before relying on it.
218   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
219   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
220 }
221
222 /*
223 <XML> {
224   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
225   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
226   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
227   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
228 }
229
230 <XML_TAG> {
231   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
232   ">"               { return sym(SCLTerminals.XML_TAG_END); }
233   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
234   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
235   "="               { return sym(SCLTerminals.EQUALS); }
236   "\"" [^\"]* "\""  { String text = yytext();
237                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
238   "'"  [^\']* "'"   { String text = yytext();
239                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
240   {whitespace}      { }
241 }
242 */