]> gerrit.simantics Code Review - simantics/platform.git/blob
ecae4e5a54a36b288e0bc6cfff737aec8f16cc8e
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.internal.parsing.Token;
4 import org.simantics.scl.compiler.errors.Locations;
5 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
6 import gnu.trove.list.array.TIntArrayList;
7
8 %%
9
// JFlex options describing the generated scanner class.
// The class is public, keeps the character offset of the current match in
// yychar (%char) and scans Unicode input.
10 %public
11 %char
12 %unicode
// The generated class is SCLLexer; its scanning method is nextToken(),
// which returns a Token.
13 %class SCLLexer
14 %function nextToken
15 %type Token
// nextToken() is declared to throw SCLSyntaxErrorException (the rule actions
// below throw it); internal scanner errors are raised as RuntimeException.
16 %yylexthrow SCLSyntaxErrorException
17 %scanerror RuntimeException
// At end of input the scanner returns an EOF token.
18 %eofval{
19     return sym(SCLTerminals.EOF);
20 %eofval}
21
22 %{
       // Members copied into the generated SCLLexer class: state used by the
       // string / interpolation rules, and helpers that build Token objects.
       //
       // Parser options consulted by the rules (the "eq" rule reads supportEq).
23     public SCLParserOptions options = SCLParserOptions.DEFAULT;
       // Offset recorded when a string literal, or a resumed segment of one, starts.
24     int stringStart;
       // Values of parenCount saved when "\(" opens an interpolation inside a
       // string; the saved value is restored when the matching ")" is seen.
25     TIntArrayList parenCountStack = new TIntArrayList(2);
       // Incremented by the "(" rule and decremented by the ")" rule.
26     int parenCount = 0;
       // String state (STRING or LONG_STRING) to resume when an interpolation ends.
27     TIntArrayList stateStack = new TIntArrayList(2);
28
       // Accumulates the contents of the string segment being scanned.
       // StringBuilder is sufficient: the scanner's other state is
       // unsynchronized as well, so the locking of StringBuffer buys nothing.
29     StringBuilder string = new StringBuilder();
30     
       // Creates a token of the given terminal id that spans the current match
       // and carries the matched text.
31     private Token sym(int id) {
32         return sym(id, yytext());
33     }
       // Creates a token of the given terminal id that spans the current match
       // but carries the supplied text instead of the matched text.
34     private Token sym(int id, String text) {
35         return new Token(id, yychar, yychar+yylength(), text);
36     }
37 %}
38
// Macro definitions used by the lexical rules.
// Basic character classes. Identifier characters may include digits and "'"
// after the first letter (underscore counts as a letter).
39 letter          = [a-zA-Z_]
40 digit           = [0-9]
41 hexDigit        = [0-9a-fA-F]
42 id_char         = {letter} | {digit} | "'"
// Operator characters; symbol_char additionally allows "#".
43 ord_symbol_char = [!$%&*+\/<=>?@\\\^|\-:~]
44 symbol_char     = {ord_symbol_char} | "#"
45
// A qualifier such as "Name." that may precede an identifier.
46 prefix          = {letter} {id_char}* "."
47
// "@" followed by a name.
// NOTE(review): the "(" ","+ ")" alternative is identical to the one in {id},
// and the {id} rule is listed before {annotation_id} in <YYINITIAL>, so this
// alternative appears never to be selected — confirm whether it is intended.
48 annotation_id   = "@" {letter} {id_char}*
49                 | "(" ","+ ")"
// Identifier: any number of qualifiers, an optional "?", then a name;
// or a parenthesized run of commas such as "(,)" or "(,,)".
50 id              = {prefix}* ("?")? {letter} {id_char}*
51                 | "(" ","+ ")"
// Operator symbol: one or more ordinary operator characters, or two or more
// operator characters where "#" is allowed, or a single "#" followed by
// spaces / line breaks (which are part of the match).
52 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
// A dot followed by spaces / line breaks (which are part of the match).
53 separatedDot    = "." [ \n\r]+
// Operator (or ".") wrapped in parentheses, e.g. "(+)".
54 escaped_symbol_inner = {symbol_char}+ | "."
55 escaped_symbol  = "(" {escaped_symbol_inner} ")"
// Identifier wrapped in backquotes.
56 escaped_id      = "`" {id} "`"
// One of "<&>", "<|>", "<!>", "<?>".
57 queryOp         = "<" [&|!?] ">"
// Numeric literals. The exponent of a float accepts an optional "-" sign only.
58 integer         = {digit}+
59 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
60                 | {digit}+ [eE] ("-")? {digit}+
// Whitespace is space characters only; tabs are not included.
61 whitespace      = [ ]+
// c_comment matches "//" up to the end of the line; cpp_comment matches
// "/*" up to and including the first "*/".
62 c_comment       = "//" [^\n\r]*
63 cpp_comment     = "/*" ~"*/"
64
65 //xmlName         = {letter} ({letter} | {digit} | [-.])*
// NOTE(review): existentialVar is referenced only from commented-out rules in
// the visible part of this file.
66 existentialVar  = "?" {letter} {id_char}*
67
// Character literal: one character other than "'", "\" or U+FFFD, or a
// backslash followed by any character other than U+FFFD, in single quotes.
68 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
69
// Lexical states in addition to YYINITIAL: STRING for "..." literals and
// LONG_STRING for """...""" literals.
// NOTE(review): CHAR is declared but no rule group for it appears in the
// visible rules of this file.
70 %state STRING
71 %state LONG_STRING
72 %state CHAR
73 /*%state XML
74 %state XML_TAG*/
75
76 %%
77
// Rules for ordinary source text outside string literals.
// The scanner takes the longest match and, between matches of equal length,
// the rule listed first; the keyword rules therefore have to stay above {id}.
78 <YYINITIAL> {
     // Comments are returned as COMMENT tokens, not skipped.
79   {c_comment}     { return sym(SCLTerminals.COMMENT); }
80   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
     // Keywords. Two-word keywords (e.g. "abstract rule") are matched as one
     // token, with any number of spaces between the words.
81   forall          { return sym(SCLTerminals.FORALL); }
82   if              { return sym(SCLTerminals.IF); }
83   then            { return sym(SCLTerminals.THEN); }
84   else            { return sym(SCLTerminals.ELSE); }
85   where           { return sym(SCLTerminals.WHERE); }
86   when            { return sym(SCLTerminals.WHEN); }
87   rule            { return sym(SCLTerminals.RULE); }
88   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
89   extends         { return sym(SCLTerminals.EXTENDS); }
90   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
91   transformation  { return sym(SCLTerminals.TRANSFORMATION); }
92   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
93   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
94   select          { return sym(SCLTerminals.SELECT); }
95   enforce         { return sym(SCLTerminals.ENFORCE); }
96   do              { return sym(SCLTerminals.DO); }
     // "eq" is a keyword only when options.supportEq is set; otherwise it is
     // returned as an ordinary identifier.
97   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
98   mdo             { return sym(SCLTerminals.MDO); }
99   class           { return sym(SCLTerminals.CLASS); }
100   effect          { return sym(SCLTerminals.EFFECT); }
101   match           { return sym(SCLTerminals.MATCH); }
102   with            { return sym(SCLTerminals.WITH); }
103   let             { return sym(SCLTerminals.LET); }
104   in              { return sym(SCLTerminals.IN); }
105   instance        { return sym(SCLTerminals.INSTANCE); }
106   deriving        { return sym(SCLTerminals.DERIVING); }
107   data            { return sym(SCLTerminals.DATA); }
108   type            { return sym(SCLTerminals.TYPE); }
109   infixl          { return sym(SCLTerminals.INFIXL); }
110   infixr          { return sym(SCLTerminals.INFIXR); }
111   infix           { return sym(SCLTerminals.INFIX); }
112   include         { return sym(SCLTerminals.INCLUDE); }  
113   import          { return sym(SCLTerminals.IMPORT); }
114   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
115   hiding          { return sym(SCLTerminals.HIDING); }
116 //  relation        { return sym(SCLTerminals.RELATION); }
117   as              { return sym(SCLTerminals.AS); }
118   by              { return sym(SCLTerminals.BY); }
      // Punctuation and reserved operators.
119   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
120   "@"             { return sym(SCLTerminals.AT); }
121   "{"             { return sym(SCLTerminals.LBRACE); }
122   "}"             { return sym(SCLTerminals.RBRACE); }
      // Parentheses are counted so that the ")" closing a "\(" interpolation
      // inside a string can be told apart from an ordinary ")".
123   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
124   ")"             { --parenCount;
                        // The count is back at zero and an interpolation is pending:
                        // restore the saved count, start a fresh string segment at this
                        // offset, return to the saved string state and emit
                        // CONTINUE_STRING instead of RPAREN.
125                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
126                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
127                        string.setLength(0);
128                        stringStart=yychar;
129                        yybegin(stateStack.removeAt(stateStack.size()-1));
130                        return sym(SCLTerminals.CONTINUE_STRING);
131                     }
132                     else
133                        return sym(SCLTerminals.RPAREN);
134                   }
135   "["             { return sym(SCLTerminals.LBRACKET); }
136   "]"             { return sym(SCLTerminals.RBRACKET); }
137   "->"            { return sym(SCLTerminals.ARROW); }
138   "<-"            { return sym(SCLTerminals.BINDS); }
139   "=>"            { return sym(SCLTerminals.IMPLIES); }
140   ":-"            { return sym(SCLTerminals.FOLLOWS); }
141   ","             { return sym(SCLTerminals.COMMA); }
      // NOTE(review): DOTDOT is emitted with token text "." although the
      // matched lexeme is ".." — confirm this is intended.
142   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
      // A dot followed by spaces / line breaks is SEPARATED_DOT (the trailing
      // whitespace is consumed, the token text is just "."); any other single
      // dot is ATTACHED_DOT.
143   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
144   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
145   "-"             { return sym(SCLTerminals.MINUS, "-"); }
146 //  "<" existentialVar ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
147   "<"             { return sym(SCLTerminals.LESS, "<"); }
148   ">"             { return sym(SCLTerminals.GREATER, ">"); }
149   ";"             { return sym(SCLTerminals.SEMICOLON); }
150   "|"             { return sym(SCLTerminals.BAR); }
151   "="             { return sym(SCLTerminals.EQUALS); }
152   "::"            { return sym(SCLTerminals.HASTYPE); }
153   ":"             { return sym(SCLTerminals.COLON); }
154   "\\"            { return sym(SCLTerminals.LAMBDA); }
      // String literal openers: reset the buffer, remember where the literal
      // starts and switch to the matching string state.
155   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
156   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
157   {char_literal}  { return sym(SCLTerminals.CHAR); }
158   "_"             { return sym(SCLTerminals.BLANK); }
      // Identifiers and operator symbols.
159   {id}            { return sym(SCLTerminals.ID); }
160   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
      // A parenthesized operator is returned as an ID, with the parentheses
      // stripped from the token text.
161   {escaped_symbol} { String text = yytext();
162                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
163                   }
      // A backquoted identifier is returned as a SYMBOL, with the backquotes
      // stripped from the token text.
164   {escaped_id}    { String text = yytext();
165                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
166                   }
      // trim() drops the whitespace that the "#"-followed-by-whitespace
      // alternative of {symbol} includes in the match.
167   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
168   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
169   {float}         { return sym(SCLTerminals.FLOAT); }
170   {integer}       { return sym(SCLTerminals.INTEGER); }
      // Spaces are skipped; line breaks are returned as EOL tokens with empty text.
171   {whitespace}    { }
172   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
      // Everything else is a syntax error reported at the offending character.
173   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
174   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
175   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
176 }
177
// Rules for the inside of a "..." string literal. The decoded contents are
// collected in the `string` buffer and delivered with END_STRING or
// SUSPEND_STRING.
178 <STRING> {
      // Closing quote: return to YYINITIAL and emit the collected text in a
      // token spanning from stringStart to just past the quote.
179   \"              { yybegin(YYINITIAL); 
180                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
181                   }
      // Run of ordinary characters: copied as is.
182   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
      // Escapes \t, \n, \r and \uXXXX (four hex digits) are decoded.
183   \\t             { string.append('\t'); }
184   \\n             { string.append('\n'); }
185
186   \\r             { string.append('\r'); }
187   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
      // "\(" starts an interpolated expression: save the current paren count,
      // restart it at 1, remember to come back to STRING, switch to YYINITIAL
      // and emit the text collected so far as SUSPEND_STRING.
      // NOTE(review): the token end is yychar+1 although "\(" is two
      // characters long — confirm the intended span.
188   \\\(            { parenCountStack.add(parenCount);
189                     parenCount = 1;
190                     stateStack.add(STRING);
191                     yybegin(YYINITIAL); 
192                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
193                   }
194   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
      // Escaped single quote, double quote or backslash: appended without the
      // leading backslash. Any other backslash escape is an error.
195   \\[\'\"\\]      { string.append(yytext().substring(1)); }
196   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
      // A line break or end of input before the closing quote is an error.
197   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
198   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
199 }
200
// Rules for the inside of a """...""" string literal. Unlike STRING, line
// breaks are allowed and backslash escapes are not decoded; only "\(" is
// special.
201 <LONG_STRING> {
      // Closing delimiter: return to YYINITIAL and emit the collected text in
      // a token spanning from stringStart to just past the three quotes.
202   "\"\"\""        { yybegin(YYINITIAL); 
203                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
204                   }
      // A line break matched by this rule is stored as a single '\n'.
205   \R              { string.append('\n'); }
      // Run of ordinary characters: copied as is.
206   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
      // "\(" starts an interpolated expression: save the current paren count,
      // restart it at 1, remember to come back to LONG_STRING, switch to
      // YYINITIAL and emit the text collected so far as SUSPEND_STRING.
      // NOTE(review): the token end is yychar+1 although "\(" is two
      // characters long — confirm the intended span.
207   \\\(            { parenCountStack.add(parenCount);
208                     parenCount = 1;
209                     stateStack.add(LONG_STRING);
210                     yybegin(YYINITIAL); 
211                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
212                   }  
      // A quote that is not part of the closing delimiter, or a backslash that
      // does not start "\(", is copied literally.
213   [\"\\]          { string.append( yytext() ); }
214   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
      // Fallback for any other character, and end of input before the closing
      // delimiter: reported as an unclosed literal.
215   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
216   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
217 }
218
219 /*
220 <XML> {
221   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
222   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
223   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
224   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
225 }
226
227 <XML_TAG> {
228   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
229   ">"               { return sym(SCLTerminals.XML_TAG_END); }
230   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
231   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
232   "="               { return sym(SCLTerminals.EQUALS); }
233   "\"" [^\"]* "\""  { String text = yytext();
234                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
235   "'"  [^\']* "'"   { String text = yytext();
236                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
237   {whitespace}      { }
238 }
239 */