]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLLexer.flex
New SCL syntax <<effects>>
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / internal / parsing / parser / SCLLexer.flex
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.compilation.CompilationContext;
4 import org.simantics.scl.compiler.internal.parsing.Token;
5 import org.simantics.scl.compiler.errors.Locations;
6 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
7 import gnu.trove.list.array.TIntArrayList;
8
9 %%
10
11 %public
/* Scanner options: the generated class is public (%public), is named SCLLexer (%class),
   and its scanning method is nextToken (%function) returning Token (%type). */
12 %char
/* %char turns on character counting, which provides the yychar offset used by the
   actions for token positions and error locations. */
13 %unicode
14 %class SCLLexer
15 %function nextToken
16 %type Token
/* nextToken is declared to throw SCLSyntaxErrorException, which the rule actions raise
   explicitly. Internal scanner errors, such as input that no rule matches, are reported
   with RuntimeException (%scanerror). */
17 %yylexthrow SCLSyntaxErrorException
18 %scanerror RuntimeException
/* Value returned at end of input in states that have no EOF rule of their own. */
19 %eofval{
20     return sym(SCLTerminals.EOF);
21 %eofval}
22
23 %{
24     public SCLParserOptions options = SCLParserOptions.DEFAULT; // options.supportEq decides whether eq is lexed as a keyword
25     int stringStart; // yychar offset where the current string literal segment began
26     TIntArrayList parenCountStack = new TIntArrayList(2); // parenCount values saved when a string is suspended by \(
27     int parenCount = 0; // incremented on each ( and decremented on each ) seen in YYINITIAL
28     TIntArrayList stateStack = new TIntArrayList(2); // string state (STRING or LONG_STRING) to resume after an interpolation
29
30     StringBuffer string = new StringBuffer(); // decoded text of the string segment currently being lexed
31     CompilationContext context; // set via setCompilationContext; its header flags chr and edo are read by supportCHR/supportEDO
32     
33     private Token sym(int id) {
        // Token for the current match whose text is the matched input itself.
34         return sym(id, yytext());
35     }
36     private Token sym(int id, String text) {
        // Token spanning the current match, from yychar to yychar + yylength(),
        // but carrying the text supplied by the caller.
        final int start = yychar;
        final int end = start + yylength();
37         return new Token(id, start, end, text);
38     }
39     public void setCompilationContext(CompilationContext context) {
        // Stores the context whose header flags (chr, edo) are read by supportCHR() and supportEDO().
40         this.context = context;
41     }
42     public boolean supportCHR() {
        // True only when the module header enables CHR syntax. A missing context or a missing
        // header both mean "disabled", so a lexer that is used before setCompilationContext()
        // was called falls back to the non-CHR keywords instead of throwing a NullPointerException.
43         return context != null && context.header != null && context.header.chr;
44     }
45     public boolean supportEDO() {
        // True only when the module header enables the edo keyword. A missing context or a
        // missing header both mean "disabled", so a lexer that is used before
        // setCompilationContext() was called lexes edo as a plain identifier instead of
        // throwing a NullPointerException.
46         return context != null && context.header != null && context.header.edo;
47     }
48 %}
49
50 letter          = [a-zA-Z_]
/* Basic character classes. id_char extends letters with digits and the apostrophe;
   symbol_char is ord_symbol_char plus the hash sign. */
51 digit           = [0-9]
52 hexDigit        = [0-9a-fA-F]
53 id_char         = {letter} | {digit} | "'"
54 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
55 symbol_char     = {ord_symbol_char} | "#"
56
/* One qualification step: an identifier immediately followed by a dot. */
57 prefix          = {letter} {id_char}* "."
58
/* Annotation name introduced by an at sign.
   NOTE(review): the second alternative (parentheses enclosing commas) duplicates the one in id.
   Because the {id} rule is listed before {annotation_id} in YYINITIAL, it appears never to be
   selected - confirm before relying on it or removing it. */
59 annotation_id   = "@" {letter} {id_char}*
60                 | "(" ","+ ")"
/* Identifier: any number of dotted prefixes, an optional question mark, then a letter and
   id characters; or a tuple constructor written as parentheses enclosing one or more commas. */
61 id              = {prefix}* ("?")? {letter} {id_char}*
62                 | "(" ","+ ")"
/* Operator symbol. A lone hash only counts as a symbol when followed by spaces or line breaks;
   that trailing whitespace is part of the match, which is why the rule action trims the text. */
63 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
/* A dot followed by spaces or line breaks, as opposed to a dot attached to what follows. */
64 separatedDot    = "." [ \n\r]+
/* An operator wrapped in parentheses, and an identifier wrapped in backquotes. */
65 escaped_symbol_inner = {symbol_char}+ | "."
66 escaped_symbol  = "(" {escaped_symbol_inner} ")"
67 escaped_id      = "`" {id} "`"
68 queryOp         = "<" [&|!?] ">"
/* Numeric literals. In the exponent only an optional minus sign is accepted. */
69 integer         = {digit}+
70 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
71                 | {digit}+ [eE] ("-")? {digit}+
/* Only the space character is whitespace; tabs are rejected by a rule in YYINITIAL. */
72 whitespace      = [ ]+
/* NOTE: despite the names, c_comment matches a two-slash comment running to the end of the line
   and cpp_comment matches a slash-star block comment up to its first terminator. */
73 c_comment       = "//" [^\n\r]*
74 cpp_comment     = "/*" ~"*/"
75
76 //xmlName         = {letter} ({letter} | {digit} | [-.])*
77 existentialVar  = "?" {letter} {id_char}*
78
/* Character literal between apostrophes: either one character other than an apostrophe,
   a backslash or U+FFFD, or a backslash followed by any character except U+FFFD. */
79 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
80
81 %state STRING
/* Lexical states: STRING and LONG_STRING hold the rules for the body of one-line and
   triple-quoted string literals.
   NOTE(review): CHAR is declared but no rules for it are visible in this file; character
   literals are matched by the char_literal macro in YYINITIAL - confirm whether it is still needed. */
82 %state LONG_STRING
83 %state CHAR
84 /*%state XML
85 %state XML_TAG*/
86
87 %%
88
89 <YYINITIAL> {
  /* Default state: ordinary SCL source text. The scanner takes the longest match and, when
     several rules match the same length, the one listed first. The keyword rules therefore
     have to stay above the {id} rule, and longer fixed tokens win over their prefixes. */
90   {c_comment}     { return sym(SCLTerminals.COMMENT); }
91   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
  /* Keywords. Some of them depend on settings and are returned as a plain ID otherwise:
     ruleset is a keyword only when supportCHR() is true, rule and transformation only when
     it is false, select yields CHR_SELECT or SELECT accordingly, eq requires
     options.supportEq and edo requires supportEDO(). Two-word keywords accept only
     spaces between the words. */
92   forall          { return sym(SCLTerminals.FORALL); }
93   if              { return sym(SCLTerminals.IF); }
94   then            { return sym(SCLTerminals.THEN); }
95   else            { return sym(SCLTerminals.ELSE); }
96   where           { return sym(SCLTerminals.WHERE); }
97   when            { return sym(SCLTerminals.WHEN); }
98   ruleset         { return sym(supportCHR() ? SCLTerminals.RULESET : SCLTerminals.ID); }
99   rule            { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.RULE); }
100   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
101   extends         { return sym(SCLTerminals.EXTENDS); }
102   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
103   transformation  { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.TRANSFORMATION); }
104   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
105   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
106   select          { return sym(supportCHR() ? SCLTerminals.CHR_SELECT : SCLTerminals.SELECT); }
107   enforce         { return sym(SCLTerminals.ENFORCE); }
108   do              { return sym(SCLTerminals.DO); }
109   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
110   mdo             { return sym(SCLTerminals.MDO); }
111   edo             { return sym(supportEDO() ? SCLTerminals.EDO : SCLTerminals.ID); }
112   class           { return sym(SCLTerminals.CLASS); }
113   effect          { return sym(SCLTerminals.EFFECT); }
114   match           { return sym(SCLTerminals.MATCH); }
115   with            { return sym(SCLTerminals.WITH); }
116   let             { return sym(SCLTerminals.LET); }
117   in              { return sym(SCLTerminals.IN); }
118   instance        { return sym(SCLTerminals.INSTANCE); }
119   deriving        { return sym(SCLTerminals.DERIVING); }
120   data            { return sym(SCLTerminals.DATA); }
121   type            { return sym(SCLTerminals.TYPE); }
122   infixl          { return sym(SCLTerminals.INFIXL); }
123   infixr          { return sym(SCLTerminals.INFIXR); }
124   infix           { return sym(SCLTerminals.INFIX); }
125   include         { return sym(SCLTerminals.INCLUDE); }  
126   import          { return sym(SCLTerminals.IMPORT); }
127   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
128   hiding          { return sym(SCLTerminals.HIDING); }
129 //  relation        { return sym(SCLTerminals.RELATION); }
130   as              { return sym(SCLTerminals.AS); }
131   by              { return sym(SCLTerminals.BY); }
132   constraint      { return sym(SCLTerminals.CONSTRAINT); }
  /* Punctuation and fixed operators. */
133   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
134   "@"             { return sym(SCLTerminals.AT); }
135   "{"             { return sym(SCLTerminals.LBRACE); }
136   "}"             { return sym(SCLTerminals.RBRACE); }
  /* Parentheses are counted so that the one closing an interpolated expression inside a
     string literal can be told apart from ordinary closing parentheses. */
137   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
138   ")"             { --parenCount;
                    // Count back at zero with a saved count on the stack: this parenthesis closes
                    // an interpolation. Restore the outer count, start a fresh string segment at
                    // this offset and resume the string state that was suspended.
139                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
140                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
141                        string.setLength(0);
142                        stringStart=yychar;
143                        yybegin(stateStack.removeAt(stateStack.size()-1));
144                        return sym(SCLTerminals.CONTINUE_STRING);
145                     }
146                     else
147                        return sym(SCLTerminals.RPAREN);
148                   }
149   "["             { return sym(SCLTerminals.LBRACKET); }
150   "]"             { return sym(SCLTerminals.RBRACKET); }
151   "->"            { return sym(SCLTerminals.ARROW); }
152   "<-"            { return sym(SCLTerminals.BINDS); }
153   "=>"            { return sym(SCLTerminals.IMPLIES); }
154   ":-"            { return sym(SCLTerminals.FOLLOWS); }
155   ","             { return sym(SCLTerminals.COMMA); }
  /* Dots: a double dot, a dot followed by whitespace, and a dot attached to what follows.
     All three tokens carry a single dot as their text. */
156   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
157   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
158   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
159   "-"             { return sym(SCLTerminals.MINUS, "-"); }
160 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
161   "<<"            { return sym(SCLTerminals.DOUBLE_LESS, "<<"); }
162   "<"             { return sym(SCLTerminals.LESS, "<"); }
163   ">>"            { return sym(SCLTerminals.DOUBLE_GREATER, ">>"); }
164   ">"             { return sym(SCLTerminals.GREATER, ">"); }
165   ";"             { return sym(SCLTerminals.SEMICOLON); }
166   "|"             { return sym(SCLTerminals.BAR); }
167   "="             { return sym(SCLTerminals.EQUALS); }
168   "::"            { return sym(SCLTerminals.HASTYPE); }
169   ":"             { return sym(SCLTerminals.COLON); }
170   "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
171   "\\"            { return sym(SCLTerminals.LAMBDA); }
  /* An opening quote clears the string buffer, records the start offset and switches to the
     matching string state; the triple quote is the longer match and so takes precedence. */
172   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
173   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
174   {char_literal}  { return sym(SCLTerminals.CHAR); }
175   "_"             { return sym(SCLTerminals.BLANK); }
176   {id}            { return sym(SCLTerminals.ID); }
177   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
  /* An operator in parentheses is returned as an ID and an identifier in backquotes as a
     SYMBOL; in both cases the delimiters are stripped from the token text. */
178   {escaped_symbol} { String text = yytext();
179                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
180                   }
181   {escaped_id}    { String text = yytext();
182                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
183                   }
  /* trim() removes the whitespace that the symbol macro may include after a lone hash. */
184   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
185   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
186   {float}         { return sym(SCLTerminals.FLOAT); }
187   {integer}       { return sym(SCLTerminals.INTEGER); }
  /* Spaces are skipped and a line break yields an EOL token with empty text. The remaining
     rules reject characters that are not allowed outside string literals. */
188   {whitespace}    { }
189   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
190   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
191   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
192   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
193 }
194
195 <STRING> {
  /* Body of a one-line string literal. Decoded characters accumulate in the string buffer
     until the closing quote (END_STRING) or the start of an interpolation (SUSPEND_STRING).
     Both tokens start at stringStart and carry the decoded text. */
196   \"              { yybegin(YYINITIAL); 
197                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
198                   }
199   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
  /* Escape sequences for tab, newline, carriage return and a four-digit hexadecimal code unit. */
200   \\t             { string.append('\t'); }
201   \\n             { string.append('\n'); }
202
203   \\r             { string.append('\r'); }
204   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
  /* A backslash followed by an opening parenthesis starts an interpolated expression: save
     the current parenthesis count, remember which string state to come back to, and lex the
     expression in YYINITIAL until the matching closing parenthesis. */
205   \\\(            { parenCountStack.add(parenCount);
206                     parenCount = 1;
207                     stateStack.add(STRING);
208                     yybegin(YYINITIAL); 
209                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
210                   }
211   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  /* An escaped apostrophe, quote or backslash stands for the character itself; any other
     escaped character is an error. */
212   \\[\'\"\\]      { string.append(yytext().substring(1)); }
213   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
  /* A backslash directly followed by a line break or by the end of input matches none of the
     rules above, because the dot never matches a line terminator. Without this rule the
     scanner would fail with its generic no-match RuntimeException instead of a located
     syntax error. Being the shortest backslash rule, it is chosen only in that situation. */
  \\              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
214   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
215   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
216 }
217
218 <LONG_STRING> {
  /* Body of a triple-quoted string literal, ended by the next triple quote (END_STRING).
     Line breaks are allowed and are stored as a single newline character. No escape
     sequences are decoded: a backslash is copied as is, unless it is followed by an opening
     parenthesis, which suspends the string for an interpolated expression in the same way as
     in STRING. Quote characters that do not form a triple quote are copied as well.
     NOTE(review): the catch-all dot rule near the end looks unreachable, since every character
     it could match is already matched by an earlier rule of equal or greater length - confirm. */
219   "\"\"\""        { yybegin(YYINITIAL); 
220                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
221                   }
222   \R              { string.append('\n'); }
223   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
224   \\\(            { parenCountStack.add(parenCount);
225                     parenCount = 1;
226                     stateStack.add(LONG_STRING);
227                     yybegin(YYINITIAL); 
228                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
229                   }  
230   [\"\\]          { string.append( yytext() ); }
231   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
232   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
233   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
234 }
235
236 /*
237 <XML> {
238   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
239   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
240   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
241   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
242 }
243
244 <XML_TAG> {
245   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
246   ">"               { return sym(SCLTerminals.XML_TAG_END); }
247   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
248   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
249   "="               { return sym(SCLTerminals.EQUALS); }
250   "\"" [^\"]* "\""  { String text = yytext();
251                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
252   "'"  [^\']* "'"   { String text = yytext();
253                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
254   {whitespace}      { }
255 }
256 */