gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLLexer.flex
0aafe287261101a5ac1d099ae9b3c988e223a40b
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / internal / parsing / parser / SCLLexer.flex
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.compilation.CompilationContext;
4 import org.simantics.scl.compiler.internal.parsing.Token;
5 import org.simantics.scl.compiler.errors.Locations;
6 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
7 import gnu.trove.list.array.TIntArrayList;
8
9 %%
10 // Generates public class SCLLexer; its scanning method nextToken() returns Token and may throw SCLSyntaxErrorException.
11 %public
12 %char
13 %unicode
14 %class SCLLexer
15 %function nextToken
16 %type Token
17 %yylexthrow SCLSyntaxErrorException
18 %scanerror RuntimeException
19 %eofval{
20     return sym(SCLTerminals.EOF);
21 %eofval}
22
23 %{
24     public SCLParserOptions options = SCLParserOptions.DEFAULT; // options.supportEq decides whether "eq" is a keyword
25     int stringStart; // yychar recorded when the string segment currently being collected began
26     TIntArrayList parenCountStack = new TIntArrayList(2); // parenCount values saved when "\(" suspends a string
27     int parenCount = 0; // "(" seen minus ")" seen; restarted at 1 inside each expression embedded in a string
28     TIntArrayList stateStack = new TIntArrayList(2); // STRING or LONG_STRING state to resume after an embedded expression
29
30     StringBuffer string = new StringBuffer(); // text of the string segment collected so far, escapes already decoded
31     CompilationContext context; // not initialized here; assigned by setCompilationContext
32     
33     /** Builds a token for the current match whose text is the matched input itself. */
34     private Token sym(int id) { return sym(id, yytext()); }
35     /** Builds a token covering the current match (yychar up to yychar+yylength()) with the given text. */
36     private Token sym(int id, String text) {
37         return new Token(id, yychar, yychar + yylength(), text);
38     }
39     public void setCompilationContext(CompilationContext context) {
40         this.context = context;
41     }
42     public boolean supportCHR() {
43         return context.header == null ? false : context.header.chr;
44     }
45 %}
46 // Character classes from which identifiers and operator symbols are built.
47 letter          = [a-zA-Z_]
48 digit           = [0-9]
49 hexDigit        = [0-9a-fA-F]
50 id_char         = {letter} | {digit} | "'"
51 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
52 symbol_char     = {ord_symbol_char} | "#"
53 // A prefix is an identifier followed by a dot; {id} accepts any number of prefixes before the final name.
54 prefix          = {letter} {id_char}* "."
55 // Both macros below also accept a parenthesized run of commas; YYINITIAL lists {id} first, so that form is lexed as ID.
56 annotation_id   = "@" {letter} {id_char}*
57                 | "(" ","+ ")"
58 id              = {prefix}* ("?")? {letter} {id_char}*
59                 | "(" ","+ ")"
60 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
61 separatedDot    = "." [ \n\r]+
62 escaped_symbol_inner = {symbol_char}+ | "."
63 escaped_symbol  = "(" {escaped_symbol_inner} ")"
64 escaped_id      = "`" {id} "`"
65 queryOp         = "<" [&|!?] ">"
66 integer         = {digit}+
67 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
68                 | {digit}+ [eE] ("-")? {digit}+
69 whitespace      = [ ]+
70 c_comment       = "//" [^\n\r]*
71 cpp_comment     = "/*" ~"*/"
72 // Despite the names, c_comment is the line comment and cpp_comment the block comment. xmlName below is disabled.
73 //xmlName         = {letter} ({letter} | {digit} | [-.])*
74 existentialVar  = "?" {letter} {id_char}*
75 // A character literal holds one plain character or a backslash plus one character; U+FFFD is never accepted.
76 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
77
78 %state STRING
79 %state LONG_STRING
80 %state CHAR
81 /*%state XML
82 %state XML_TAG*/
83
84 %%
85
86 <YYINITIAL> {
87   {c_comment}     { return sym(SCLTerminals.COMMENT); }
88   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
89   forall          { return sym(SCLTerminals.FORALL); }
90   if              { return sym(SCLTerminals.IF); }
91   then            { return sym(SCLTerminals.THEN); }
92   else            { return sym(SCLTerminals.ELSE); }
93   where           { return sym(SCLTerminals.WHERE); }
94   when            { return sym(SCLTerminals.WHEN); }
95   ruleset         { return sym(supportCHR() ? SCLTerminals.RULESET : SCLTerminals.ID); /* keyword only when CHR is enabled */ }
96   rule            { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.RULE); /* plain identifier when CHR is enabled */ }
97   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
98   extends         { return sym(SCLTerminals.EXTENDS); }
99   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
100   transformation  { return sym(supportCHR() ? SCLTerminals.ID : SCLTerminals.TRANSFORMATION); /* plain identifier when CHR is enabled */ }
101   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
102   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
103   select          { return sym(supportCHR() ? SCLTerminals.CHR_SELECT : SCLTerminals.SELECT); /* separate terminal when CHR is enabled */ }
104   enforce         { return sym(SCLTerminals.ENFORCE); }
105   do              { return sym(SCLTerminals.DO); }
106   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); /* keyword only if options.supportEq is set */ }
107   mdo             { return sym(SCLTerminals.MDO); }
108   edo             { return sym(SCLTerminals.EDO); }
109   class           { return sym(SCLTerminals.CLASS); }
110   effect          { return sym(SCLTerminals.EFFECT); }
111   match           { return sym(SCLTerminals.MATCH); }
112   with            { return sym(SCLTerminals.WITH); }
113   let             { return sym(SCLTerminals.LET); }
114   in              { return sym(SCLTerminals.IN); }
115   instance        { return sym(SCLTerminals.INSTANCE); }
116   deriving        { return sym(SCLTerminals.DERIVING); }
117   data            { return sym(SCLTerminals.DATA); }
118   type            { return sym(SCLTerminals.TYPE); }
119   infixl          { return sym(SCLTerminals.INFIXL); }
120   infixr          { return sym(SCLTerminals.INFIXR); }
121   infix           { return sym(SCLTerminals.INFIX); }
122   include         { return sym(SCLTerminals.INCLUDE); }  
123   import          { return sym(SCLTerminals.IMPORT); }
124   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
125   hiding          { return sym(SCLTerminals.HIDING); }
126 //  relation        { return sym(SCLTerminals.RELATION); }
127   as              { return sym(SCLTerminals.AS); }
128   by              { return sym(SCLTerminals.BY); }
129   constraint      { return sym(SCLTerminals.CONSTRAINT); }
130   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
131   "@"             { return sym(SCLTerminals.AT); }
132   "{"             { return sym(SCLTerminals.LBRACE); }
133   "}"             { return sym(SCLTerminals.RBRACE); }
134   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
135   ")"             { --parenCount;
136                     if(parenCount == 0 && !parenCountStack.isEmpty()) { // this parenthesis closes an expression embedded in a string
137                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1); // restore the count saved when the string was suspended
138                        string.setLength(0);
139                        stringStart=yychar;
140                        yybegin(stateStack.removeAt(stateStack.size()-1)); // resume the saved STRING or LONG_STRING state
141                        return sym(SCLTerminals.CONTINUE_STRING);
142                     }
143                     else
144                        return sym(SCLTerminals.RPAREN);
145                   }
146   "["             { return sym(SCLTerminals.LBRACKET); }
147   "]"             { return sym(SCLTerminals.RBRACKET); }
148   "->"            { return sym(SCLTerminals.ARROW); }
149   "<-"            { return sym(SCLTerminals.BINDS); }
150   "=>"            { return sym(SCLTerminals.IMPLIES); }
151   ":-"            { return sym(SCLTerminals.FOLLOWS); }
152   ","             { return sym(SCLTerminals.COMMA); }
153   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
154   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
155   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
156   "-"             { return sym(SCLTerminals.MINUS, "-"); }
157 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
158   "<"             { return sym(SCLTerminals.LESS, "<"); }
159   ">"             { return sym(SCLTerminals.GREATER, ">"); }
160   ";"             { return sym(SCLTerminals.SEMICOLON); }
161   "|"             { return sym(SCLTerminals.BAR); }
162   "="             { return sym(SCLTerminals.EQUALS); }
163   "::"            { return sym(SCLTerminals.HASTYPE); }
164   ":"             { return sym(SCLTerminals.COLON); }
165   "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
166   "\\"            { return sym(SCLTerminals.LAMBDA); }
167   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
168   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
169   {char_literal}  { return sym(SCLTerminals.CHAR); }
170   "_"             { return sym(SCLTerminals.BLANK); }
171   {id}            { return sym(SCLTerminals.ID); }
172   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
173   {escaped_symbol} { String text = yytext();
174                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); // parenthesized operator becomes an ID without its parentheses
175                   }
176   {escaped_id}    { String text = yytext();
177                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); // backquoted identifier becomes a SYMBOL without its backquotes
178                   }
179   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); /* trim drops the whitespace one {symbol} alternative matches after a hash */ }
180   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
181   {float}         { return sym(SCLTerminals.FLOAT); }
182   {integer}       { return sym(SCLTerminals.INTEGER); }
183   {whitespace}    { }
184   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); /* EOL tokens carry empty text */ }
185   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
186   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
187   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
188 }
189
190 <STRING> {
191   \"              { yybegin(YYINITIAL); 
192                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); // spans from stringStart through the closing quote
193                   }
194   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
195   \\t             { string.append('\t'); }
196   \\n             { string.append('\n'); }
197 // Escapes decoded in this state: t, n, r, u plus four hex digits, and a quote or backslash kept literally.
198   \\r             { string.append('\r'); }
199   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
200   \\\(            { parenCountStack.add(parenCount); // remember the count of the surrounding expression
201                     parenCount = 1; // the opening parenthesis just consumed counts as the first one
202                     stateStack.add(STRING); // the matching closing parenthesis returns to this state
203                     yybegin(YYINITIAL); 
204                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); // NOTE(review): match is two characters but the token ends at yychar+1 - confirm intended
205                   }
206   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
207   \\[\'\"\\]      { string.append(yytext().substring(1)); }
208   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
209   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
210   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
211 }
212
213 <LONG_STRING> {
214   "\"\"\""        { yybegin(YYINITIAL); 
215                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString()); // spans from stringStart through the three closing quotes
216                   }
217   \R              { string.append('\n'); /* every line break is stored as a single newline character */ }
218   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
219   \\\(            { parenCountStack.add(parenCount); // remember the count of the surrounding expression
220                     parenCount = 1; // the opening parenthesis just consumed counts as the first one
221                     stateStack.add(LONG_STRING); // the matching closing parenthesis returns to this state
222                     yybegin(YYINITIAL); 
223                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); // NOTE(review): match is two characters but the token ends at yychar+1 - confirm intended
224                   }  
225   [\"\\]          { string.append( yytext() ); /* a lone quote or backslash is kept verbatim; this state decodes no escapes */ }
226   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
227   .               { /* NOTE(review): looks unreachable, earlier rules already match every character - confirm */ throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
228   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
229 }
230
231 /*
232 <XML> {
233   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
234   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
235   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
236   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
237 }
238
239 <XML_TAG> {
240   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
241   ">"               { return sym(SCLTerminals.XML_TAG_END); }
242   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
243   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
244   "="               { return sym(SCLTerminals.EQUALS); }
245   "\"" [^\"]* "\""  { String text = yytext();
246                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
247   "'"  [^\']* "'"   { String text = yytext();
248                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
249   {whitespace}      { }
250 }
251 */