]> gerrit.simantics Code Review - simantics/platform.git/blob
b064a07aea9fb0a47e01c2010c2d6e50d47a56cc
[simantics/platform.git] /
1 package org.simantics.scl.compiler.internal.parsing.parser;
2
3 import org.simantics.scl.compiler.internal.parsing.Token;
4 import org.simantics.scl.compiler.errors.Locations;
5 import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
6 import gnu.trove.list.array.TIntArrayList;
7
8 %%
9
// JFlex options. The generated scanner is the public class SCLLexer; its scanning
// method is `Token nextToken()` and is declared to throw SCLSyntaxErrorException.
// %char maintains yychar (the character offset of the current match), which the
// token positions in this file are built from; %unicode selects Unicode input.
// Internal scanner errors are reported as RuntimeException.
10 %public
11 %char
12 %unicode
13 %class SCLLexer
14 %function nextToken
15 %type Token
16 %yylexthrow SCLSyntaxErrorException
17 %scanerror RuntimeException
// At end of input, return an EOF token.
18 %eofval{
19     return sym(SCLTerminals.EOF);
20 %eofval}
21
22 %{
    // Options consulted by the rules (the `eq` rule reads options.supportEq).
23     public SCLParserOptions options = SCLParserOptions.DEFAULT;
    // Offset (yychar) recorded when a string literal begins or resumes after an interpolation.
24     int stringStart;
    // Saved parenCount values; one is pushed each time "\(" suspends a string.
25     TIntArrayList parenCountStack = new TIntArrayList(2);
    // Parenthesis depth: incremented by "(" and decremented by ")".
26     int parenCount = 0;
    // Lexical state (STRING or LONG_STRING) to return to when an interpolation closes.
27     TIntArrayList stateStack = new TIntArrayList(2);
28
    // Collects the contents of the string segment currently being scanned.
29     StringBuffer string = new StringBuffer();
30     
    /** Creates a token of the given terminal kind whose text is the matched input. */
    private Token sym(int terminal) {
        return sym(terminal, yytext());
    }

    /**
     * Creates a token of the given terminal kind that spans the current match
     * (from yychar to yychar + yylength()) but carries the supplied text.
     */
    private Token sym(int terminal, String tokenText) {
        return new Token(terminal, yychar, yychar + yylength(), tokenText);
    }
37 %}
38
// Macro definitions: named regular expressions, referenced as {name} below.
// id_char is what may follow the first letter of an identifier (letter, digit or ').
// ord_symbol_char lists the operator characters; symbol_char additionally allows '#'.
39 letter          = [a-zA-Z_]
40 digit           = [0-9]
41 hexDigit        = [0-9a-fA-F]
42 id_char         = {letter} | {digit} | "'"
43 ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
44 symbol_char     = {ord_symbol_char} | "#"
45
// An identifier followed by a dot; {id} accepts any number of these before the final name.
46 prefix          = {letter} {id_char}* "."
47
// Token-level patterns used by the <YYINITIAL> rules.
// - id and annotation_id share the alternative "(" ","+ ")" (e.g. "(,)").
//   NOTE(review): the {id} rule is listed before {annotation_id} in <YYINITIAL>, so {id}
//   wins the tie on that input and annotation_id's copy looks unreachable -- confirm.
// - symbol: a run of ord_symbol_char, or two or more symbol_char, or '#' followed by
//   spaces/newlines (the {symbol} rule trims the matched text).
// - float: the exponent accepts an optional '-' sign but no '+'.
// - c_comment runs from "//" to the end of the line; cpp_comment runs from "/*" up to
//   and including the first "*/".
48 annotation_id   = "@" {letter} {id_char}*
49                 | "(" ","+ ")"
50 id              = {prefix}* ("?")? {letter} {id_char}*
51                 | "(" ","+ ")"
52 symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
53 separatedDot    = "." [ \n\r]+
54 escaped_symbol_inner = {symbol_char}+ | "."
55 escaped_symbol  = "(" {escaped_symbol_inner} ")"
56 escaped_id      = "`" {id} "`"
57 queryOp         = "<" [&|!?] ">"
58 integer         = {digit}+
59 float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
60                 | {digit}+ [eE] ("-")? {digit}+
61 whitespace      = [ ]+
62 c_comment       = "//" [^\n\r]*
63 cpp_comment     = "/*" ~"*/"
64
// In this file existentialVar is referenced only from commented-out XML rules.
65 //xmlName         = {letter} ({letter} | {digit} | [-.])*
66 existentialVar  = "?" {letter} {id_char}*
67
// A character literal: between single quotes, either one character other than ', \ or
// U+FFFD, or a backslash followed by any single character other than U+FFFD.
68 char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
69
// Lexical states. STRING and LONG_STRING have rule groups below.
// NOTE(review): no rules for CHAR are visible in this file -- confirm it is still needed.
70 %state STRING
71 %state LONG_STRING
72 %state CHAR
73 /*%state XML
74 %state XML_TAG*/
75
76 %%
77
// Default state: comments, keywords, punctuation, literals, identifiers and symbols.
// JFlex takes the longest match and, on a tie, the rule listed first, so the keyword
// rules have to stay ahead of {id}.
78 <YYINITIAL> {
79   {c_comment}     { return sym(SCLTerminals.COMMENT); }
80   {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
81   forall          { return sym(SCLTerminals.FORALL); }
82   if              { return sym(SCLTerminals.IF); }
83   then            { return sym(SCLTerminals.THEN); }
84   else            { return sym(SCLTerminals.ELSE); }
85   where           { return sym(SCLTerminals.WHERE); }
86   when            { return sym(SCLTerminals.WHEN); }
87   rule            { return sym(SCLTerminals.RULE); }
// Two-word keywords are matched as one token; the words are separated by spaces only.
88   abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
89   extends         { return sym(SCLTerminals.EXTENDS); }
90   mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
91   transformation  { return sym(SCLTerminals.TRANSFORMATION); }
92   select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
93   select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
94   select          { return sym(SCLTerminals.SELECT); }
95   enforce         { return sym(SCLTerminals.ENFORCE); }
96   do              { return sym(SCLTerminals.DO); }
// `eq` is a keyword only when options.supportEq is set; otherwise it is an ordinary ID.
97   eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
98   mdo             { return sym(SCLTerminals.MDO); }
99   class           { return sym(SCLTerminals.CLASS); }
100   effect          { return sym(SCLTerminals.EFFECT); }
101   match           { return sym(SCLTerminals.MATCH); }
102   with            { return sym(SCLTerminals.WITH); }
103   let             { return sym(SCLTerminals.LET); }
104   in              { return sym(SCLTerminals.IN); }
105   instance        { return sym(SCLTerminals.INSTANCE); }
106   deriving        { return sym(SCLTerminals.DERIVING); }
107   data            { return sym(SCLTerminals.DATA); }
108   type            { return sym(SCLTerminals.TYPE); }
109   infixl          { return sym(SCLTerminals.INFIXL); }
110   infixr          { return sym(SCLTerminals.INFIXR); }
111   infix           { return sym(SCLTerminals.INFIX); }
112   include         { return sym(SCLTerminals.INCLUDE); }  
113   import          { return sym(SCLTerminals.IMPORT); }
114   importJava      { return sym(SCLTerminals.IMPORTJAVA); }
115   hiding          { return sym(SCLTerminals.HIDING); }
116 //  relation        { return sym(SCLTerminals.RELATION); }
117   as              { return sym(SCLTerminals.AS); }
118   by              { return sym(SCLTerminals.BY); }
119   constraint      { return sym(SCLTerminals.CONSTRAINT); }
120   {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
121   "@"             { return sym(SCLTerminals.AT); }
122   "{"             { return sym(SCLTerminals.LBRACE); }
123   "}"             { return sym(SCLTerminals.RBRACE); }
// Parentheses are counted so that the ")" closing a "\(" string interpolation can be
// told apart from an ordinary right parenthesis.
124   "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
125   ")"             { --parenCount;
                    // Depth is back to zero and a context was saved by "\(": restore the outer
                    // depth, start a fresh string segment here and resume the saved string state.
126                     if(parenCount == 0 && !parenCountStack.isEmpty()) { 
127                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
128                        string.setLength(0);
129                        stringStart=yychar;
130                        yybegin(stateStack.removeAt(stateStack.size()-1));
131                        return sym(SCLTerminals.CONTINUE_STRING);
132                     }
133                     else
134                        return sym(SCLTerminals.RPAREN);
135                   }
136   "["             { return sym(SCLTerminals.LBRACKET); }
137   "]"             { return sym(SCLTerminals.RBRACKET); }
138   "->"            { return sym(SCLTerminals.ARROW); }
139   "<-"            { return sym(SCLTerminals.BINDS); }
140   "=>"            { return sym(SCLTerminals.IMPLIES); }
141   ":-"            { return sym(SCLTerminals.FOLLOWS); }
142   ","             { return sym(SCLTerminals.COMMA); }
// NOTE(review): the DOTDOT token spans both characters but its text is "." -- confirm intended.
143   ".."            { return sym(SCLTerminals.DOTDOT, "."); }
// A dot followed by spaces/newlines is SEPARATED_DOT; any other dot is ATTACHED_DOT.
144   {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
145   "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
146   "-"             { return sym(SCLTerminals.MINUS, "-"); }
147 //  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
148   "<"             { return sym(SCLTerminals.LESS, "<"); }
149   ">"             { return sym(SCLTerminals.GREATER, ">"); }
150   ";"             { return sym(SCLTerminals.SEMICOLON); }
151   "|"             { return sym(SCLTerminals.BAR); }
152   "="             { return sym(SCLTerminals.EQUALS); }
153   "::"            { return sym(SCLTerminals.HASTYPE); }
154   ":"             { return sym(SCLTerminals.COLON); }
155   "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
156   "\\"            { return sym(SCLTerminals.LAMBDA); }
// Opening quotes reset the string buffer, remember where the literal starts and switch
// to the matching string state.
157   "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
158   "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
159   {char_literal}  { return sym(SCLTerminals.CHAR); }
160   "_"             { return sym(SCLTerminals.BLANK); }
161   {id}            { return sym(SCLTerminals.ID); }
162   {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
// A parenthesized symbol such as (+) becomes an ID and a backquoted identifier becomes a
// SYMBOL; in both cases the surrounding delimiters are stripped from the token text.
163   {escaped_symbol} { String text = yytext();
164                      return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
165                   }
166   {escaped_id}    { String text = yytext();
167                      return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
168                   }
// trim() drops the whitespace that the "#" [ \n\r]+ alternative of {symbol} includes.
169   {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
170   "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
171   {float}         { return sym(SCLTerminals.FLOAT); }
172   {integer}       { return sym(SCLTerminals.INTEGER); }
// Spaces are skipped; a line break produces an EOL token with empty text.
173   {whitespace}    { }
174   \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }
// Anything else is a syntax error; U+FFFD and tab get dedicated messages.
175   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
176   \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
177   .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
178 }
179
// Body of a single-line "..." string literal. Text and decoded escapes are collected in
// `string`; the closing quote ends the literal and "\(" suspends it for an interpolated
// expression that is scanned in <YYINITIAL>.
180 <STRING> {
181   \"              { yybegin(YYINITIAL); 
182                     return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
183                   }
184   [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
185   \\t             { string.append('\t'); }
186   \\n             { string.append('\n'); }
187
188   \\r             { string.append('\r'); }
// \uXXXX: four hex digits decoded into one UTF-16 code unit.
189   \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
// Start of an interpolation: save the current paren depth and this state, then scan the
// embedded expression in <YYINITIAL> until the matching ")" resumes the string.
190   \\\(            { parenCountStack.add(parenCount);
191                     parenCount = 1;
192                     stateStack.add(STRING);
193                     yybegin(YYINITIAL); 
                    // NOTE(review): the match "\(" is two characters but the end offset is yychar+1 -- confirm intended.
194                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
195                   }
196   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
// \' \" and \\ stand for the escaped character itself.
197   \\[\'\"\\]      { string.append(yytext().substring(1)); }
198   \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }  
// Fallback for a backslash that no longer escape rule can extend, i.e. one followed by a
// line terminator or by end of input. Without it no pattern matches and JFlex raises its
// internal scan error (RuntimeException, see %scanerror) instead of a syntax error.
  \\              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }
// A line break or end of input before the closing quote means the literal is unclosed.
199   \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
200   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
201 }
202
// Body of a triple-quoted """...""" string literal. Unlike <STRING> it may span lines and
// performs no escape decoding: a lone quote or backslash is kept as-is, and only "\("
// (interpolation) and the closing """ are special.
203 <LONG_STRING> {
204   "\"\"\""        { yybegin(YYINITIAL); 
205                       return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
206                   }
// A line break matched by \R is stored as a single '\n'.
207   \R              { string.append('\n'); }
208   [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
// Start of an interpolation: save the current paren depth and this state, then scan the
// embedded expression in <YYINITIAL> until the matching ")" resumes the string.
209   \\\(            { parenCountStack.add(parenCount);
210                     parenCount = 1;
211                     stateStack.add(LONG_STRING);
212                     yybegin(YYINITIAL); 
                    // NOTE(review): the match "\(" is two characters but the end offset is yychar+1 -- confirm intended.
213                     return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString()); 
214                   }  
// A quote that does not start """ or a backslash that does not start "\(" is literal text.
215   [\"\\]          { string.append( yytext() ); }
216   \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
// NOTE(review): every character "." can match is already claimed by an earlier rule of the
// same length, so this rule appears unreachable -- confirm.
217   .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
218   <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
219 }
220
221 /*
222 <XML> {
223   "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
224   "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
225   "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
226   [^<]+              { return sym(SCLTerminals.XML_TEXT); }
227 }
228
229 <XML_TAG> {
230   "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
231   ">"               { return sym(SCLTerminals.XML_TAG_END); }
232   {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
233   {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
234   "="               { return sym(SCLTerminals.EQUALS); }
235   "\"" [^\"]* "\""  { String text = yytext();
236                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
237   "'"  [^\']* "'"   { String text = yytext();
238                       return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
239   {whitespace}      { }
240 }
241 */