]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLLexer.flex
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / internal / parsing / parser / SCLLexer.flex
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLLexer.flex b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/internal/parsing/parser/SCLLexer.flex
new file mode 100644 (file)
index 0000000..ecae4e5
--- /dev/null
@@ -0,0 +1,239 @@
+package org.simantics.scl.compiler.internal.parsing.parser;
+
+import org.simantics.scl.compiler.internal.parsing.Token;
+import org.simantics.scl.compiler.errors.Locations;
+import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
+import gnu.trove.list.array.TIntArrayList;
+
+%%
+
+// Generate the scanner as a public class.
+%public
+// Keep the character offset of the current match in yychar; the sym() helpers
+// use it to compute token locations.
+%char
+// Scan the full Unicode character set.
+%unicode
+// Name of the generated scanner class.
+%class SCLLexer
+// The scanning method is named nextToken and returns Token objects.
+%function nextToken
+%type Token
+// The scanning method declares SCLSyntaxErrorException, which the lexical
+// actions throw for malformed input.
+%yylexthrow SCLSyntaxErrorException
+// Internal scanner errors (for example input that matches no rule) are
+// reported with a RuntimeException.
+%scanerror RuntimeException
+// Value returned by the scanning method when the end of input is reached.
+%eofval{
+    return sym(SCLTerminals.EOF);
+%eofval}
+
+%{
+    /** Parser options consulted by the lexical rules (e.g. whether "eq" is a keyword). */
+    public SCLParserOptions options = SCLParserOptions.DEFAULT;
+
+    /** Character offset at which the string segment currently being collected started. */
+    int stringStart;
+
+    /**
+     * Parenthesis depths saved when a string literal is suspended by "\(".
+     * The depth of the code surrounding the literal is pushed here and
+     * restored when the matching ")" is seen.
+     */
+    TIntArrayList parenCountStack = new TIntArrayList(2);
+
+    /** Number of currently open "(" in the code being scanned. */
+    int parenCount = 0;
+
+    /** Lexical states (STRING or LONG_STRING) to return to after an embedded expression. */
+    TIntArrayList stateStack = new TIntArrayList(2);
+
+    /**
+     * Collects the decoded contents of the string segment being scanned.
+     * A StringBuilder is sufficient because the buffer is only touched by
+     * the scanner's own actions; the synchronization of StringBuffer buys nothing.
+     */
+    StringBuilder string = new StringBuilder();
+
+    /**
+     * Creates a token covering the current match, using the matched text as
+     * the token text.
+     */
+    private Token sym(int id) {
+        return sym(id, yytext());
+    }
+
+    /**
+     * Creates a token covering the current match (from yychar to
+     * yychar+yylength()) but carrying the given text instead of the matched text.
+     */
+    private Token sym(int id, String text) {
+        return new Token(id, yychar, yychar+yylength(), text);
+    }
+%}
+
+// Basic character classes.
+letter          = [a-zA-Z_]
+digit           = [0-9]
+hexDigit        = [0-9a-fA-F]
+id_char         = {letter} | {digit} | "'"
+// Characters that may form an operator symbol on their own. The hash sign is
+// kept separate: a single hash is only a symbol in the cases listed in the
+// symbol macro below.
+ord_symbol_char = [!$%&*+\/<=>?@\\\^|\-:~]
+symbol_char     = {ord_symbol_char} | "#"
+
+// A name immediately followed by a dot; any number of these may qualify an identifier.
+prefix          = {letter} {id_char}* "."
+
+// An at-sign immediately followed by a name.
+// The parenthesised-comma alternative of id used to be repeated here. It could
+// never be selected: the {id} rule is listed before the {annotation_id} rule
+// and matches exactly the same text, so the duplicate has been removed.
+annotation_id   = "@" {letter} {id_char}*
+// An optionally qualified name, optionally preceded by a question mark, or a
+// parenthesised run of one or more commas such as (,) or (,,).
+id              = {prefix}* ("?")? {letter} {id_char}*
+                | "(" ","+ ")"
+// An operator: one or more ordinary symbol characters, two or more symbol
+// characters when a hash is involved, or a single hash followed by spaces or
+// line breaks.
+symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
+// A dot followed by spaces or line breaks.
+separatedDot    = "." [ \n\r]+
+// An operator (or a single dot) wrapped in parentheses, and an identifier
+// wrapped in backquotes.
+escaped_symbol_inner = {symbol_char}+ | "."
+escaped_symbol  = "(" {escaped_symbol_inner} ")"
+escaped_id      = "`" {id} "`"
+queryOp         = "<" [&|!?] ">"
+integer         = {digit}+
+// Decimal number with a fraction and/or an exponent. The exponent accepts an
+// optional minus sign only.
+float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)?
+                | {digit}+ [eE] ("-")? {digit}+
+// Only the space character counts as whitespace; tabs are rejected by a rule
+// in YYINITIAL and line breaks are tokens of their own.
+whitespace      = [ ]+
+// Note on naming: c_comment is the comment that runs from two slashes to the
+// end of the line, cpp_comment is the delimited block comment (everything up
+// to and including the first closing delimiter).
+c_comment       = "//" [^\n\r]*
+cpp_comment     = "/*" ~"*/"
+
+//xmlName         = {letter} ({letter} | {digit} | [-.])*
+// Only referenced by the commented-out XML rules at the end of the file.
+existentialVar  = "?" {letter} {id_char}*
+
+// A quoted single character: either one character other than a quote, a
+// backslash or U+FFFD, or a backslash followed by any character except U+FFFD.
+char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"
+
+// Lexical states used in addition to YYINITIAL.
+// STRING is active inside a literal opened with one double quote,
+// LONG_STRING inside a literal opened with three double quotes.
+%state STRING
+%state LONG_STRING
+// NOTE(review): no rules in this file are attached to CHAR; character
+// literals are matched by the char_literal macro in YYINITIAL.
+%state CHAR
+/*%state XML
+%state XML_TAG*/
+
+%%
+
+<YYINITIAL> {
+  // Comments are returned as tokens rather than being dropped here.
+  {c_comment}     { return sym(SCLTerminals.COMMENT); }
+  {cpp_comment}   { return sym(SCLTerminals.COMMENT); }
+
+  // Keywords. They are listed before {id}, so a keyword wins when both match
+  // the same text; a longer identifier that merely starts with a keyword is
+  // still matched by {id} because the longest match is preferred.
+  forall          { return sym(SCLTerminals.FORALL); }
+  if              { return sym(SCLTerminals.IF); }
+  then            { return sym(SCLTerminals.THEN); }
+  else            { return sym(SCLTerminals.ELSE); }
+  where           { return sym(SCLTerminals.WHERE); }
+  when            { return sym(SCLTerminals.WHEN); }
+  rule            { return sym(SCLTerminals.RULE); }
+  // Two-word keywords. Each one is followed by a guard rule: when the second
+  // word is only the beginning of a longer identifier (for example
+  // "select firstName"), the guard matches the longer text, pushes everything
+  // after the first word back into the input and returns the token the first
+  // word would produce on its own. Without the guard the identifier would be
+  // split into a keyword and a remainder.
+  abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
+  abstract{whitespace}rule{id_char}+ {
+                    yypushback(yylength() - "abstract".length());
+                    return sym(SCLTerminals.ID);
+                  }
+  extends         { return sym(SCLTerminals.EXTENDS); }
+  mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
+  mapping{whitespace}relation{id_char}+ {
+                    yypushback(yylength() - "mapping".length());
+                    return sym(SCLTerminals.ID);
+                  }
+  transformation  { return sym(SCLTerminals.TRANSFORMATION); }
+  select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
+  select{whitespace}first{id_char}+ {
+                    yypushback(yylength() - "select".length());
+                    return sym(SCLTerminals.SELECT);
+                  }
+  select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
+  select{whitespace}distinct{id_char}+ {
+                    yypushback(yylength() - "select".length());
+                    return sym(SCLTerminals.SELECT);
+                  }
+  select          { return sym(SCLTerminals.SELECT); }
+  enforce         { return sym(SCLTerminals.ENFORCE); }
+  do              { return sym(SCLTerminals.DO); }
+  // "eq" is a keyword only when the parser options enable it; otherwise it is
+  // an ordinary identifier.
+  eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
+  mdo             { return sym(SCLTerminals.MDO); }
+  class           { return sym(SCLTerminals.CLASS); }
+  effect          { return sym(SCLTerminals.EFFECT); }
+  match           { return sym(SCLTerminals.MATCH); }
+  with            { return sym(SCLTerminals.WITH); }
+  let             { return sym(SCLTerminals.LET); }
+  in              { return sym(SCLTerminals.IN); }
+  instance        { return sym(SCLTerminals.INSTANCE); }
+  deriving        { return sym(SCLTerminals.DERIVING); }
+  data            { return sym(SCLTerminals.DATA); }
+  type            { return sym(SCLTerminals.TYPE); }
+  infixl          { return sym(SCLTerminals.INFIXL); }
+  infixr          { return sym(SCLTerminals.INFIXR); }
+  infix           { return sym(SCLTerminals.INFIX); }
+  include         { return sym(SCLTerminals.INCLUDE); }
+  import          { return sym(SCLTerminals.IMPORT); }
+  importJava      { return sym(SCLTerminals.IMPORTJAVA); }
+  hiding          { return sym(SCLTerminals.HIDING); }
+//  relation        { return sym(SCLTerminals.RELATION); }
+  as              { return sym(SCLTerminals.AS); }
+  by              { return sym(SCLTerminals.BY); }
+
+  // Punctuation and reserved operators. These are listed before {symbol} so
+  // that they take priority when both match the same text.
+  {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
+  "@"             { return sym(SCLTerminals.AT); }
+  "{"             { return sym(SCLTerminals.LBRACE); }
+  "}"             { return sym(SCLTerminals.RBRACE); }
+  "("             { ++parenCount; return sym(SCLTerminals.LPAREN); }
+  ")"             { --parenCount;
+                    // When the parenthesis opened by "\(" inside a string
+                    // literal is closed, restore the depth of the surrounding
+                    // code and resume the suspended string state.
+                    if(parenCount == 0 && !parenCountStack.isEmpty()) { 
+                       parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
+                       string.setLength(0);
+                       stringStart=yychar;
+                       yybegin(stateStack.removeAt(stateStack.size()-1));
+                       return sym(SCLTerminals.CONTINUE_STRING);
+                    }
+                    else
+                       return sym(SCLTerminals.RPAREN);
+                  }
+  "["             { return sym(SCLTerminals.LBRACKET); }
+  "]"             { return sym(SCLTerminals.RBRACKET); }
+  "->"            { return sym(SCLTerminals.ARROW); }
+  "<-"            { return sym(SCLTerminals.BINDS); }
+  "=>"            { return sym(SCLTerminals.IMPLIES); }
+  ":-"            { return sym(SCLTerminals.FOLLOWS); }
+  ","             { return sym(SCLTerminals.COMMA); }
+  // NOTE(review): the DOTDOT token carries the text "." rather than ".." -
+  // confirm that this is intended.
+  ".."            { return sym(SCLTerminals.DOTDOT, "."); }
+  // A dot followed by whitespace and a dot followed directly by something
+  // else are different tokens; both carry the text ".".
+  {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
+  "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
+  "-"             { return sym(SCLTerminals.MINUS, "-"); }
+//  "<" existentialVar ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
+  "<"             { return sym(SCLTerminals.LESS, "<"); }
+  ">"             { return sym(SCLTerminals.GREATER, ">"); }
+  ";"             { return sym(SCLTerminals.SEMICOLON); }
+  "|"             { return sym(SCLTerminals.BAR); }
+  "="             { return sym(SCLTerminals.EQUALS); }
+  "::"            { return sym(SCLTerminals.HASTYPE); }
+  ":"             { return sym(SCLTerminals.COLON); }
+  "\\"            { return sym(SCLTerminals.LAMBDA); }
+
+  // String literals: reset the buffer, remember where the literal starts and
+  // switch to the matching string state.
+  "\"\"\""        { string.setLength(0); stringStart=yychar; yybegin(LONG_STRING); return sym(SCLTerminals.BEGIN_STRING); }
+  "\""            { string.setLength(0); stringStart=yychar; yybegin(STRING); return sym(SCLTerminals.BEGIN_STRING); }
+  {char_literal}  { return sym(SCLTerminals.CHAR); }
+
+  // Identifiers and operators.
+  "_"             { return sym(SCLTerminals.BLANK); }
+  {id}            { return sym(SCLTerminals.ID); }
+  {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
+  // An operator in parentheses becomes an identifier and an identifier in
+  // backquotes becomes an operator; the delimiters are stripped from the text.
+  {escaped_symbol} { String text = yytext();
+                     return sym(SCLTerminals.ID, text.substring(1, text.length()-1)); 
+                  }
+  {escaped_id}    { String text = yytext();
+                     return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1)); 
+                  }
+  // trim() removes the whitespace that the hash-plus-whitespace form of
+  // {symbol} includes in the match.
+  {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
+  "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }
+
+  // Numbers.
+  {float}         { return sym(SCLTerminals.FLOAT); }
+  {integer}       { return sym(SCLTerminals.INTEGER); }
+
+  // Spaces are skipped; every line break produces an EOL token with empty text.
+  {whitespace}    { }
+  \R              { return sym(SCLTerminals.EOL, ""); }
+
+  // Everything else is an error.
+  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
+  \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
+  .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
+}
+
+<STRING> {
+  // Closing quote: emit the text collected since stringStart and leave the
+  // string state.
+  \"              { yybegin(YYINITIAL); 
+                    return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString()); 
+                  }
+  // Ordinary characters are copied as they are.
+  [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
+  // Escape sequences.
+  \\t             { string.append('\t'); }
+  \\n             { string.append('\n'); }
+
+  \\r             { string.append('\r'); }
+  \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
+  // Backslash followed by "(" starts an embedded expression: save the current
+  // parenthesis depth and the state to come back to, emit the text collected
+  // so far and scan the expression in YYINITIAL. The token ends after the
+  // whole two-character match, in the same way as END_STRING ends after its
+  // closing delimiter.
+  \\\(            { parenCountStack.add(parenCount);
+                    parenCount = 1;
+                    stateStack.add(STRING);
+                    yybegin(YYINITIAL); 
+                    return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+yylength(), string.toString()); 
+                  }
+  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
+  // Escaped quote characters and backslash stand for themselves.
+  \\[\'\"\\]      { string.append(yytext().substring(1)); }
+  \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }
+  // A backslash that is not covered by any of the longer rules above, i.e. one
+  // directly followed by a line terminator or by the end of input. Without
+  // this rule no rule would match and the scanner would fail with its
+  // internal RuntimeException instead of a syntax error.
+  \\              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
+  // This kind of literal must be closed on the line where it was opened.
+  \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
+  <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
+}
+
+<LONG_STRING> {
+  // Three double quotes close the literal: emit the text collected since
+  // stringStart and leave the string state.
+  "\"\"\""        { yybegin(YYINITIAL); 
+                      return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
+                  }
+  // A line break inside the literal is stored as a single '\n'.
+  \R              { string.append('\n'); }
+  [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
+  // Backslash followed by "(" starts an embedded expression: save the current
+  // parenthesis depth and the state to come back to, emit the text collected
+  // so far and scan the expression in YYINITIAL. The token ends after the
+  // whole two-character match, in the same way as END_STRING ends after its
+  // closing delimiter.
+  \\\(            { parenCountStack.add(parenCount);
+                    parenCount = 1;
+                    stateStack.add(LONG_STRING);
+                    yybegin(YYINITIAL); 
+                    return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+yylength(), string.toString()); 
+                  }
+  // Any other double quote or backslash is kept literally; there are no
+  // escape sequences in this kind of literal.
+  [\"\\]          { string.append( yytext() ); }
+  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
+  // No catch-all rule is needed: every character is matched by one of the
+  // rules above. The former catch-all could never fire.
+  <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
+}
+
+/*
+<XML> {
+  "<" {xmlName}      { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
+  "</" {xmlName} ">" { return sym(SCLTerminals.XML_CLOSING_TAG); }
+  "</" {existentialVar} ">" { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
+  [^<]+              { return sym(SCLTerminals.XML_TEXT); }
+}
+
+<XML_TAG> {
+  "/>"              { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
+  ">"               { return sym(SCLTerminals.XML_TAG_END); }
+  {xmlName}         { return sym(SCLTerminals.XML_ATTRIBUTE); }  
+  {existentialVar}  { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
+  "="               { return sym(SCLTerminals.EQUALS); }
+  "\"" [^\"]* "\""  { String text = yytext();
+                      return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
+  "'"  [^\']* "'"   { String text = yytext();
+                      return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
+  {whitespace}      { }
+}
+*/
\ No newline at end of file