// JFlex specification of the SCL lexer.
//
// The generated class is SCLLexer; its scanning method is nextToken(), which
// returns Token objects and may throw SCLSyntaxErrorException.
package org.simantics.scl.compiler.internal.parsing.parser;

import org.simantics.scl.compiler.internal.parsing.Token;
import org.simantics.scl.compiler.errors.Locations;
import org.simantics.scl.compiler.internal.parsing.exceptions.SCLSyntaxErrorException;
import gnu.trove.list.array.TIntArrayList;

%%

%public
%char
%unicode
%class SCLLexer
%function nextToken
%type Token
%yylexthrow SCLSyntaxErrorException
%scanerror RuntimeException

// End of input outside of a string literal produces an EOF token.
// (The string states below have their own end-of-file rules.)
%eofval{
    return sym(SCLTerminals.EOF);
%eofval}

%{
    /** Parser options; consulted by the 'eq' rule to decide whether 'eq' is a keyword. */
    public SCLParserOptions options = SCLParserOptions.DEFAULT;

    /**
     * Character offset at which the string piece currently being collected
     * started. Set when a string literal begins and again when a literal is
     * resumed after an interpolated expression.
     */
    int stringStart;

    /**
     * Saved values of {@link #parenCount}, one per string interpolation that
     * is currently open. Pushed by the "\(" rules, popped by the ")" rule.
     */
    TIntArrayList parenCountStack = new TIntArrayList(2);

    /**
     * Balance of "(" and ")" seen by the YYINITIAL rules. It is reset to 1
     * when an interpolation "\(" is entered, so it reaches 0 exactly at the
     * ")" that closes the interpolation.
     */
    int parenCount = 0;

    /**
     * Lexical state (STRING or LONG_STRING) to return to when the matching
     * interpolation is closed. Pushed and popped together with
     * {@link #parenCountStack}.
     */
    TIntArrayList stateStack = new TIntArrayList(2);

    /** Accumulates the decoded contents of the current string piece. */
    StringBuffer string = new StringBuffer();

    /** Creates a token spanning the current match, using the matched text. */
    private Token sym(int id) {
        return new Token(id, yychar, yychar+yylength(), yytext());
    }

    /** Creates a token spanning the current match, with an explicit text. */
    private Token sym(int id, String text) {
        return new Token(id, yychar, yychar+yylength(), text);
    }
%}

// ---------------------------------------------------------------------------
// Macros
// ---------------------------------------------------------------------------

letter          = [a-zA-Z_]
digit           = [0-9]
hexDigit        = [0-9a-fA-F]
id_char         = {letter} | {digit} | "'"
ord_symbol_char = [!$%&*+\/<=>@\\\^|\-:~]
symbol_char     = {ord_symbol_char} | "#"

// A module-style prefix such as "Foo." in front of an identifier.
prefix          = {letter} {id_char}* "."
// NOTE(review): the tuple-constructor alternative below is also matched by
// {id}, whose rule comes first, so it looks unreachable here - confirm.
annotation_id   = "@" {letter} {id_char}* | "(" ","+ ")"
// Optionally prefixed identifier, optionally starting with "?", or a tuple
// constructor such as "(,)" or "(,,)".
id              = {prefix}* ("?")? {letter} {id_char}* | "(" ","+ ")"
// The last alternative is a "#" followed by blanks/line breaks; the rule
// action trims the matched text.
symbol          = {ord_symbol_char}+ | {symbol_char}{symbol_char}+ | "#" [ \n\r]+
separatedDot    = "." [ \n\r]+
escaped_symbol_inner = {symbol_char}+ | "."
escaped_symbol  = "(" {escaped_symbol_inner} ")"
escaped_id      = "`" {id} "`"
queryOp         = "<" [&|!?] ">"
integer         = {digit}+
float           = {digit}+ "." {digit}+ ([eE] ("-")? {digit}+)? | {digit}+ [eE] ("-")? {digit}+
whitespace      = [ ]+
// Despite the macro names, c_comment is the line comment form and
// cpp_comment is the delimited block comment form.
c_comment       = "//" [^\n\r]*
cpp_comment     = "/*" ~"*/"

//xmlName       = {letter} ({letter} | {digit} | [-.])*
// Only referenced by the disabled XML rules.
existentialVar  = "?" {letter} {id_char}*

char_literal    = "'" ([^'\\\ufffd] | "\\" [^\ufffd]) "'"

// ---------------------------------------------------------------------------
// Lexical states
// ---------------------------------------------------------------------------

// Inside a "..." literal.
%state STRING
// Inside a triple-quoted literal.
%state LONG_STRING
// Declared, but no rule in this specification uses it.
%state CHAR
/*%state XML
%state XML_TAG*/

%%

// ---------------------------------------------------------------------------
// Ordinary source text. Rule order matters: for matches of equal length the
// earlier rule wins, which is how keywords take precedence over {id}.
// ---------------------------------------------------------------------------
<YYINITIAL> {
  {c_comment}     { return sym(SCLTerminals.COMMENT); }
  {cpp_comment}   { return sym(SCLTerminals.COMMENT); }

  // Keywords
  forall          { return sym(SCLTerminals.FORALL); }
  if              { return sym(SCLTerminals.IF); }
  then            { return sym(SCLTerminals.THEN); }
  else            { return sym(SCLTerminals.ELSE); }
  where           { return sym(SCLTerminals.WHERE); }
  when            { return sym(SCLTerminals.WHEN); }
  rule            { return sym(SCLTerminals.RULE); }
  abstract{whitespace}rule { return sym(SCLTerminals.ABSTRACT_RULE); }
  extends         { return sym(SCLTerminals.EXTENDS); }
  mapping{whitespace}relation { return sym(SCLTerminals.MAPPING_RELATION); }
  transformation  { return sym(SCLTerminals.TRANSFORMATION); }
  select{whitespace}first { return sym(SCLTerminals.SELECT_FIRST); }
  select{whitespace}distinct { return sym(SCLTerminals.SELECT_DISTINCT); }
  select          { return sym(SCLTerminals.SELECT); }
  enforce         { return sym(SCLTerminals.ENFORCE); }
  do              { return sym(SCLTerminals.DO); }
  // 'eq' is a keyword only when enabled in the parser options; otherwise it
  // is an ordinary identifier.
  eq              { return sym(options.supportEq ? SCLTerminals.EQ : SCLTerminals.ID); }
  mdo             { return sym(SCLTerminals.MDO); }
  class           { return sym(SCLTerminals.CLASS); }
  effect          { return sym(SCLTerminals.EFFECT); }
  match           { return sym(SCLTerminals.MATCH); }
  with            { return sym(SCLTerminals.WITH); }
  let             { return sym(SCLTerminals.LET); }
  in              { return sym(SCLTerminals.IN); }
  instance        { return sym(SCLTerminals.INSTANCE); }
  deriving        { return sym(SCLTerminals.DERIVING); }
  data            { return sym(SCLTerminals.DATA); }
  type            { return sym(SCLTerminals.TYPE); }
  infixl          { return sym(SCLTerminals.INFIXL); }
  infixr          { return sym(SCLTerminals.INFIXR); }
  infix           { return sym(SCLTerminals.INFIX); }
  include         { return sym(SCLTerminals.INCLUDE); }
  import          { return sym(SCLTerminals.IMPORT); }
  importJava      { return sym(SCLTerminals.IMPORTJAVA); }
  hiding          { return sym(SCLTerminals.HIDING); }
//  relation        { return sym(SCLTerminals.RELATION); }
  as              { return sym(SCLTerminals.AS); }
  by              { return sym(SCLTerminals.BY); }
  constraint      { return sym(SCLTerminals.CONSTRAINT); }

  // Punctuation and fixed operators
  {queryOp}       { return sym(SCLTerminals.QUERY_OP); }
  "@"             { return sym(SCLTerminals.AT); }
  "{"             { return sym(SCLTerminals.LBRACE); }
  "}"             { return sym(SCLTerminals.RBRACE); }
  "("             { ++parenCount;
                    return sym(SCLTerminals.LPAREN);
                  }
  // A ")" that brings the balance back to zero while an interpolation is
  // open closes that interpolation: restore the saved balance, start a new
  // string piece and resume the suspended string state.
  ")"             { --parenCount;
                    if(parenCount == 0 && !parenCountStack.isEmpty()) {
                        parenCount = parenCountStack.removeAt(parenCountStack.size()-1);
                        string.setLength(0);
                        stringStart=yychar;
                        yybegin(stateStack.removeAt(stateStack.size()-1));
                        return sym(SCLTerminals.CONTINUE_STRING);
                    }
                    else
                        return sym(SCLTerminals.RPAREN);
                  }
  "["             { return sym(SCLTerminals.LBRACKET); }
  "]"             { return sym(SCLTerminals.RBRACKET); }
  "->"            { return sym(SCLTerminals.ARROW); }
  "<-"            { return sym(SCLTerminals.BINDS); }
  "=>"            { return sym(SCLTerminals.IMPLIES); }
  ":-"            { return sym(SCLTerminals.FOLLOWS); }
  ","             { return sym(SCLTerminals.COMMA); }
  // NOTE(review): the DOTDOT token carries the text "." rather than the
  // matched ".." - confirm this is what the parser expects.
  ".."            { return sym(SCLTerminals.DOTDOT, "."); }
  {separatedDot}  { return sym(SCLTerminals.SEPARATED_DOT, "."); }
  "."             { return sym(SCLTerminals.ATTACHED_DOT, "."); }
  "-"             { return sym(SCLTerminals.MINUS, "-"); }
//  "<" {existentialVar} ">" { yybegin(XML); return sym(SCLTerminals.XML_BEGIN); }
  "<"             { return sym(SCLTerminals.LESS, "<"); }
  ">"             { return sym(SCLTerminals.GREATER, ">"); }
  ";"             { return sym(SCLTerminals.SEMICOLON); }
  "|"             { return sym(SCLTerminals.BAR); }
  "="             { return sym(SCLTerminals.EQUALS); }
  "::"            { return sym(SCLTerminals.HASTYPE); }
  ":"             { return sym(SCLTerminals.COLON); }
  "\\" " "* match { return sym(SCLTerminals.LAMBDA_MATCH); }
  "\\"            { return sym(SCLTerminals.LAMBDA); }

  // String literals: reset the buffer, remember the start offset and switch
  // to the corresponding string state.
  "\"\"\""        { string.setLength(0);
                    stringStart=yychar;
                    yybegin(LONG_STRING);
                    return sym(SCLTerminals.BEGIN_STRING);
                  }
  "\""            { string.setLength(0);
                    stringStart=yychar;
                    yybegin(STRING);
                    return sym(SCLTerminals.BEGIN_STRING);
                  }
  {char_literal}  { return sym(SCLTerminals.CHAR); }

  // Identifiers and symbols
  "_"             { return sym(SCLTerminals.BLANK); }
  {id}            { return sym(SCLTerminals.ID); }
  {annotation_id} { return sym(SCLTerminals.ANNOTATION_ID); }
  // A parenthesized symbol is reported as an ID, with the parentheses removed.
  {escaped_symbol} { String text = yytext();
                     return sym(SCLTerminals.ID, text.substring(1, text.length()-1));
                   }
  // A back-quoted identifier is reported as a SYMBOL, with the quotes removed.
  {escaped_id}    { String text = yytext();
                    return sym(SCLTerminals.SYMBOL, text.substring(1, text.length()-1));
                  }
  {symbol}        { return sym(SCLTerminals.SYMBOL, yytext().trim()); }
  "#"             { return sym(SCLTerminals.ATTACHED_HASH, "#"); }

  // Numbers
  {float}         { return sym(SCLTerminals.FLOAT); }
  {integer}       { return sym(SCLTerminals.INTEGER); }

  // Blanks are skipped; line breaks are tokens of their own.
  {whitespace}    { }
  \R              { return new Token(SCLTerminals.EOL, yychar, yychar+yylength(), ""); }

  // Errors
  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  \t              { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Tabulator is not allowed except inside string literals."); }
  .               { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Illegal character '" + yytext() + "'."); }
}

// ---------------------------------------------------------------------------
// Inside a "..." literal. Text is accumulated in 'string' and emitted as one
// token when the literal ends or is suspended by an interpolation.
// ---------------------------------------------------------------------------
<STRING> {
  \"              { yybegin(YYINITIAL);
                    return new Token(SCLTerminals.END_STRING, stringStart, yychar+1, string.toString());
                  }
  [^\n\r\"\\\ufffd]+ { string.append( yytext() ); }
  \\t             { string.append('\t'); }
  \\n             { string.append('\n'); }
  \\r             { string.append('\r'); }
  // Backslash, 'u' and four hex digits: append the character with that code.
  \\u{hexDigit}{hexDigit}{hexDigit}{hexDigit} { string.append((char)Integer.parseInt(yytext().substring(2), 16)); }
  // Start of an interpolated expression: save the current parenthesis
  // balance and the state to come back to, then lex ordinary source text
  // until the matching ")".
  \\\(            { parenCountStack.add(parenCount);
                    parenCount = 1;
                    stateStack.add(STRING);
                    yybegin(YYINITIAL);
                    return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString());
                  }
  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  // Escaped quote characters and backslash: append the character itself.
  \\[\'\"\\]      { string.append(yytext().substring(1)); }
  \\.             { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Illegal string escape character."); }
  // This kind of literal may not span lines or run into the end of input.
  \R              { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
  <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
}

// ---------------------------------------------------------------------------
// Inside a triple-quoted literal. Line breaks are allowed and stored as '\n';
// apart from the interpolation start, a backslash is kept literally.
// ---------------------------------------------------------------------------
<LONG_STRING> {
  "\"\"\""        { yybegin(YYINITIAL);
                    return new Token(SCLTerminals.END_STRING, stringStart, yychar+3, string.toString());
                  }
  \R              { string.append('\n'); }
  [^\"\n\r\\\ufffd]+ { string.append( yytext() ); }
  // Start of an interpolated expression; same bookkeeping as in STRING, but
  // the state to resume is LONG_STRING.
  \\\(            { parenCountStack.add(parenCount);
                    parenCount = 1;
                    stateStack.add(LONG_STRING);
                    yybegin(YYINITIAL);
                    return new Token(SCLTerminals.SUSPEND_STRING, stringStart, yychar+1, string.toString());
                  }
  // A lone quote or backslash is part of the text.
  [\"\\]          { string.append( yytext() ); }
  \ufffd          { throw new SCLSyntaxErrorException(Locations.location(yychar, yychar+1), "Character does not conform to UTF-8 encoding."); }
  .               { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
  <<EOF>>         { throw new SCLSyntaxErrorException(Locations.location(stringStart, yychar), "Unclosed string literal."); }
}

// Disabled XML lexing rules, kept for reference.
// NOTE(review): in this copy the two groups have no state qualifiers
// (presumably XML and XML_TAG, judging by the yybegin calls) and the two
// closing-tag patterns are empty strings; restore them from version control
// before re-enabling.
/*
{
  "<" {xmlName}   { yybegin(XML_TAG); return sym(SCLTerminals.XML_TAG_BEGIN); }
  ""              { return sym(SCLTerminals.XML_CLOSING_TAG); }
  ""              { yybegin(YYINITIAL); return sym(SCLTerminals.XML_END); }
  [^<]+           { return sym(SCLTerminals.XML_TEXT); }
}

{
  "/>"            { return sym(SCLTerminals.XML_EMPTY_TAG_END); }
  ">"             { return sym(SCLTerminals.XML_TAG_END); }
  {xmlName}       { return sym(SCLTerminals.XML_ATTRIBUTE); }
  {existentialVar} { return sym(SCLTerminals.XML_EXISTENTIAL_VAR); }
  "="             { return sym(SCLTerminals.EQUALS); }
  "\"" [^\"]* "\"" { String text = yytext(); return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
  "'" [^\']* "'"  { String text = yytext(); return sym(SCLTerminals.STRING, text.substring(1, text.length()-1)); }
  {whitespace}    { }
}
*/