bundles/org.simantics.databoard/cpp/DataBoardTest/libantlr3c-3.2/src/antlr3baserecognizer.c

   1 /** \file
   2  * Contains the base functions that all recognizers require.
   3  * Any function can be overridden by a lexer/parser/tree parser or by the
   4  * ANTLR3 programmer.
   5  *
   6  * \addtogroup pANTLR3_BASE_RECOGNIZER
   7  * @{
   8  */
   9 #include    <antlr3baserecognizer.h>
  10
  11 // [The "BSD licence"]
  12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
  13 // http://www.temporal-wave.com
  14 // http://www.linkedin.com/in/jimidle
  15 //
  16 // All rights reserved.
  17 //
  18 // Redistribution and use in source and binary forms, with or without
  19 // modification, are permitted provided that the following conditions
  20 // are met:
  21 // 1. Redistributions of source code must retain the above copyright
  22 //    notice, this list of conditions and the following disclaimer.
  23 // 2. Redistributions in binary form must reproduce the above copyright
  24 //    notice, this list of conditions and the following disclaimer in the
  25 //    documentation and/or other materials provided with the distribution.
  26 // 3. The name of the author may not be used to endorse or promote products
  27 //    derived from this software without specific prior written permission.
  28 //
  29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39
  40 #ifdef  ANTLR3_WINDOWS
  41 #pragma warning( disable : 4100 )
  42 #endif
  43
  44 /* Interface functions -standard implementations cover parser and treeparser
  45  * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
  46  * most of these functions.
  47  */
  48 static void                                     beginResync                                     (pANTLR3_BASE_RECOGNIZER recognizer);
  49 static pANTLR3_BITSET           computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer);
  50 static void                                     endResync                                       (pANTLR3_BASE_RECOGNIZER recognizer);
  51 static void                                     beginBacktrack                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
  52 static void                                     endBacktrack                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
  53
  54 static void *                           match                                           (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  55 static void                                     matchAny                                        (pANTLR3_BASE_RECOGNIZER recognizer);
  56 static void                                     mismatch                                        (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  57 static ANTLR3_BOOLEAN           mismatchIsUnwantedToken         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
  58 static ANTLR3_BOOLEAN           mismatchIsMissingToken          (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
  59 static void                                     reportError                                     (pANTLR3_BASE_RECOGNIZER recognizer);
  60 static pANTLR3_BITSET           computeCSRuleFollow                     (pANTLR3_BASE_RECOGNIZER recognizer);
  61 static pANTLR3_BITSET           combineFollows                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
  62 static void                                     displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
  63 static void                                     recover                                         (pANTLR3_BASE_RECOGNIZER recognizer);
  64 static void     *                               recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
  65 static void     *                               recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
  66 static ANTLR3_BOOLEAN           recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
  67 static void                                     consumeUntil                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
  68 static void                                     consumeUntilSet                         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
  69 static pANTLR3_STACK            getRuleInvocationStack      (pANTLR3_BASE_RECOGNIZER recognizer);
  70 static pANTLR3_STACK            getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
  71 static pANTLR3_HASH_TABLE       toStrings                                       (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
  72 static ANTLR3_MARKER            getRuleMemoization                      (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
  73 static ANTLR3_BOOLEAN           alreadyParsedRule                       (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
  74 static void                                     memoize                                         (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
  75 static ANTLR3_BOOLEAN           synpred                                         (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
  76 static void                                     reset                                           (pANTLR3_BASE_RECOGNIZER recognizer);
  77 static void                                     freeBR                                          (pANTLR3_BASE_RECOGNIZER recognizer);
  78 static void *                           getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
  79 static void *                           getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
  80                                                                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
  81 static ANTLR3_UINT32            getNumberOfSyntaxErrors         (pANTLR3_BASE_RECOGNIZER recognizer);
  82
  83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
  84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
  85 {
  86     pANTLR3_BASE_RECOGNIZER recognizer;
  87
  88     // Allocate memory for the structure
  89     //
  90     recognizer      = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
  91
  92     if  (recognizer == NULL)
  93     {
  94                 // Allocation failed
  95                 //
  96                 return  NULL;
  97     }
  98
  99
 100         // If we have been supplied with a pre-existing recognizer state
 101         // then we just install it, otherwise we must create one from scratch
 102         //
 103         if      (state == NULL)
 104         {
 105                 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
 106
 107                 if      (recognizer->state == NULL)
 108                 {
 109                         ANTLR3_FREE(recognizer);
 110                         return  NULL;
 111                 }
 112
 113                 // Initialize any new recognizer state
 114                 //
 115                 recognizer->state->errorRecovery        = ANTLR3_FALSE;
 116                 recognizer->state->lastErrorIndex       = -1;
 117                 recognizer->state->failed               = ANTLR3_FALSE;
 118                 recognizer->state->errorCount           = 0;
 119                 recognizer->state->backtracking         = 0;
 120                 recognizer->state->following            = NULL;
 121                 recognizer->state->ruleMemo             = NULL;
 122                 recognizer->state->tokenNames           = NULL;
 123                 recognizer->state->sizeHint             = sizeHint;
 124                 recognizer->state->tokSource            = NULL;
 125                 recognizer->state->tokFactory           = NULL;
 126
 127                 // Rather than check to see if we must initialize
 128                 // the stack every time we are asked for an new rewrite stream
 129                 // we just always create an empty stack and then just
 130                 // free it when the base recognizer is freed.
 131                 //
 132                 recognizer->state->rStreams             = antlr3VectorNew(0);  // We don't know the size.
 133
 134                 if      (recognizer->state->rStreams == NULL)
 135                 {
 136                         // Out of memory
 137                         //
 138                         ANTLR3_FREE(recognizer->state);
 139                         ANTLR3_FREE(recognizer);
 140                         return  NULL;
 141                 }
 142         }
 143         else
 144         {
 145                 // Install the one we were given, and do not reset it here
 146                 // as it will either already have been initialized or will
 147                 // be in a state that needs to be preserved.
 148                 //
 149                 recognizer->state = state;
 150         }
 151
 152     // Install the BR API
 153     //
 154     recognizer->alreadyParsedRule           = alreadyParsedRule;
 155     recognizer->beginResync                 = beginResync;
 156     recognizer->combineFollows              = combineFollows;
 157     recognizer->beginBacktrack              = beginBacktrack;
 158     recognizer->endBacktrack                = endBacktrack;
 159     recognizer->computeCSRuleFollow         = computeCSRuleFollow;
 160     recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
 161     recognizer->consumeUntil                = consumeUntil;
 162     recognizer->consumeUntilSet             = consumeUntilSet;
 163     recognizer->displayRecognitionError     = displayRecognitionError;
 164     recognizer->endResync                   = endResync;
 165     recognizer->exConstruct                 = antlr3MTExceptionNew;
 166     recognizer->getRuleInvocationStack      = getRuleInvocationStack;
 167     recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
 168     recognizer->getRuleMemoization          = getRuleMemoization;
 169     recognizer->match                       = match;
 170     recognizer->matchAny                    = matchAny;
 171     recognizer->memoize                     = memoize;
 172     recognizer->mismatch                    = mismatch;
 173     recognizer->mismatchIsUnwantedToken     = mismatchIsUnwantedToken;
 174     recognizer->mismatchIsMissingToken      = mismatchIsMissingToken;
 175     recognizer->recover                     = recover;
 176     recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
 177     recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
 178     recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
 179     recognizer->getNumberOfSyntaxErrors     = getNumberOfSyntaxErrors;
 180     recognizer->reportError                 = reportError;
 181     recognizer->reset                       = reset;
 182     recognizer->synpred                     = synpred;
 183     recognizer->toStrings                   = toStrings;
 184     recognizer->getCurrentInputSymbol       = getCurrentInputSymbol;
 185     recognizer->getMissingSymbol            = getMissingSymbol;
 186     recognizer->debugger                    = NULL;
 187
 188     recognizer->free = freeBR;
 189
 190     /* Initialize variables
 191      */
 192     recognizer->type                    = type;
 193
 194
 195     return  recognizer;
 196 }
 197 static void
 198 freeBR      (pANTLR3_BASE_RECOGNIZER recognizer)
 199 {
 200     pANTLR3_EXCEPTION thisE;
 201
 202         // Did we have a state allocated?
 203         //
 204         if      (recognizer->state != NULL)
 205         {
 206                 // Free any rule memoization we set up
 207                 //
 208                 if      (recognizer->state->ruleMemo != NULL)
 209                 {
 210                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
 211                         recognizer->state->ruleMemo = NULL;
 212                 }
 213
 214                 // Free any exception space we have left around
 215                 //
 216                 thisE = recognizer->state->exception;
 217                 if      (thisE != NULL)
 218                 {
 219                         thisE->freeEx(thisE);
 220                 }
 221
 222                 // Free any rewrite streams we have allocated
 223                 //
 224                 if      (recognizer->state->rStreams != NULL)
 225                 {
 226                         recognizer->state->rStreams->free(recognizer->state->rStreams);
 227                 }
 228
 229                 // Free up any token factory we created (error recovery for instance)
 230                 //
 231                 if      (recognizer->state->tokFactory != NULL)
 232                 {
 233                         recognizer->state->tokFactory->close(recognizer->state->tokFactory);
 234                 }
 235                 // Free the shared state memory
 236                 //
 237                 ANTLR3_FREE(recognizer->state);
 238         }
 239
 240         // Free the actual recognizer space
 241         //
 242     ANTLR3_FREE(recognizer);
 243 }
 244
 245 /**
 246  * Creates a new Mismatched Token Exception and inserts in the recognizer
 247  * exception stack.
 248  *
 249  * \param recognizer
 250  * Context pointer for this recognizer
 251  *
 252  */
 253 ANTLR3_API      void
 254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
 255 {
 256     /* Create a basic recognition exception structure
 257      */
 258     antlr3RecognitionExceptionNew(recognizer);
 259
 260     /* Now update it to indicate this is a Mismatched token exception
 261      */
 262     recognizer->state->exception->name          = ANTLR3_MISMATCHED_EX_NAME;
 263     recognizer->state->exception->type          = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
 264
 265     return;
 266 }
 267
 268 ANTLR3_API      void
 269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
 270 {
 271         pANTLR3_EXCEPTION                               ex;
 272         pANTLR3_LEXER                                   lexer;
 273         pANTLR3_PARSER                                  parser;
 274         pANTLR3_TREE_PARSER                             tparser;
 275
 276         pANTLR3_INPUT_STREAM                    ins;
 277         pANTLR3_INT_STREAM                              is;
 278         pANTLR3_COMMON_TOKEN_STREAM         cts;
 279         pANTLR3_TREE_NODE_STREAM            tns;
 280
 281         ins         = NULL;
 282         cts         = NULL;
 283         tns         = NULL;
 284         is          = NULL;
 285         lexer   = NULL;
 286         parser  = NULL;
 287         tparser = NULL;
 288
 289         switch  (recognizer->type)
 290         {
 291         case    ANTLR3_TYPE_LEXER:
 292
 293                 lexer   = (pANTLR3_LEXER) (recognizer->super);
 294                 ins     = lexer->input;
 295                 is      = ins->istream;
 296
 297                 break;
 298
 299         case    ANTLR3_TYPE_PARSER:
 300
 301                 parser  = (pANTLR3_PARSER) (recognizer->super);
 302                 cts     = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
 303                 is      = parser->tstream->istream;
 304
 305                 break;
 306
 307         case    ANTLR3_TYPE_TREE_PARSER:
 308
 309                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 310                 tns     = tparser->ctnstream->tnstream;
 311                 is      = tns->istream;
 312
 313                 break;
 314
 315         default:
 316
 317                 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
 318                 return;
 319
 320                 break;
 321         }
 322
 323         /* Create a basic exception structure
 324          */
 325         ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
 326                 (void *)ANTLR3_RECOGNITION_EX_NAME,
 327                 NULL,
 328                 ANTLR3_FALSE);
 329
 330         /* Rest of information depends on the base type of the
 331          * input stream.
 332          */
 333         switch  (is->type & ANTLR3_INPUT_MASK)
 334         {
 335         case    ANTLR3_CHARSTREAM:
 336
 337                 ex->c                   = is->_LA                       (is, 1);                                        /* Current input character                      */
 338                 ex->line                = ins->getLine                  (ins);                                          /* Line number comes from stream                */
 339                 ex->charPositionInLine  = ins->getCharPositionInLine    (ins);      /* Line offset also comes from the stream   */
 340                 ex->index               = is->index                     (is);
 341                 ex->streamName          = ins->fileName;
 342                 ex->message             = "Unexpected character";
 343                 break;
 344
 345         case    ANTLR3_TOKENSTREAM:
 346
 347                 ex->token               = cts->tstream->_LT                                             (cts->tstream, 1);          /* Current input token                          */
 348                 ex->line                = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine                  (ex->token);
 349                 ex->charPositionInLine  = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine    (ex->token);
 350                 ex->index               = cts->tstream->istream->index                                  (cts->tstream->istream);
 351                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
 352                 {
 353                         ex->streamName          = NULL;
 354                 }
 355                 else
 356                 {
 357                         ex->streamName          = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
 358                 }
 359                 ex->message             = "Unexpected token";
 360                 break;
 361
 362         case    ANTLR3_COMMONTREENODE:
 363
 364                 ex->token               = tns->_LT                                                  (tns, 1);       /* Current input tree node                      */
 365                 ex->line                = ((pANTLR3_BASE_TREE)(ex->token))->getLine                 (ex->token);
 366                 ex->charPositionInLine  = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine   (ex->token);
 367                 ex->index               = tns->istream->index                                       (tns->istream);
 368
 369                 // Are you ready for this? Deep breath now...
 370                 //
 371                 {
 372                         pANTLR3_COMMON_TREE tnode;
 373
 374                         tnode           = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
 375
 376                         if      (tnode->token    == NULL)
 377                         {
 378                                 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
 379                         }
 380                         else
 381                         {
 382                                 if      (tnode->token->input == NULL)
 383                                 {
 384                                         ex->streamName          = NULL;
 385                                 }
 386                                 else
 387                                 {
 388                                         ex->streamName          = tnode->token->input->fileName;
 389                                 }
 390                         }
 391                         ex->message             = "Unexpected node";
 392                 }
 393                 break;
 394         }
 395
 396         ex->input                                               = is;
 397         ex->nextException                               = recognizer->state->exception; /* So we don't leak the memory */
 398         recognizer->state->exception    = ex;
 399         recognizer->state->error            = ANTLR3_TRUE;          /* Exception is outstanding */
 400
 401         return;
 402 }
 403
 404
 405 /// Match current input symbol against ttype.  Upon error, do one token
 406 /// insertion or deletion if possible.
 407 /// To turn off single token insertion or deletion error
 408 /// recovery, override mismatchRecover() and have it call
 409 /// plain mismatch(), which does not recover.  Then any error
 410 /// in a rule will cause an exception and immediate exit from
 411 /// rule.  Rule would recover by resynchronizing to the set of
 412 /// symbols that can follow rule ref.
 413 ///
 414 static void *
 415 match(  pANTLR3_BASE_RECOGNIZER recognizer,
 416                 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
 417 {
 418     pANTLR3_PARSER                      parser;
 419     pANTLR3_TREE_PARSER     tparser;
 420     pANTLR3_INT_STREAM      is;
 421         void                                    * matchedSymbol;
 422
 423     switch      (recognizer->type)
 424     {
 425                 case    ANTLR3_TYPE_PARSER:
 426
 427                         parser  = (pANTLR3_PARSER) (recognizer->super);
 428                         tparser = NULL;
 429                         is      = parser->tstream->istream;
 430
 431                         break;
 432
 433                 case    ANTLR3_TYPE_TREE_PARSER:
 434
 435                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 436                         parser  = NULL;
 437                         is      = tparser->ctnstream->tnstream->istream;
 438
 439                         break;
 440
 441                 default:
 442
 443                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
 444                         return ANTLR3_FALSE;
 445
 446                         break;
 447     }
 448
 449         // Pick up the current input token/node for assignment to labels
 450         //
 451         matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
 452
 453     if  (is->_LA(is, 1) == ttype)
 454     {
 455                 // The token was the one we were told to expect
 456                 //
 457                 is->consume(is);                                                                        // Consume that token from the stream
 458                 recognizer->state->errorRecovery        = ANTLR3_FALSE; // Not in error recovery now (if we were)
 459                 recognizer->state->failed                       = ANTLR3_FALSE; // The match was a success
 460                 return matchedSymbol;                                                           // We are done
 461     }
 462
 463     // We did not find the expected token type, if we are backtracking then
 464     // we just set the failed flag and return.
 465     //
 466     if  (recognizer->state->backtracking > 0)
 467     {
 468                 // Backtracking is going on
 469                 //
 470                 recognizer->state->failed  = ANTLR3_TRUE;
 471                 return matchedSymbol;
 472         }
 473
 474     // We did not find the expected token and there is no backtracking
 475     // going on, so we mismatch, which creates an exception in the recognizer exception
 476     // stack.
 477     //
 478         matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
 479     return matchedSymbol;
 480 }
 481
 482 /// Consumes the next token, whatever it is, and resets the recognizer state
 483 /// so that it is not in error.
 484 ///
 485 /// \param recognizer
 486 /// Recognizer context pointer
 487 ///
 488 static void
 489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
 490 {
 491     pANTLR3_PARSER          parser;
 492     pANTLR3_TREE_PARSER     tparser;
 493     pANTLR3_INT_STREAM      is;
 494
 495     switch      (recognizer->type)
 496     {
 497                 case    ANTLR3_TYPE_PARSER:
 498
 499                         parser  = (pANTLR3_PARSER) (recognizer->super);
 500                         tparser = NULL;
 501                         is      = parser->tstream->istream;
 502
 503                         break;
 504
 505                 case    ANTLR3_TYPE_TREE_PARSER:
 506
 507                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
 508                         parser  = NULL;
 509                         is      = tparser->ctnstream->tnstream->istream;
 510
 511                         break;
 512
 513                 default:
 514
 515                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
 516                         return;
 517
 518                 break;
 519     }
 520     recognizer->state->errorRecovery    = ANTLR3_FALSE;
 521     recognizer->state->failed               = ANTLR3_FALSE;
 522     is->consume(is);
 523
 524     return;
 525 }
 526 ///
 527 ///
 528 static ANTLR3_BOOLEAN
 529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
 530 {
 531         ANTLR3_UINT32 nextt;
 532
 533         nextt = is->_LA(is, 2);
 534
 535         if      (nextt == ttype)
 536         {
 537                 if      (recognizer->state->exception != NULL)
 538                 {
 539                         recognizer->state->exception->expecting = nextt;
 540                 }
 541                 return ANTLR3_TRUE;             // This token is unknown, but the next one is the one we wanted
 542         }
 543         else
 544         {
 545                 return ANTLR3_FALSE;    // Neither this token, nor the one following is the one we wanted
 546         }
 547 }
 548
 549 ///
 550 ///
 551 static ANTLR3_BOOLEAN
 552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
 553 {
 554         ANTLR3_BOOLEAN  retcode;
 555         pANTLR3_BITSET  followClone;
 556         pANTLR3_BITSET  viableTokensFollowingThisRule;
 557
 558         if      (follow == NULL)
 559         {
 560                 // There is no information about the tokens that can follow the last one
 561                 // hence we must say that the current one we found is not a member of the
 562                 // follow set and does not indicate a missing token. We will just consume this
 563                 // single token and see if the parser works it out from there.
 564                 //
 565                 return  ANTLR3_FALSE;
 566         }
 567
 568         followClone                                             = NULL;
 569         viableTokensFollowingThisRule   = NULL;
 570
 571         // The C bitset maps are laid down at compile time by the
 572         // C code generation. Hence we cannot remove things from them
 573         // and so on. So, in order to remove EOR (if we need to) then
 574         // we clone the static bitset.
 575         //
 576         followClone = antlr3BitsetLoad(follow);
 577         if      (followClone == NULL)
 578         {
 579                 return ANTLR3_FALSE;
 580         }
 581
 582         // Compute what can follow this grammar reference
 583         //
 584         if      (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
 585         {
 586                 // EOR can follow, but if we are not the start symbol, we
 587                 // need to remove it.
 588                 //
 589                 if      (recognizer->state->following->vector->count >= 0)
 590                 {
 591                         followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
 592                 }
 593
 594                 // Now compute the visiable tokens that can follow this rule, according to context
 595                 // and make them part of the follow set.
 596                 //
 597                 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
 598                 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
 599         }
 600
 601         /// if current token is consistent with what could come after set
 602         /// then we know we're missing a token; error recovery is free to
 603         /// "insert" the missing token
 604         ///
 605         /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
 606         /// in follow set to indicate that the fall of the start symbol is
 607         /// in the set (EOF can follow).
 608         ///
 609         if      (               followClone->isMember(followClone, is->_LA(is, 1))
 610                         ||      followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
 611                 )
 612         {
 613                 retcode = ANTLR3_TRUE;
 614         }
 615         else
 616         {
 617                 retcode = ANTLR3_FALSE;
 618         }
 619
 620         if      (viableTokensFollowingThisRule != NULL)
 621         {
 622                 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
 623         }
 624         if      (followClone != NULL)
 625         {
 626                 followClone->free(followClone);
 627         }
 628
 629         return retcode;
 630
 631 }
 632
 633 /// Factor out what to do upon token mismatch so tree parsers can behave
 634 /// differently.  Override and call mismatchRecover(input, ttype, follow)
 635 /// to get single token insertion and deletion.  Use this to turn off
 636 /// single token insertion and deletion. Override mismatchRecover
 637 /// to call this instead.
 638 ///
 639 /// \remark mismatch only works for parsers and must be overridden for anything else.
 640 ///
 641 static  void
 642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
 643 {
 644     pANTLR3_PARSER          parser;
 645     pANTLR3_TREE_PARSER     tparser;
 646     pANTLR3_INT_STREAM      is;
 647
 648     // Install a mismatched token exception in the exception stack
 649     //
 650     antlr3MTExceptionNew(recognizer);
 651     recognizer->state->exception->expecting    = ttype;
 652
 653     switch      (recognizer->type)
 654     {
 655                 case    ANTLR3_TYPE_PARSER:
 656
 657                         parser  = (pANTLR3_PARSER) (recognizer->super);
 658                         tparser = NULL;
 659                         is      = parser->tstream->istream;
 660
 661                         break;
 662
 663                 default:
 664
 665                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
 666                         return;
 667
 668                         break;
 669     }
 670
 671         if      (mismatchIsUnwantedToken(recognizer, is, ttype))
 672         {
 673                 // Create a basic recognition exception structure
 674                 //
 675             antlr3RecognitionExceptionNew(recognizer);
 676
 677                 // Now update it to indicate this is an unwanted token exception
 678                 //
 679                 recognizer->state->exception->name              = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
 680                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
 681
 682                 return;
 683         }
 684
 685         if      (mismatchIsMissingToken(recognizer, is, follow))
 686         {
 687                 // Create a basic recognition exception structure
 688                 //
 689             antlr3RecognitionExceptionNew(recognizer);
 690
 691                 // Now update it to indicate this is an unwanted token exception
 692                 //
 693                 recognizer->state->exception->name              = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
 694                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
 695
 696                 return;
 697         }
 698
 699         // Just a mismatched token is all we can dtermine
 700         //
 701         antlr3MTExceptionNew(recognizer);
 702
 703         return;
 704 }
 705 /// Report a recognition problem.
 706 ///
 707 /// This method sets errorRecovery to indicate the parser is recovering
 708 /// not parsing.  Once in recovery mode, no errors are generated.
 709 /// To get out of recovery mode, the parser must successfully match
 710 /// a token (after a resync).  So it will go:
 711 ///
 712 ///             1. error occurs
 713 ///             2. enter recovery mode, report error
 714 ///             3. consume until token found in resynch set
 715 ///             4. try to resume parsing
 716 ///             5. next match() will reset errorRecovery mode
 717 ///
 718 /// If you override, make sure to update errorCount if you care about that.
 719 ///
 720 static void
 721 reportError                 (pANTLR3_BASE_RECOGNIZER recognizer)
 722 {
 723     if  (recognizer->state->errorRecovery == ANTLR3_TRUE)
 724     {
 725                 // Already in error recovery so don't display another error while doing so
 726                 //
 727                 return;
 728     }
 729
 730     // Signal we are in error recovery now
 731     //
 732     recognizer->state->errorRecovery = ANTLR3_TRUE;
 733
 734         // Indicate this recognizer had an error while processing.
 735         //
 736         recognizer->state->errorCount++;
 737
 738         // Call the error display routine
 739         //
 740     recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
 741 }
 742
 743 static void
 744 beginBacktrack          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
 745 {
 746         if      (recognizer->debugger != NULL)
 747         {
 748                 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
 749         }
 750 }
 751
 752 static void
 753 endBacktrack            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
 754 {
 755         if      (recognizer->debugger != NULL)
 756         {
 757                 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
 758         }
 759 }
 760 static void
 761 beginResync                 (pANTLR3_BASE_RECOGNIZER recognizer)
 762 {
 763         if      (recognizer->debugger != NULL)
 764         {
 765                 recognizer->debugger->beginResync(recognizer->debugger);
 766         }
 767 }
 768
 769 static void
 770 endResync                   (pANTLR3_BASE_RECOGNIZER recognizer)
 771 {
 772         if      (recognizer->debugger != NULL)
 773         {
 774                 recognizer->debugger->endResync(recognizer->debugger);
 775         }
 776 }
 777
 778 /// Compute the error recovery set for the current rule.
 779 /// Documentation below is from the Java implementation.
 780 ///
 781 /// During rule invocation, the parser pushes the set of tokens that can
 782 /// follow that rule reference on the stack; this amounts to
 783 /// computing FIRST of what follows the rule reference in the
 784 /// enclosing rule. This local follow set only includes tokens
 785 /// from within the rule; i.e., the FIRST computation done by
 786 /// ANTLR stops at the end of a rule.
///
/// EXAMPLE
///
 790 /// When you find a "no viable alt exception", the input is not
 791 /// consistent with any of the alternatives for rule r.  The best
 792 /// thing to do is to consume tokens until you see something that
 793 /// can legally follow a call to r *or* any rule that called r.
 794 /// You don't want the exact set of viable next tokens because the
 795 /// input might just be missing a token--you might consume the
 796 /// rest of the input looking for one of the missing tokens.
 797 ///
 798 /// Consider grammar:
 799 ///
 800 /// a : '[' b ']'
 801 ///   | '(' b ')'
 802 ///   ;
 803 /// b : c '^' INT ;
 804 /// c : ID
 805 ///   | INT
 806 ///   ;
 807 ///
 808 /// At each rule invocation, the set of tokens that could follow
 809 /// that rule is pushed on a stack.  Here are the various "local"
 810 /// follow sets:
 811 ///
 812 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
 813 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
 814 /// FOLLOW(c_in_b) = FIRST('^') = '^'
 815 ///
 816 /// Upon erroneous input "[]", the call chain is
 817 ///
 818 /// a -> b -> c
 819 ///
 820 /// and, hence, the follow context stack is:
 821 ///
 822 /// depth  local follow set     after call to rule
 823 ///   0         <EOF>                    a (from main())
 824 ///   1          ']'                     b
///   2          '^'                     c
 826 ///
 827 /// Notice that ')' is not included, because b would have to have
 828 /// been called from a different context in rule a for ')' to be
 829 /// included.
 830 ///
 831 /// For error recovery, we cannot consider FOLLOW(c)
 832 /// (context-sensitive or otherwise).  We need the combined set of
 833 /// all context-sensitive FOLLOW sets--the set of all tokens that
 834 /// could follow any reference in the call chain.  We need to
 835 /// resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 836 /// we resync'd to that token, we'd consume until EOF.  We need to
 837 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 838 /// In this case, for input "[]", LA(1) is in this set so we would
 839 /// not consume anything and after printing an error rule c would
 840 /// return normally.  It would not find the required '^' though.
 841 /// At this point, it gets a mismatched token error and throws an
 842 /// exception (since LA(1) is not in the viable following token
 843 /// set).  The rule exception handler tries to recover, but finds
 844 /// the same recovery set and doesn't consume anything.  Rule b
 845 /// exits normally returning to rule a.  Now it finds the ']' (and
 846 /// with the successful match exits errorRecovery mode).
 847 ///
 848 /// So, you can see that the parser walks up call chain looking
 849 /// for the token that was a member of the recovery set.
 850 ///
 851 /// Errors are not generated in errorRecovery mode.
 852 ///
 853 /// ANTLR's error recovery mechanism is based upon original ideas:
 854 ///
 855 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
 856 ///
 857 /// and
 858 ///
 859 /// "A note on error recovery in recursive descent parsers":
 860 /// http://portal.acm.org/citation.cfm?id=947902.947905
 861 ///
 862 /// Later, Josef Grosch had some good ideas:
 863 ///
 864 /// "Efficient and Comfortable Error Recovery in Recursive Descent
 865 /// Parsers":
 866 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 867 ///
 868 /// Like Grosch I implemented local FOLLOW sets that are combined
 869 /// at run-time upon error to avoid overhead during parsing.
 870 ///
 871 static pANTLR3_BITSET
 872 computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer)
 873 {
 874     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
 875 }
 876
 877 /// Compute the context-sensitive FOLLOW set for current rule.
 878 /// Documentation below is from the Java runtime.
 879 ///
 880 /// This is the set of token types that can follow a specific rule
 881 /// reference given a specific call chain.  You get the set of
 882 /// viable tokens that can possibly come next (look ahead depth 1)
 883 /// given the current call chain.  Contrast this with the
 884 /// definition of plain FOLLOW for rule r:
 885 ///
 886 ///  FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 887 ///
 888 /// where x in T* and alpha, beta in V*; T is set of terminals and
 889 /// V is the set of terminals and non terminals.  In other words,
 890 /// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context).  At
 892 /// runtime, however, we know precisely which context applies as
 893 /// we have the call chain.  We may compute the exact (rather
 894 /// than covering superset) set of following tokens.
 895 ///
 896 /// For example, consider grammar:
 897 ///
 898 /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 899 ///      | "return" expr '.'
 900 ///      ;
 901 /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 902 /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 903 ///      | '(' expr ')'
 904 ///      ;
 905 ///
 906 /// The FOLLOW sets are all inclusive whereas context-sensitive
 907 /// FOLLOW sets are precisely what could follow a rule reference.
/// For input "i=(3);", here is the derivation:
 909 ///
 910 /// stat => ID '=' expr ';'
 911 ///      => ID '=' atom ('+' atom)* ';'
 912 ///      => ID '=' '(' expr ')' ('+' atom)* ';'
 913 ///      => ID '=' '(' atom ')' ('+' atom)* ';'
 914 ///      => ID '=' '(' INT ')' ('+' atom)* ';'
 915 ///      => ID '=' '(' INT ')' ';'
 916 ///
 917 /// At the "3" token, you'd have a call chain of
 918 ///
 919 ///   stat -> expr -> atom -> expr -> atom
 920 ///
 921 /// What can follow that specific nested ref to atom?  Exactly ')'
 922 /// as you can see by looking at the derivation of this specific
 923 /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 924 ///
 925 /// You want the exact viable token set when recovering from a
 926 /// token mismatch.  Upon token mismatch, if LA(1) is member of
 927 /// the viable next token set, then you know there is most likely
 928 /// a missing token in the input stream.  "Insert" one by just not
 929 /// throwing an exception.
 930 ///
 931 static pANTLR3_BITSET
 932 computeCSRuleFollow         (pANTLR3_BASE_RECOGNIZER recognizer)
 933 {
 934     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
 935 }
 936
 937 /// Compute the current followset for the input stream.
 938 ///
 939 static pANTLR3_BITSET
 940 combineFollows              (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
 941 {
 942     pANTLR3_BITSET      followSet;
 943     pANTLR3_BITSET      localFollowSet;
 944     ANTLR3_UINT32       top;
 945     ANTLR3_UINT32       i;
 946
 947     top = recognizer->state->following->size(recognizer->state->following);
 948
 949     followSet       = antlr3BitsetNew(0);
 950         localFollowSet  = NULL;
 951
 952     for (i = top; i>0; i--)
 953     {
 954                 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
 955
 956                 if  (localFollowSet != NULL)
 957                 {
 958                         followSet->borInPlace(followSet, localFollowSet);
 959
 960                         if      (exact == ANTLR3_TRUE)
 961                         {
 962                                 if      (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
 963                                 {
 964                                         // Only leave EOR in the set if at top (start rule); this lets us know
 965                                         // if we have to include the follow(start rule); I.E., EOF
 966                                         //
 967                                         if      (i>1)
 968                                         {
 969                                                 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
 970                                         }
 971                                 }
 972                                 else
 973                                 {
 974                                         break;  // Cannot see End Of Rule from here, just drop out
 975                                 }
 976                         }
 977                         localFollowSet->free(localFollowSet);
 978                         localFollowSet = NULL;
 979                 }
 980     }
 981
 982         if      (localFollowSet != NULL)
 983         {
 984                 localFollowSet->free(localFollowSet);
 985         }
 986     return  followSet;
 987 }
 988
 989 /// Standard/Example error display method.
 990 /// No generic error message display funciton coudl possibly do everything correctly
 991 /// for all possible parsers. Hence you are provided with this example routine, which
 992 /// you should override in your parser/tree parser to do as you will.
 993 ///
 994 /// Here we depart somewhat from the Java runtime as that has now split up a lot
 995 /// of the error display routines into spearate units. However, ther is little advantage
 996 /// to this in the C version as you will probably implement all such routines as a
 997 /// separate translation unit, rather than install them all as pointers to functions
 998 /// in the base recognizer.
 999 ///
1000 static void
1001 displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1002 {
1003         pANTLR3_PARSER                  parser;
1004         pANTLR3_TREE_PARSER         tparser;
1005         pANTLR3_INT_STREAM          is;
1006         pANTLR3_STRING                  ttext;
1007         pANTLR3_STRING                  ftext;
1008         pANTLR3_EXCEPTION           ex;
1009         pANTLR3_COMMON_TOKEN    theToken;
1010         pANTLR3_BASE_TREE           theBaseTree;
1011         pANTLR3_COMMON_TREE         theCommonTree;
1012
1013         // Retrieve some info for easy reading.
1014         //
1015         ex          =           recognizer->state->exception;
1016         ttext   =               NULL;
1017
1018         // See if there is a 'filename' we can use
1019         //
1020         if      (ex->streamName == NULL)
1021         {
1022                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1023                 {
1024                         ANTLR3_FPRINTF(stderr, "-end of input-(");
1025                 }
1026                 else
1027                 {
1028                         ANTLR3_FPRINTF(stderr, "-unknown source-(");
1029                 }
1030         }
1031         else
1032         {
1033                 ftext = ex->streamName->to8(ex->streamName);
1034                 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1035         }
1036
1037         // Next comes the line number
1038         //
1039
1040         ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1041         ANTLR3_FPRINTF(stderr, " : error %d : %s",
1042                                                                                 recognizer->state->exception->type,
1043                                         (pANTLR3_UINT8)    (recognizer->state->exception->message));
1044
1045
1046         // How we determine the next piece is dependent on which thing raised the
1047         // error.
1048         //
1049         switch  (recognizer->type)
1050         {
1051         case    ANTLR3_TYPE_PARSER:
1052
1053                 // Prepare the knowledge we know we have
1054                 //
1055                 parser      = (pANTLR3_PARSER) (recognizer->super);
1056                 tparser     = NULL;
1057                 is                      = parser->tstream->istream;
1058                 theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1059                 ttext       = theToken->toString(theToken);
1060
1061                 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1062                 if  (theToken != NULL)
1063                 {
1064                         if (theToken->type == ANTLR3_TOKEN_EOF)
1065                         {
1066                                 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1067                         }
1068                         else
1069                         {
1070                                 // Guard against null text in a token
1071                                 //
1072                                 ANTLR3_FPRINTF(stderr, "\n    near %s\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1073                         }
1074                 }
1075                 break;
1076
1077         case    ANTLR3_TYPE_TREE_PARSER:
1078
1079                 tparser         = (pANTLR3_TREE_PARSER) (recognizer->super);
1080                 parser          = NULL;
1081                 is                      = tparser->ctnstream->tnstream->istream;
1082                 theBaseTree     = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1083                 ttext           = theBaseTree->toStringTree(theBaseTree);
1084
1085                 if  (theBaseTree != NULL)
1086                 {
1087                         theCommonTree   = (pANTLR3_COMMON_TREE)     theBaseTree->super;
1088
1089                         if      (theCommonTree != NULL)
1090                         {
1091                                 theToken        = (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
1092                         }
1093                         ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1094                         ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1095                 }
1096                 break;
1097
1098         default:
1099
1100                 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1101                 return;
1102                 break;
1103         }
1104
1105         // Although this function should generally be provided by the implementation, this one
1106         // should be as helpful as possible for grammar developers and serve as an example
1107         // of what you can do with each exception type. In general, when you make up your
1108         // 'real' handler, you should debug the routine with all possible errors you expect
1109         // which will then let you be as specific as possible about all circumstances.
1110         //
1111         // Note that in the general case, errors thrown by tree parsers indicate a problem
1112         // with the output of the parser or with the tree grammar itself. The job of the parser
1113         // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1114         // at that stage should really be semantic errors that your own code determines and handles
1115         // in whatever way is appropriate.
1116         //
1117         switch  (ex->type)
1118         {
1119         case    ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1120
1121                 // Indicates that the recognizer was fed a token which seesm to be
1122                 // spurious input. We can detect this when the token that follows
1123                 // this unwanted token would normally be part of the syntactically
1124                 // correct stream. Then we can see that the token we are looking at
1125                 // is just something that should not be there and throw this exception.
1126                 //
1127                 if      (tokenNames == NULL)
1128                 {
1129                         ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1130                 }
1131                 else
1132                 {
1133                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1134                         {
1135                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1136                         }
1137                         else
1138                         {
1139                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1140                         }
1141                 }
1142                 break;
1143
1144         case    ANTLR3_MISSING_TOKEN_EXCEPTION:
1145
1146                 // Indicates that the recognizer detected that the token we just
1147                 // hit would be valid syntactically if preceeded by a particular
1148                 // token. Perhaps a missing ';' at line end or a missing ',' in an
1149                 // expression list, and such like.
1150                 //
1151                 if      (tokenNames == NULL)
1152                 {
1153                         ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1154                 }
1155                 else
1156                 {
1157                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1158                         {
1159                                 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1160                         }
1161                         else
1162                         {
1163                                 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1164                         }
1165                 }
1166                 break;
1167
1168         case    ANTLR3_RECOGNITION_EXCEPTION:
1169
1170                 // Indicates that the recognizer received a token
1171                 // in the input that was not predicted. This is the basic exception type
1172                 // from which all others are derived. So we assume it was a syntax error.
1173                 // You may get this if there are not more tokens and more are needed
1174                 // to complete a parse for instance.
1175                 //
1176                 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1177                 break;
1178
1179         case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1180
1181                 // We were expecting to see one thing and got another. This is the
1182                 // most common error if we coudl not detect a missing or unwanted token.
1183                 // Here you can spend your efforts to
1184                 // derive more useful error messages based on the expected
1185                 // token set and the last token and so on. The error following
1186                 // bitmaps do a good job of reducing the set that we were looking
1187                 // for down to something small. Knowing what you are parsing may be
1188                 // able to allow you to be even more specific about an error.
1189                 //
1190                 if      (tokenNames == NULL)
1191                 {
1192                         ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1193                 }
1194                 else
1195                 {
1196                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1197                         {
1198                                 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1199                         }
1200                         else
1201                         {
1202                                 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1203                         }
1204                 }
1205                 break;
1206
1207         case    ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1208
1209                 // We could not pick any alt decision from the input given
1210                 // so god knows what happened - however when you examine your grammar,
1211                 // you should. It means that at the point where the current token occurred
1212                 // that the DFA indicates nowhere to go from here.
1213                 //
1214                 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1215
1216                 break;
1217
1218         case    ANTLR3_MISMATCHED_SET_EXCEPTION:
1219
1220                 {
1221                         ANTLR3_UINT32     count;
1222                         ANTLR3_UINT32     bit;
1223                         ANTLR3_UINT32     size;
1224                         ANTLR3_UINT32     numbits;
1225                         pANTLR3_BITSET    errBits;
1226
1227                         // This means we were able to deal with one of a set of
1228                         // possible tokens at this point, but we did not see any
1229                         // member of that set.
1230                         //
1231                         ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
1232
1233                         // What tokens could we have accepted at this point in the
1234                         // parse?
1235                         //
1236                         count   = 0;
1237                         errBits = antlr3BitsetLoad              (ex->expectingSet);
1238                         numbits = errBits->numBits              (errBits);
1239                         size    = errBits->size                 (errBits);
1240
1241                         if  (size > 0)
1242                         {
1243                                 // However many tokens we could have dealt with here, it is usually
1244                                 // not useful to print ALL of the set here. I arbitrarily chose 8
1245                                 // here, but you should do whatever makes sense for you of course.
1246                                 // No token number 0, so look for bit 1 and on.
1247                                 //
1248                                 for     (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1249                                 {
1250                                         // TODO: This doesn;t look right - should be asking if the bit is set!!
1251                                         //
1252                                         if  (tokenNames[bit])
1253                                         {
1254                                                 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1255                                                 count++;
1256                                         }
1257                                 }
1258                                 ANTLR3_FPRINTF(stderr, "\n");
1259                         }
1260                         else
1261                         {
1262                                 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1263                                 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1264                         }
1265                 }
1266                 break;
1267
1268         case    ANTLR3_EARLY_EXIT_EXCEPTION:
1269
1270                 // We entered a loop requiring a number of token sequences
1271                 // but found a token that ended that sequence earlier than
1272                 // we should have done.
1273                 //
1274                 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1275                 break;
1276
1277         default:
1278
1279                 // We don't handle any other exceptions here, but you can
1280                 // if you wish. If we get an exception that hits this point
1281                 // then we are just going to report what we know about the
1282                 // token.
1283                 //
1284                 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1285                 break;
1286         }
1287
1288         // Here you have the token that was in error which if this is
1289         // the standard implementation will tell you the line and offset
1290         // and also record the address of the start of the line in the
1291         // input stream. You could therefore print the source line and so on.
1292         // Generally though, I would expect that your lexer/parser will keep
1293         // its own map of lines and source pointers or whatever as there
1294         // are a lot of specific things you need to know about the input
1295         // to do something like that.
1296         // Here is where you do it though :-).
1297         //
1298 }
1299
1300 /// Return how many syntax errors were detected by this recognizer
1301 ///
1302 static ANTLR3_UINT32
1303 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1304 {
1305         return  recognizer->state->errorCount;
1306 }
1307
1308 /// Recover from an error found on the input stream.  Mostly this is
1309 /// NoViableAlt exceptions, but could be a mismatched token that
1310 /// the match() routine could not recover from.
1311 ///
1312 static void
1313 recover                     (pANTLR3_BASE_RECOGNIZER recognizer)
1314 {
1315     // Used to compute the follow set of tokens
1316     //
1317     pANTLR3_BITSET                      followSet;
1318     pANTLR3_PARSER                      parser;
1319     pANTLR3_TREE_PARSER     tparser;
1320     pANTLR3_INT_STREAM      is;
1321
1322     switch      (recognizer->type)
1323     {
1324                 case    ANTLR3_TYPE_PARSER:
1325
1326                 parser  = (pANTLR3_PARSER) (recognizer->super);
1327                 tparser = NULL;
1328                 is              = parser->tstream->istream;
1329
1330         break;
1331
1332     case        ANTLR3_TYPE_TREE_PARSER:
1333
1334                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1335                 parser  = NULL;
1336                 is              = tparser->ctnstream->tnstream->istream;
1337
1338         break;
1339
1340     default:
1341
1342                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1343                 return;
1344
1345         break;
1346     }
1347
1348         // Are we about to repeat the same error?
1349         //
1350     if  (recognizer->state->lastErrorIndex == is->index(is))
1351     {
1352                 // The last error was at the same token index point. This must be a case
1353                 // where LT(1) is in the recovery token set so nothing is
1354                 // consumed. Consume a single token so at least to prevent
1355                 // an infinite loop; this is a failsafe.
1356                 //
1357                 is->consume(is);
1358     }
1359
1360     // Record error index position
1361     //
1362     recognizer->state->lastErrorIndex    = is->index(is);
1363
1364     // Work out the follows set for error recovery
1365     //
1366     followSet   = recognizer->computeErrorRecoverySet(recognizer);
1367
1368     // Call resync hook (for debuggers and so on)
1369     //
1370     recognizer->beginResync(recognizer);
1371
1372     // Consume tokens until we have resynced to something in the follows set
1373     //
1374     recognizer->consumeUntilSet(recognizer, followSet);
1375
1376     // End resync hook
1377     //
1378     recognizer->endResync(recognizer);
1379
1380     // Destroy the temporary bitset we produced.
1381     //
1382     followSet->free(followSet);
1383
1384     // Reset the inError flag so we don't re-report the exception
1385     //
1386     recognizer->state->error    = ANTLR3_FALSE;
1387     recognizer->state->failed   = ANTLR3_FALSE;
1388 }
1389
1390
1391 /// Attempt to recover from a single missing or extra token.
1392 ///
1393 /// EXTRA TOKEN
1394 ///
1395 /// LA(1) is not what we are looking for.  If LA(2) has the right token,
1396 /// however, then assume LA(1) is some extra spurious token.  Delete it
1397 /// and LA(2) as if we were doing a normal match(), which advances the
1398 /// input.
1399 ///
1400 /// MISSING TOKEN
1401 ///
1402 /// If current token is consistent with what could come after
1403 /// ttype then it is ok to "insert" the missing token, else throw
1404 /// exception For example, Input "i=(3;" is clearly missing the
1405 /// ')'.  When the parser returns from the nested call to expr, it
1406 /// will have call chain:
1407 ///
1408 ///    stat -> expr -> atom
1409 ///
1410 /// and it will be trying to match the ')' at this point in the
1411 /// derivation:
1412 ///
1413 ///       => ID '=' '(' INT ')' ('+' atom)* ';'
1414 ///                          ^
1415 /// match() will see that ';' doesn't match ')' and report a
1416 /// mismatched token error.  To recover, it sees that LA(1)==';'
1417 /// is in the set of tokens that can follow the ')' token
1418 /// reference in rule atom.  It can assume that you forgot the ')'.
1419 ///
1420 /// The exception that was passed in, in the java implementation is
1421 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1422 /// error flag and rules cascade back when this is set.
1423 ///
1424 static void *
1425 recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1426 {
1427         pANTLR3_PARSER                    parser;
1428         pANTLR3_TREE_PARSER           tparser;
1429         pANTLR3_INT_STREAM            is;
1430         void                                    * matchedSymbol;
1431
1432         // Invoke the debugger event if there is a debugger listening to us
1433         //
1434         if      (recognizer->debugger != NULL)
1435         {
1436                 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
1437         }
1438
1439         switch  (recognizer->type)
1440         {
1441         case    ANTLR3_TYPE_PARSER:
1442
1443                 parser  = (pANTLR3_PARSER) (recognizer->super);
1444                 tparser = NULL;
1445                 is      = parser->tstream->istream;
1446
1447                 break;
1448
1449         case    ANTLR3_TYPE_TREE_PARSER:
1450
1451                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1452                 parser  = NULL;
1453                 is      = tparser->ctnstream->tnstream->istream;
1454
1455                 break;
1456
1457         default:
1458
1459                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1460                 return NULL;
1461
1462                 break;
1463         }
1464
1465         // Create an exception if we need one
1466         //
1467         if      (recognizer->state->exception == NULL)
1468         {
1469                 antlr3RecognitionExceptionNew(recognizer);
1470         }
1471
1472         // If the next token after the one we are looking at in the input stream
1473         // is what we are looking for then we remove the one we have discovered
1474         // from the stream by consuming it, then consume this next one along too as
1475         // if nothing had happened.
1476         //
1477         if      ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1478         {
1479                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1480                 recognizer->state->exception->message   = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1481
1482                 // Call resync hook (for debuggers and so on)
1483                 //
1484                 if      (recognizer->debugger != NULL)
1485                 {
1486                         recognizer->debugger->beginResync(recognizer->debugger);
1487                 }
1488
1489                 recognizer->beginResync(recognizer);
1490
1491                 // "delete" the extra token
1492                 //
1493                 recognizer->beginResync(recognizer);
1494                 is->consume(is);
1495                 recognizer->endResync(recognizer);
1496                 // End resync hook
1497                 //
1498                 if      (recognizer->debugger != NULL)
1499                 {
1500                         recognizer->debugger->endResync(recognizer->debugger);
1501                 }
1502
1503                 // Print out the error after we consume so that ANTLRWorks sees the
1504                 // token in the exception.
1505                 //
1506                 recognizer->reportError(recognizer);
1507
1508                 // Return the token we are actually matching
1509                 //
1510                 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511
1512                 // Consume the token that the rule actually expected to get as if everything
1513                 // was hunky dory.
1514                 //
1515                 is->consume(is);
1516
1517                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1518
1519                 return  matchedSymbol;
1520         }
1521
1522         // Single token deletion (Unwanted above) did not work
1523         // so we see if we can insert a token instead by calculating which
1524         // token would be missing
1525         //
1526         if      (mismatchIsMissingToken(recognizer, is, follow))
1527         {
1528                 // We can fake the missing token and proceed
1529                 //
1530                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
1532                 recognizer->state->exception->message   = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533                 recognizer->state->exception->token             = matchedSymbol;
1534                 recognizer->state->exception->expecting = ttype;
1535
1536                 // Print out the error after we insert so that ANTLRWorks sees the
1537                 // token in the exception.
1538                 //
1539                 recognizer->reportError(recognizer);
1540
1541                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1542
1543                 return  matchedSymbol;
1544         }
1545
1546
1547         // Neither deleting nor inserting tokens allows recovery
1548         // must just report the exception.
1549         //
1550         recognizer->state->error            = ANTLR3_TRUE;
1551         return NULL;
1552 }
1553
1554 static void *
1555 recoverFromMismatchedSet            (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556 {
1557     pANTLR3_PARSER                      parser;
1558     pANTLR3_TREE_PARSER     tparser;
1559     pANTLR3_INT_STREAM      is;
1560         pANTLR3_COMMON_TOKEN    matchedSymbol;
1561
1562     switch      (recognizer->type)
1563     {
1564     case        ANTLR3_TYPE_PARSER:
1565
1566                 parser  = (pANTLR3_PARSER) (recognizer->super);
1567                 tparser = NULL;
1568                 is      = parser->tstream->istream;
1569
1570         break;
1571
1572     case        ANTLR3_TYPE_TREE_PARSER:
1573
1574                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575                 parser  = NULL;
1576                 is      = tparser->ctnstream->tnstream->istream;
1577
1578         break;
1579
1580     default:
1581
1582                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583                 return NULL;
1584
1585         break;
1586     }
1587
1588         if      (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589         {
1590                 // We can fake the missing token and proceed
1591                 //
1592                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593                 recognizer->state->exception->type      = ANTLR3_MISSING_TOKEN_EXCEPTION;
1594                 recognizer->state->exception->token     = matchedSymbol;
1595
1596                 // Print out the error after we insert so that ANTLRWorks sees the
1597                 // token in the exception.
1598                 //
1599                 recognizer->reportError(recognizer);
1600
1601                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1602
1603                 return  matchedSymbol;
1604         }
1605
1606     // TODO - Single token deletion like in recoverFromMismatchedToken()
1607     //
1608     recognizer->state->error    = ANTLR3_TRUE;
1609         recognizer->state->failed       = ANTLR3_TRUE;
1610         return NULL;
1611 }
1612
1613 /// This code is factored out from mismatched token and mismatched set
1614 ///  recovery.  It handles "single token insertion" error recovery for
1615 /// both.  No tokens are consumed to recover from insertions.  Return
1616 /// true if recovery was possible else return false.
1617 ///
1618 static ANTLR3_BOOLEAN
1619 recoverFromMismatchedElement        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620 {
1621     pANTLR3_BITSET          viableToksFollowingRule;
1622     pANTLR3_BITSET          follow;
1623     pANTLR3_PARSER          parser;
1624     pANTLR3_TREE_PARSER     tparser;
1625     pANTLR3_INT_STREAM      is;
1626
1627     switch      (recognizer->type)
1628     {
1629     case        ANTLR3_TYPE_PARSER:
1630
1631                 parser  = (pANTLR3_PARSER) (recognizer->super);
1632                 tparser = NULL;
1633                 is      = parser->tstream->istream;
1634
1635         break;
1636
1637     case        ANTLR3_TYPE_TREE_PARSER:
1638
1639                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640                 parser  = NULL;
1641                 is      = tparser->ctnstream->tnstream->istream;
1642
1643         break;
1644
1645     default:
1646
1647                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648                 return ANTLR3_FALSE;
1649
1650         break;
1651     }
1652
1653     follow      = antlr3BitsetLoad(followBits);
1654
1655     if  (follow == NULL)
1656     {
1657                 /* The follow set is NULL, which means we don't know what can come
1658                  * next, so we "hit and hope" by just signifying that we cannot
1659                  * recover, which will just cause the next token to be consumed,
1660                  * which might dig us out.
1661                  */
1662                 return  ANTLR3_FALSE;
1663     }
1664
1665     /* We have a bitmap for the follow set, hence we can compute
1666      * what can follow this grammar element reference.
1667      */
1668     if  (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669     {
1670                 /* First we need to know which of the available tokens are viable
1671                  * to follow this reference.
1672                  */
1673                 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1674
1675                 /* Remove the EOR token, which we do not wish to compute with
1676                  */
1677                 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678                 viableToksFollowingRule->free(viableToksFollowingRule);
1679                 /* We now have the computed set of what can follow the current token
1680                  */
1681     }
1682
1683     /* We can now see if the current token works with the set of tokens
1684      * that could follow the current grammar reference. If it looks like it
1685      * is consistent, then we can "insert" that token by not throwing
1686      * an exception and assuming that we saw it.
1687      */
1688     if  ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689     {
1690                 /* report the error, but don't cause any rules to abort and stuff
1691                  */
1692                 recognizer->reportError(recognizer);
1693                 if      (follow != NULL)
1694                 {
1695                         follow->free(follow);
1696                 }
1697                 recognizer->state->error                        = ANTLR3_FALSE;
1698                 recognizer->state->failed                       = ANTLR3_FALSE;
1699                 return ANTLR3_TRUE;     /* Success in recovery  */
1700     }
1701
1702     if  (follow != NULL)
1703     {
1704                 follow->free(follow);
1705     }
1706
1707     /* We could not find anything viable to do, so this is going to
1708      * cause an exception.
1709      */
1710     return  ANTLR3_FALSE;
1711 }
1712
1713 /// Eat tokens from the input stream until we get one of JUST the right type
1714 ///
1715 static void
1716 consumeUntil    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717 {
1718     ANTLR3_UINT32                       ttype;
1719     pANTLR3_PARSER                      parser;
1720     pANTLR3_TREE_PARSER     tparser;
1721     pANTLR3_INT_STREAM      is;
1722
1723     switch      (recognizer->type)
1724     {
1725                 case    ANTLR3_TYPE_PARSER:
1726
1727                         parser  = (pANTLR3_PARSER) (recognizer->super);
1728                         tparser = NULL;
1729                         is      = parser->tstream->istream;
1730
1731                         break;
1732
1733                 case    ANTLR3_TYPE_TREE_PARSER:
1734
1735                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736                         parser  = NULL;
1737                         is      = tparser->ctnstream->tnstream->istream;
1738
1739                         break;
1740
1741                 default:
1742
1743                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744                         return;
1745
1746                         break;
1747     }
1748
1749     // What do have at the moment?
1750     //
1751     ttype       = is->_LA(is, 1);
1752
1753     // Start eating tokens until we get to the one we want.
1754     //
1755     while   (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756     {
1757                 is->consume(is);
1758                 ttype   = is->_LA(is, 1);
1759     }
1760 }
1761
1762 /// Eat tokens from the input stream until we find one that
1763 /// belongs to the supplied set.
1764 ///
1765 static void
1766 consumeUntilSet                     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767 {
1768     ANTLR3_UINT32           ttype;
1769     pANTLR3_PARSER          parser;
1770     pANTLR3_TREE_PARSER     tparser;
1771     pANTLR3_INT_STREAM      is;
1772
1773     switch      (recognizer->type)
1774     {
1775                 case    ANTLR3_TYPE_PARSER:
1776
1777                         parser  = (pANTLR3_PARSER) (recognizer->super);
1778                         tparser = NULL;
1779                         is      = parser->tstream->istream;
1780
1781                         break;
1782
1783                 case    ANTLR3_TYPE_TREE_PARSER:
1784
1785                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786                         parser  = NULL;
1787                         is      = tparser->ctnstream->tnstream->istream;
1788
1789                         break;
1790
1791                 default:
1792
1793                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794                         return;
1795
1796                         break;
1797     }
1798
1799     // What do have at the moment?
1800     //
1801     ttype       = is->_LA(is, 1);
1802
1803     // Start eating tokens until we get to one we want.
1804     //
1805     while   (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806     {
1807                 is->consume(is);
1808                 ttype   = is->_LA(is, 1);
1809     }
1810 }
1811
1812 /** Return the rule invocation stack (how we got here in the parse.
1813  *  In the java version Ter just asks the JVM for all the information
1814  *  but in C we don't get this information, so I am going to do nothing
1815  *  right now.
1816  */
1817 static pANTLR3_STACK
1818 getRuleInvocationStack              (pANTLR3_BASE_RECOGNIZER recognizer)
1819 {
1820     return NULL;
1821 }
1822
1823 static pANTLR3_STACK
1824 getRuleInvocationStackNamed         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825 {
1826     return NULL;
1827 }
1828
1829 /** Convenience method for template rewrites - NYI.
1830  */
1831 static pANTLR3_HASH_TABLE
1832 toStrings                           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833 {
1834     return NULL;
1835 }
1836
1837 static  void ANTLR3_CDECL
1838 freeIntTrie    (void * trie)
1839 {
1840     ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841 }
1842
1843
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845  *  start index before. If the rule has not parsed input starting from the supplied start index,
1846  *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847  *  then it will return the point where it last stopped parsing after that start point.
1848  *
1849  * \remark
1850  * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851  * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852  * version of the table.
1853  */
1854 static ANTLR3_MARKER
1855 getRuleMemoization                  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856 {
1857     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858      */
1859     pANTLR3_INT_TRIE    ruleList;
1860     ANTLR3_MARKER       stopIndex;
1861     pANTLR3_TRIE_ENTRY  entry;
1862
1863     /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864      * as we will need it eventually if we are being asked for the memo here.
1865      */
1866     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867
1868     if  (entry == NULL)
1869     {
1870                 /* Did not find it, so create a new one for it, with a bit depth based on the
1871                  * size of the input stream. We need the bit depth to incorporate the number if
1872                  * bits required to represent the largest possible stop index in the input, which is the
1873                  * last character. An int stream is free to return the largest 64 bit offset if it has
1874                  * no idea of the size, but you should remember that this will cause the leftmost
1875                  * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876                  */
1877                 ruleList    = antlr3IntTrieNew(63);     /* Depth is theoretically 64 bits, but probably not ;-) */
1878
1879                 if (ruleList != NULL)
1880                 {
1881                         recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882                 }
1883
1884                 /* We cannot have a stopIndex in a trie we have just created of course
1885                  */
1886                 return  MEMO_RULE_UNKNOWN;
1887     }
1888
1889     ruleList    = (pANTLR3_INT_TRIE) (entry->data.ptr);
1890
1891     /* See if there is a stop index associated with the supplied start index.
1892      */
1893     stopIndex   = 0;
1894
1895     entry = ruleList->get(ruleList, ruleParseStart);
1896     if (entry != NULL)
1897     {
1898                 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899     }
1900
1901     if  (stopIndex == 0)
1902     {
1903                 return MEMO_RULE_UNKNOWN;
1904     }
1905
1906     return  stopIndex;
1907 }
1908
1909 /** Has this rule already parsed input at the current index in the
1910  *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911  *  if we have not.
1912  *
1913  *  This method has a side-effect: if we have seen this input for
1914  *  this rule and successfully parsed before, then seek ahead to
1915  *  1 past the stop token matched for this rule last time.
1916  */
1917 static ANTLR3_BOOLEAN
1918 alreadyParsedRule                   (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919 {
1920     ANTLR3_MARKER                       stopIndex;
1921     pANTLR3_LEXER                       lexer;
1922     pANTLR3_PARSER                      parser;
1923     pANTLR3_TREE_PARSER     tparser;
1924     pANTLR3_INT_STREAM      is;
1925
1926     switch      (recognizer->type)
1927     {
1928                 case    ANTLR3_TYPE_PARSER:
1929
1930                         parser  = (pANTLR3_PARSER) (recognizer->super);
1931                         tparser = NULL;
1932                         lexer   = NULL;
1933                         is      = parser->tstream->istream;
1934
1935                         break;
1936
1937                 case    ANTLR3_TYPE_TREE_PARSER:
1938
1939                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940                         parser  = NULL;
1941                         lexer   = NULL;
1942                         is      = tparser->ctnstream->tnstream->istream;
1943
1944                         break;
1945
1946                 case    ANTLR3_TYPE_LEXER:
1947
1948                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
1949                         parser  = NULL;
1950                         tparser = NULL;
1951                         is      = lexer->input->istream;
1952                         break;
1953
1954                 default:
1955
1956                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957                         return ANTLR3_FALSE;
1958
1959                         break;
1960     }
1961
1962     /* See if we have a memo marker for this.
1963      */
1964     stopIndex       = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965
1966     if  (stopIndex  == MEMO_RULE_UNKNOWN)
1967     {
1968                 return ANTLR3_FALSE;
1969     }
1970
1971     if  (stopIndex == MEMO_RULE_FAILED)
1972     {
1973                 recognizer->state->failed = ANTLR3_TRUE;
1974     }
1975     else
1976     {
1977                 is->seek(is, stopIndex+1);
1978     }
1979
1980     /* If here then the rule was executed for this input already
1981      */
1982     return  ANTLR3_TRUE;
1983 }
1984
1985 /** Record whether or not this rule parsed the input at this position
1986  *  successfully.
1987  */
1988 static void
1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990 {
1991     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992      */
1993     pANTLR3_INT_TRIE        ruleList;
1994     pANTLR3_TRIE_ENTRY      entry;
1995     ANTLR3_MARKER           stopIndex;
1996     pANTLR3_LEXER           lexer;
1997     pANTLR3_PARSER          parser;
1998     pANTLR3_TREE_PARSER     tparser;
1999     pANTLR3_INT_STREAM      is;
2000
2001     switch      (recognizer->type)
2002     {
2003                 case    ANTLR3_TYPE_PARSER:
2004
2005                         parser  = (pANTLR3_PARSER) (recognizer->super);
2006                         tparser = NULL;
2007                         is      = parser->tstream->istream;
2008
2009                         break;
2010
2011                 case    ANTLR3_TYPE_TREE_PARSER:
2012
2013                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014                         parser  = NULL;
2015                         is      = tparser->ctnstream->tnstream->istream;
2016
2017                         break;
2018
2019                 case    ANTLR3_TYPE_LEXER:
2020
2021                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
2022                         parser  = NULL;
2023                         tparser = NULL;
2024                         is              = lexer->input->istream;
2025                         break;
2026
2027                 default:
2028
2029                         ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030                         return;
2031
2032                         break;
2033     }
2034
2035     stopIndex   = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036
2037     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038
2039     if  (entry != NULL)
2040     {
2041                 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042
2043                 /* If we don't already have this entry, append it. The memoize trie does not
2044                  * accept duplicates so it won't add it if already there and we just ignore the
2045                  * return code as we don't care if it is there already.
2046                  */
2047                 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048     }
2049 }
2050 /** A syntactic predicate.  Returns true/false depending on whether
2051  *  the specified grammar fragment matches the current input stream.
2052  *  This resets the failed instance var afterwards.
2053  */
2054 static ANTLR3_BOOLEAN
2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056 {
2057     ANTLR3_MARKER   start;
2058     pANTLR3_PARSER          parser;
2059     pANTLR3_TREE_PARSER     tparser;
2060     pANTLR3_INT_STREAM      is;
2061
2062     switch      (recognizer->type)
2063     {
2064                 case    ANTLR3_TYPE_PARSER:
2065
2066                         parser  = (pANTLR3_PARSER) (recognizer->super);
2067                         tparser = NULL;
2068                         is      = parser->tstream->istream;
2069
2070                         break;
2071
2072                 case    ANTLR3_TYPE_TREE_PARSER:
2073
2074                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075                         parser  = NULL;
2076                         is      = tparser->ctnstream->tnstream->istream;
2077
2078                         break;
2079
2080                 default:
2081
2082                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083                         return ANTLR3_FALSE;
2084
2085                         break;
2086     }
2087
2088     /* Begin backtracking so we can get back to where we started after trying out
2089      * the syntactic predicate.
2090      */
2091     start   = is->mark(is);
2092     recognizer->state->backtracking++;
2093
2094     /* Try the syntactical predicate
2095      */
2096     predicate(ctx);
2097
2098     /* Reset
2099      */
2100     is->rewind(is, start);
2101     recognizer->state->backtracking--;
2102
2103     if  (recognizer->state->failed == ANTLR3_TRUE)
2104     {
2105                 /* Predicate failed
2106                  */
2107                 recognizer->state->failed = ANTLR3_FALSE;
2108                 return  ANTLR3_FALSE;
2109     }
2110     else
2111     {
2112                 /* Predicate was successful
2113                  */
2114                 recognizer->state->failed       = ANTLR3_FALSE;
2115                 return  ANTLR3_TRUE;
2116     }
2117 }
2118
2119 static void
2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121 {
2122     if  (recognizer->state->following != NULL)
2123     {
2124                 recognizer->state->following->free(recognizer->state->following);
2125     }
2126
2127         // Reset the state flags
2128         //
2129         recognizer->state->errorRecovery        = ANTLR3_FALSE;
2130         recognizer->state->lastErrorIndex       = -1;
2131         recognizer->state->failed                       = ANTLR3_FALSE;
2132         recognizer->state->errorCount           = 0;
2133         recognizer->state->backtracking         = 0;
2134         recognizer->state->following            = NULL;
2135
2136         if      (recognizer->state != NULL)
2137         {
2138                 if      (recognizer->state->ruleMemo != NULL)
2139                 {
2140                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141                         recognizer->state->ruleMemo = antlr3IntTrieNew(15);     /* 16 bit depth is enough for 32768 rules! */
2142                 }
2143         }
2144
2145
2146     // Install a new following set
2147     //
2148     recognizer->state->following   = antlr3StackNew(8);
2149
2150 }
2151
2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2153 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2154 //
2155 static void *
2156 getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2157 {
2158         return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2159 }
2160
2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2162 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2163 //
2164 static void *
2165 getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
2166                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2167 {
2168         pANTLR3_TOKEN_STREAM                    ts;
2169         pANTLR3_COMMON_TOKEN_STREAM             cts;
2170         pANTLR3_COMMON_TOKEN                    token;
2171         pANTLR3_COMMON_TOKEN                    current;
2172         pANTLR3_STRING                                  text;
2173
2174         // Dereference the standard pointers
2175         //
2176         ts              = (pANTLR3_TOKEN_STREAM)istream->super;
2177         cts             = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2178
2179         // Work out what to use as the current symbol to make a line and offset etc
2180         // If we are at EOF, we use the token before EOF
2181         //
2182         current = ts->_LT(ts, 1);
2183         if      (current->getType(current) == ANTLR3_TOKEN_EOF)
2184         {
2185                 current = ts->_LT(ts, -1);
2186         }
2187
2188         // Create a new empty token
2189         //
2190         if      (recognizer->state->tokFactory == NULL)
2191         {
2192                 // We don't yet have a token factory for making tokens
2193                 // we just need a fake one using the input stream of the current
2194                 // token.
2195                 //
2196                 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2197         }
2198         token   = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2199
2200         // Set some of the token properties based on the current token
2201         //
2202         token->setLine                                  (token, current->getLine(current));
2203         token->setCharPositionInLine    (token, current->getCharPositionInLine(current));
2204         token->setChannel                               (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2205         token->setType                                  (token, expectedTokenType);
2206     token->user1                    = current->user1;
2207     token->user2                    = current->user2;
2208     token->user3                    = current->user3;
2209     token->custom                   = current->custom;
2210     token->lineStart                = current->lineStart;
2211
2212         // Create the token text that shows it has been inserted
2213         //
2214         token->setText8(token, (pANTLR3_UINT8)"<missing ");
2215         text = token->getText(token);
2216
2217         if      (text != NULL)
2218         {
2219                 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2220                 text->append8(text, (const char *)">");
2221         }
2222
2223         // Finally return the pointer to our new token
2224         //
2225         return  token;
2226 }
2227
2228
2229 #ifdef  ANTLR3_WINDOWS
2230 #pragma warning( default : 4100 )
2231 #endif
2232
2233 /// @}
2234 ///
2235