]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.databoard/cpp/DataBoardTest/libantlr3c-3.2/src/antlr3baserecognizer.c
Fixed all line endings of the repository
[simantics/platform.git] / bundles / org.simantics.databoard / cpp / DataBoardTest / libantlr3c-3.2 / src / antlr3baserecognizer.c
1 /** \file
2  * Contains the base functions that all recognizers require.
3  * Any function can be overridden by a lexer/parser/tree parser or by the
4  * ANTLR3 programmer.
5  * 
6  * \addtogroup pANTLR3_BASE_RECOGNIZER
7  * @{
8  */
9 #include    <antlr3baserecognizer.h>
10
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
15 //
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
20 // are met:
21 // 1. Redistributions of source code must retain the above copyright
22 //    notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 //    notice, this list of conditions and the following disclaimer in the
25 //    documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 //    derived from this software without specific prior written permission.
28 //
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40 #ifdef  ANTLR3_WINDOWS
41 #pragma warning( disable : 4100 )
42 #endif
43
44 /* Interface functions -standard implementations cover parser and treeparser
45  * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46  * most of these functions.
47  */
48 static void                                     beginResync                                     (pANTLR3_BASE_RECOGNIZER recognizer);
49 static pANTLR3_BITSET           computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer);
50 static void                                     endResync                                       (pANTLR3_BASE_RECOGNIZER recognizer);
51 static void                                     beginBacktrack                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52 static void                                     endBacktrack                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
53
54 static void *                           match                                           (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55 static void                                     matchAny                                        (pANTLR3_BASE_RECOGNIZER recognizer);
56 static void                                     mismatch                                        (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57 static ANTLR3_BOOLEAN           mismatchIsUnwantedToken         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58 static ANTLR3_BOOLEAN           mismatchIsMissingToken          (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59 static void                                     reportError                                     (pANTLR3_BASE_RECOGNIZER recognizer);
60 static pANTLR3_BITSET           computeCSRuleFollow                     (pANTLR3_BASE_RECOGNIZER recognizer);
61 static pANTLR3_BITSET           combineFollows                          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62 static void                                     displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63 static void                                     recover                                         (pANTLR3_BASE_RECOGNIZER recognizer);
64 static void     *                               recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65 static void     *                               recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66 static ANTLR3_BOOLEAN           recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67 static void                                     consumeUntil                            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68 static void                                     consumeUntilSet                         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69 static pANTLR3_STACK            getRuleInvocationStack      (pANTLR3_BASE_RECOGNIZER recognizer);
70 static pANTLR3_STACK            getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71 static pANTLR3_HASH_TABLE       toStrings                                       (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72 static ANTLR3_MARKER            getRuleMemoization                      (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73 static ANTLR3_BOOLEAN           alreadyParsedRule                       (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74 static void                                     memoize                                         (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75 static ANTLR3_BOOLEAN           synpred                                         (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76 static void                                     reset                                           (pANTLR3_BASE_RECOGNIZER recognizer);
77 static void                                     freeBR                                          (pANTLR3_BASE_RECOGNIZER recognizer);
78 static void *                           getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79 static void *                           getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
80                                                                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81 static ANTLR3_UINT32            getNumberOfSyntaxErrors         (pANTLR3_BASE_RECOGNIZER recognizer);
82
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
85 {
86     pANTLR3_BASE_RECOGNIZER recognizer;
87
88     // Allocate memory for the structure
89     //
90     recognizer      = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
91
92     if  (recognizer == NULL)
93     {
94                 // Allocation failed
95                 //
96                 return  NULL;
97     }
98
99         
100         // If we have been supplied with a pre-existing recognizer state
101         // then we just install it, otherwise we must create one from scratch
102         //
103         if      (state == NULL)
104         {
105                 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
106
107                 if      (recognizer->state == NULL)
108                 {
109                         ANTLR3_FREE(recognizer);
110                         return  NULL;
111                 }
112
113                 // Initialize any new recognizer state
114                 //
115                 recognizer->state->errorRecovery        = ANTLR3_FALSE;
116                 recognizer->state->lastErrorIndex       = -1;
117                 recognizer->state->failed               = ANTLR3_FALSE;
118                 recognizer->state->errorCount           = 0;
119                 recognizer->state->backtracking         = 0;
120                 recognizer->state->following            = NULL;
121                 recognizer->state->ruleMemo             = NULL;
122                 recognizer->state->tokenNames           = NULL;
123                 recognizer->state->sizeHint             = sizeHint;
124                 recognizer->state->tokSource            = NULL;
125                 recognizer->state->tokFactory           = NULL;
126
127                 // Rather than check to see if we must initialize
128                 // the stack every time we are asked for an new rewrite stream
129                 // we just always create an empty stack and then just
130                 // free it when the base recognizer is freed.
131                 //
132                 recognizer->state->rStreams             = antlr3VectorNew(0);  // We don't know the size.
133
134                 if      (recognizer->state->rStreams == NULL)
135                 {
136                         // Out of memory
137                         //
138                         ANTLR3_FREE(recognizer->state);
139                         ANTLR3_FREE(recognizer);
140                         return  NULL;
141                 }
142         }
143         else
144         {
145                 // Install the one we were given, and do not reset it here
146                 // as it will either already have been initialized or will
147                 // be in a state that needs to be preserved.
148                 //
149                 recognizer->state = state;
150         }
151                 
152     // Install the BR API
153     //
154     recognizer->alreadyParsedRule           = alreadyParsedRule;
155     recognizer->beginResync                 = beginResync;
156     recognizer->combineFollows              = combineFollows;
157     recognizer->beginBacktrack              = beginBacktrack;
158     recognizer->endBacktrack                = endBacktrack;
159     recognizer->computeCSRuleFollow         = computeCSRuleFollow;
160     recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
161     recognizer->consumeUntil                = consumeUntil;
162     recognizer->consumeUntilSet             = consumeUntilSet;
163     recognizer->displayRecognitionError     = displayRecognitionError;
164     recognizer->endResync                   = endResync;
165     recognizer->exConstruct                 = antlr3MTExceptionNew;
166     recognizer->getRuleInvocationStack      = getRuleInvocationStack;
167     recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168     recognizer->getRuleMemoization          = getRuleMemoization;
169     recognizer->match                       = match;
170     recognizer->matchAny                    = matchAny;
171     recognizer->memoize                     = memoize;
172     recognizer->mismatch                    = mismatch;
173     recognizer->mismatchIsUnwantedToken     = mismatchIsUnwantedToken;
174     recognizer->mismatchIsMissingToken      = mismatchIsMissingToken;
175     recognizer->recover                     = recover;
176     recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177     recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
178     recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
179     recognizer->getNumberOfSyntaxErrors     = getNumberOfSyntaxErrors;
180     recognizer->reportError                 = reportError;
181     recognizer->reset                       = reset;
182     recognizer->synpred                     = synpred;
183     recognizer->toStrings                   = toStrings;
184     recognizer->getCurrentInputSymbol       = getCurrentInputSymbol;
185     recognizer->getMissingSymbol            = getMissingSymbol;
186     recognizer->debugger                    = NULL;
187
188     recognizer->free = freeBR;
189
190     /* Initialize variables
191      */
192     recognizer->type                    = type;
193
194
195     return  recognizer;
196 }
197 static void     
198 freeBR      (pANTLR3_BASE_RECOGNIZER recognizer)
199 {
200     pANTLR3_EXCEPTION thisE;
201
202         // Did we have a state allocated?
203         //
204         if      (recognizer->state != NULL)
205         {
206                 // Free any rule memoization we set up
207                 //
208                 if      (recognizer->state->ruleMemo != NULL)
209                 {
210                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211                         recognizer->state->ruleMemo = NULL;
212                 }
213
214                 // Free any exception space we have left around
215                 //
216                 thisE = recognizer->state->exception;
217                 if      (thisE != NULL)
218                 {
219                         thisE->freeEx(thisE);
220                 }
221
222                 // Free any rewrite streams we have allocated
223                 //
224                 if      (recognizer->state->rStreams != NULL)
225                 {
226                         recognizer->state->rStreams->free(recognizer->state->rStreams);
227                 }
228
229                 // Free up any token factory we created (error recovery for instance)
230                 //
231                 if      (recognizer->state->tokFactory != NULL)
232                 {
233                         recognizer->state->tokFactory->close(recognizer->state->tokFactory);
234                 }
235                 // Free the shared state memory
236                 //
237                 ANTLR3_FREE(recognizer->state);
238         }
239
240         // Free the actual recognizer space
241         //
242     ANTLR3_FREE(recognizer);
243 }
244
245 /**
246  * Creates a new Mismatched Token Exception and inserts in the recognizer
247  * exception stack.
248  * 
249  * \param recognizer
250  * Context pointer for this recognizer
251  * 
252  */
253 ANTLR3_API      void
254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
255 {
256     /* Create a basic recognition exception structure
257      */
258     antlr3RecognitionExceptionNew(recognizer);
259
260     /* Now update it to indicate this is a Mismatched token exception
261      */
262     recognizer->state->exception->name          = ANTLR3_MISMATCHED_EX_NAME;
263     recognizer->state->exception->type          = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
264
265     return;
266 }
267
268 ANTLR3_API      void
269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
270 {
271         pANTLR3_EXCEPTION                               ex;
272         pANTLR3_LEXER                                   lexer;
273         pANTLR3_PARSER                                  parser;
274         pANTLR3_TREE_PARSER                             tparser;
275
276         pANTLR3_INPUT_STREAM                    ins;
277         pANTLR3_INT_STREAM                              is;
278         pANTLR3_COMMON_TOKEN_STREAM         cts;
279         pANTLR3_TREE_NODE_STREAM            tns;
280
281         ins         = NULL;
282         cts         = NULL;
283         tns         = NULL;
284         is          = NULL;
285         lexer   = NULL;
286         parser  = NULL;
287         tparser = NULL;
288
289         switch  (recognizer->type)
290         {
291         case    ANTLR3_TYPE_LEXER:
292
293                 lexer   = (pANTLR3_LEXER) (recognizer->super);
294                 ins     = lexer->input;
295                 is      = ins->istream;
296
297                 break;
298
299         case    ANTLR3_TYPE_PARSER:
300
301                 parser  = (pANTLR3_PARSER) (recognizer->super);
302                 cts     = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303                 is      = parser->tstream->istream;
304
305                 break;
306
307         case    ANTLR3_TYPE_TREE_PARSER:
308
309                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310                 tns     = tparser->ctnstream->tnstream;
311                 is      = tns->istream;
312
313                 break;
314
315         default:
316
317                 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
318                 return;
319
320                 break;
321         }
322
323         /* Create a basic exception structure
324          */
325         ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326                 (void *)ANTLR3_RECOGNITION_EX_NAME,
327                 NULL,
328                 ANTLR3_FALSE);
329
330         /* Rest of information depends on the base type of the 
331          * input stream.
332          */
333         switch  (is->type & ANTLR3_INPUT_MASK)
334         {
335         case    ANTLR3_CHARSTREAM:
336
337                 ex->c                   = is->_LA                       (is, 1);                                        /* Current input character                      */
338                 ex->line                = ins->getLine                  (ins);                                          /* Line number comes from stream                */
339                 ex->charPositionInLine  = ins->getCharPositionInLine    (ins);      /* Line offset also comes from the stream   */
340                 ex->index               = is->index                     (is);
341                 ex->streamName          = ins->fileName;
342                 ex->message             = "Unexpected character";
343                 break;
344
345         case    ANTLR3_TOKENSTREAM:
346
347                 ex->token               = cts->tstream->_LT                                             (cts->tstream, 1);          /* Current input token                          */
348                 ex->line                = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine                  (ex->token);
349                 ex->charPositionInLine  = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine    (ex->token);
350                 ex->index               = cts->tstream->istream->index                                  (cts->tstream->istream);
351                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
352                 {
353                         ex->streamName          = NULL;
354                 }
355                 else
356                 {
357                         ex->streamName          = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
358                 }
359                 ex->message             = "Unexpected token";
360                 break;
361
362         case    ANTLR3_COMMONTREENODE:
363
364                 ex->token               = tns->_LT                                                  (tns, 1);       /* Current input tree node                      */
365                 ex->line                = ((pANTLR3_BASE_TREE)(ex->token))->getLine                 (ex->token);
366                 ex->charPositionInLine  = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine   (ex->token);
367                 ex->index               = tns->istream->index                                       (tns->istream);
368
369                 // Are you ready for this? Deep breath now...
370                 //
371                 {
372                         pANTLR3_COMMON_TREE tnode;
373
374                         tnode           = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
375
376                         if      (tnode->token    == NULL)
377                         {
378                                 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
379                         }
380                         else
381                         {
382                                 if      (tnode->token->input == NULL)
383                                 {
384                                         ex->streamName          = NULL;
385                                 }
386                                 else
387                                 {
388                                         ex->streamName          = tnode->token->input->fileName;
389                                 }
390                         }
391                         ex->message             = "Unexpected node";
392                 }
393                 break;
394         }
395
396         ex->input                                               = is;
397         ex->nextException                               = recognizer->state->exception; /* So we don't leak the memory */
398         recognizer->state->exception    = ex;
399         recognizer->state->error            = ANTLR3_TRUE;          /* Exception is outstanding */
400
401         return;
402 }
403
404
405 /// Match current input symbol against ttype.  Upon error, do one token
406 /// insertion or deletion if possible.  
407 /// To turn off single token insertion or deletion error
408 /// recovery, override mismatchRecover() and have it call
409 /// plain mismatch(), which does not recover.  Then any error
410 /// in a rule will cause an exception and immediate exit from
411 /// rule.  Rule would recover by resynchronizing to the set of
412 /// symbols that can follow rule ref.
413 ///
414 static void *
415 match(  pANTLR3_BASE_RECOGNIZER recognizer,
416                 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
417 {
418     pANTLR3_PARSER                      parser;
419     pANTLR3_TREE_PARSER     tparser;
420     pANTLR3_INT_STREAM      is;
421         void                                    * matchedSymbol;
422
423     switch      (recognizer->type)
424     {
425                 case    ANTLR3_TYPE_PARSER:
426
427                         parser  = (pANTLR3_PARSER) (recognizer->super);
428                         tparser = NULL;
429                         is      = parser->tstream->istream;
430
431                         break;
432
433                 case    ANTLR3_TYPE_TREE_PARSER:
434
435                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
436                         parser  = NULL;
437                         is      = tparser->ctnstream->tnstream->istream;
438
439                         break;
440
441                 default:
442                     
443                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
444                         return ANTLR3_FALSE;
445
446                         break;
447     }
448
449         // Pick up the current input token/node for assignment to labels
450         //
451         matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
452
453     if  (is->_LA(is, 1) == ttype)
454     {
455                 // The token was the one we were told to expect
456                 //
457                 is->consume(is);                                                                        // Consume that token from the stream
458                 recognizer->state->errorRecovery        = ANTLR3_FALSE; // Not in error recovery now (if we were)
459                 recognizer->state->failed                       = ANTLR3_FALSE; // The match was a success
460                 return matchedSymbol;                                                           // We are done
461     }
462
463     // We did not find the expected token type, if we are backtracking then
464     // we just set the failed flag and return.
465     //
466     if  (recognizer->state->backtracking > 0)
467     {
468                 // Backtracking is going on
469                 //
470                 recognizer->state->failed  = ANTLR3_TRUE;
471                 return matchedSymbol;
472         }
473
474     // We did not find the expected token and there is no backtracking
475     // going on, so we mismatch, which creates an exception in the recognizer exception
476     // stack.
477     //
478         matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479     return matchedSymbol;
480 }
481
482 /// Consumes the next token, whatever it is, and resets the recognizer state
483 /// so that it is not in error.
484 ///
485 /// \param recognizer
486 /// Recognizer context pointer
487 ///
488 static void
489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
490 {
491     pANTLR3_PARSER          parser;
492     pANTLR3_TREE_PARSER     tparser;
493     pANTLR3_INT_STREAM      is;
494
495     switch      (recognizer->type)
496     {
497                 case    ANTLR3_TYPE_PARSER:
498
499                         parser  = (pANTLR3_PARSER) (recognizer->super);
500                         tparser = NULL;
501                         is      = parser->tstream->istream;
502
503                         break;
504
505                 case    ANTLR3_TYPE_TREE_PARSER:
506
507                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
508                         parser  = NULL;
509                         is      = tparser->ctnstream->tnstream->istream;
510
511                         break;
512
513                 default:
514                     
515                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
516                         return;
517
518                 break;
519     }
520     recognizer->state->errorRecovery    = ANTLR3_FALSE;
521     recognizer->state->failed               = ANTLR3_FALSE;
522     is->consume(is);
523
524     return;
525 }
526 ///
527 ///
528 static ANTLR3_BOOLEAN
529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
530 {
531         ANTLR3_UINT32 nextt;
532
533         nextt = is->_LA(is, 2);
534
535         if      (nextt == ttype)
536         {
537                 if      (recognizer->state->exception != NULL)
538                 {
539                         recognizer->state->exception->expecting = nextt;
540                 }
541                 return ANTLR3_TRUE;             // This token is unknown, but the next one is the one we wanted
542         }
543         else
544         {
545                 return ANTLR3_FALSE;    // Neither this token, nor the one following is the one we wanted
546         }
547 }
548
549 ///
550 ///
551 static ANTLR3_BOOLEAN
552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
553 {
554         ANTLR3_BOOLEAN  retcode;
555         pANTLR3_BITSET  followClone;
556         pANTLR3_BITSET  viableTokensFollowingThisRule;
557
558         if      (follow == NULL)
559         {
560                 // There is no information about the tokens that can follow the last one
561                 // hence we must say that the current one we found is not a member of the 
562                 // follow set and does not indicate a missing token. We will just consume this
563                 // single token and see if the parser works it out from there.
564                 //
565                 return  ANTLR3_FALSE;
566         }
567
568         followClone                                             = NULL;
569         viableTokensFollowingThisRule   = NULL;
570
571         // The C bitset maps are laid down at compile time by the
572         // C code generation. Hence we cannot remove things from them
573         // and so on. So, in order to remove EOR (if we need to) then
574         // we clone the static bitset.
575         //
576         followClone = antlr3BitsetLoad(follow);
577         if      (followClone == NULL)
578         {
579                 return ANTLR3_FALSE;
580         }
581
582         // Compute what can follow this grammar reference
583         //
584         if      (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
585         {
586                 // EOR can follow, but if we are not the start symbol, we
587                 // need to remove it.
588                 //
589                 if      (recognizer->state->following->vector->count >= 0)
590                 {
591                         followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
592                 }
593
594                 // Now compute the visiable tokens that can follow this rule, according to context
595                 // and make them part of the follow set.
596                 //
597                 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598                 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
599         }
600
601         /// if current token is consistent with what could come after set
602         /// then we know we're missing a token; error recovery is free to
603         /// "insert" the missing token
604         ///
605         /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606         /// in follow set to indicate that the fall of the start symbol is
607         /// in the set (EOF can follow).
608         ///
609         if      (               followClone->isMember(followClone, is->_LA(is, 1))
610                         ||      followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
611                 )
612         {
613                 retcode = ANTLR3_TRUE;
614         }
615         else
616         {
617                 retcode = ANTLR3_FALSE;
618         }
619
620         if      (viableTokensFollowingThisRule != NULL)
621         {
622                 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
623         }
624         if      (followClone != NULL)
625         {
626                 followClone->free(followClone);
627         }
628
629         return retcode;
630
631 }
632
633 /// Factor out what to do upon token mismatch so tree parsers can behave
634 /// differently.  Override and call mismatchRecover(input, ttype, follow)
635 /// to get single token insertion and deletion.  Use this to turn off
636 /// single token insertion and deletion. Override mismatchRecover
637 /// to call this instead.
638 ///
639 /// \remark mismatch only works for parsers and must be overridden for anything else.
640 ///
641 static  void
642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
643 {
644     pANTLR3_PARSER          parser;
645     pANTLR3_TREE_PARSER     tparser;
646     pANTLR3_INT_STREAM      is;
647
648     // Install a mismatched token exception in the exception stack
649     //
650     antlr3MTExceptionNew(recognizer);
651     recognizer->state->exception->expecting    = ttype;
652
653     switch      (recognizer->type)
654     {
655                 case    ANTLR3_TYPE_PARSER:
656
657                         parser  = (pANTLR3_PARSER) (recognizer->super);
658                         tparser = NULL;
659                         is      = parser->tstream->istream;
660
661                         break;
662
663                 default:
664                     
665                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
666                         return;
667
668                         break;
669     }
670
671         if      (mismatchIsUnwantedToken(recognizer, is, ttype))
672         {
673                 // Create a basic recognition exception structure
674                 //
675             antlr3RecognitionExceptionNew(recognizer);
676                 
677                 // Now update it to indicate this is an unwanted token exception
678                 //
679                 recognizer->state->exception->name              = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681
682                 return;
683         }
684         
685         if      (mismatchIsMissingToken(recognizer, is, follow))
686         {
687                 // Create a basic recognition exception structure
688                 //
689             antlr3RecognitionExceptionNew(recognizer);
690                 
691                 // Now update it to indicate this is an unwanted token exception
692                 //
693                 recognizer->state->exception->name              = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
695
696                 return;
697         }
698
699         // Just a mismatched token is all we can dtermine
700         //
701         antlr3MTExceptionNew(recognizer);
702
703         return;
704 }
705 /// Report a recognition problem.
706 ///
707 /// This method sets errorRecovery to indicate the parser is recovering
708 /// not parsing.  Once in recovery mode, no errors are generated.
709 /// To get out of recovery mode, the parser must successfully match
710 /// a token (after a resync).  So it will go:
711 ///
712 ///             1. error occurs
713 ///             2. enter recovery mode, report error
714 ///             3. consume until token found in resynch set
715 ///             4. try to resume parsing
716 ///             5. next match() will reset errorRecovery mode
717 ///
718 /// If you override, make sure to update errorCount if you care about that.
719 ///
720 static void                     
721 reportError                 (pANTLR3_BASE_RECOGNIZER recognizer)
722 {
723     if  (recognizer->state->errorRecovery == ANTLR3_TRUE)
724     {
725                 // Already in error recovery so don't display another error while doing so
726                 //
727                 return;
728     }
729
730     // Signal we are in error recovery now
731     //
732     recognizer->state->errorRecovery = ANTLR3_TRUE;
733         
734         // Indicate this recognizer had an error while processing.
735         //
736         recognizer->state->errorCount++;
737
738         // Call the error display routine
739         //
740     recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
741 }
742
743 static void
744 beginBacktrack          (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
745 {
746         if      (recognizer->debugger != NULL)
747         {
748                 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
749         }
750 }
751
752 static void
753 endBacktrack            (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
754 {
755         if      (recognizer->debugger != NULL)
756         {
757                 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
758         }
759 }
760 static void                     
761 beginResync                 (pANTLR3_BASE_RECOGNIZER recognizer)
762 {
763         if      (recognizer->debugger != NULL)
764         {
765                 recognizer->debugger->beginResync(recognizer->debugger);
766         }
767 }
768
769 static void                     
770 endResync                   (pANTLR3_BASE_RECOGNIZER recognizer)
771 {
772         if      (recognizer->debugger != NULL)
773         {
774                 recognizer->debugger->endResync(recognizer->debugger);
775         }
776 }
777
778 /// Compute the error recovery set for the current rule.
779 /// Documentation below is from the Java implementation.
780 ///
781 /// During rule invocation, the parser pushes the set of tokens that can
782 /// follow that rule reference on the stack; this amounts to
783 /// computing FIRST of what follows the rule reference in the
784 /// enclosing rule. This local follow set only includes tokens
785 /// from within the rule; i.e., the FIRST computation done by
786 /// ANTLR stops at the end of a rule.
787 //
788 /// EXAMPLE
789 //
790 /// When you find a "no viable alt exception", the input is not
791 /// consistent with any of the alternatives for rule r.  The best
792 /// thing to do is to consume tokens until you see something that
793 /// can legally follow a call to r *or* any rule that called r.
794 /// You don't want the exact set of viable next tokens because the
795 /// input might just be missing a token--you might consume the
796 /// rest of the input looking for one of the missing tokens.
797 ///
798 /// Consider grammar:
799 ///
800 /// a : '[' b ']'
801 ///   | '(' b ')'
802 ///   ;
803 /// b : c '^' INT ;
804 /// c : ID
805 ///   | INT
806 ///   ;
807 ///
808 /// At each rule invocation, the set of tokens that could follow
809 /// that rule is pushed on a stack.  Here are the various "local"
810 /// follow sets:
811 ///
812 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
813 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
814 /// FOLLOW(c_in_b) = FIRST('^') = '^'
815 ///
816 /// Upon erroneous input "[]", the call chain is
817 ///
818 /// a -> b -> c
819 ///
820 /// and, hence, the follow context stack is:
821 ///
822 /// depth  local follow set     after call to rule
823 ///   0         <EOF>                    a (from main())
824 ///   1          ']'                     b
825 ///   2          '^'                     c
826 ///
827 /// Notice that ')' is not included, because b would have to have
828 /// been called from a different context in rule a for ')' to be
829 /// included.
830 ///
831 /// For error recovery, we cannot consider FOLLOW(c)
832 /// (context-sensitive or otherwise).  We need the combined set of
833 /// all context-sensitive FOLLOW sets--the set of all tokens that
834 /// could follow any reference in the call chain.  We need to
835 /// resync to one of those tokens.  Note that FOLLOW(c)='^' and if
836 /// we resync'd to that token, we'd consume until EOF.  We need to
837 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
838 /// In this case, for input "[]", LA(1) is in this set so we would
839 /// not consume anything and after printing an error rule c would
840 /// return normally.  It would not find the required '^' though.
841 /// At this point, it gets a mismatched token error and throws an
842 /// exception (since LA(1) is not in the viable following token
843 /// set).  The rule exception handler tries to recover, but finds
844 /// the same recovery set and doesn't consume anything.  Rule b
845 /// exits normally returning to rule a.  Now it finds the ']' (and
846 /// with the successful match exits errorRecovery mode).
847 ///
848 /// So, you can see that the parser walks up call chain looking
849 /// for the token that was a member of the recovery set.
850 ///
851 /// Errors are not generated in errorRecovery mode.
852 ///
853 /// ANTLR's error recovery mechanism is based upon original ideas:
854 ///
855 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
856 ///
857 /// and
858 ///
859 /// "A note on error recovery in recursive descent parsers":
860 /// http://portal.acm.org/citation.cfm?id=947902.947905
861 ///
862 /// Later, Josef Grosch had some good ideas:
863 ///
864 /// "Efficient and Comfortable Error Recovery in Recursive Descent
865 /// Parsers":
866 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
867 ///
868 /// Like Grosch I implemented local FOLLOW sets that are combined
869 /// at run-time upon error to avoid overhead during parsing.
870 ///
871 static pANTLR3_BITSET           
872 computeErrorRecoverySet     (pANTLR3_BASE_RECOGNIZER recognizer)
873 {
874     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
875 }
876
877 /// Compute the context-sensitive FOLLOW set for current rule.
878 /// Documentation below is from the Java runtime.
879 ///
880 /// This is the set of token types that can follow a specific rule
881 /// reference given a specific call chain.  You get the set of
882 /// viable tokens that can possibly come next (look ahead depth 1)
883 /// given the current call chain.  Contrast this with the
884 /// definition of plain FOLLOW for rule r:
885 ///
886 ///  FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
887 ///
888 /// where x in T* and alpha, beta in V*; T is set of terminals and
889 /// V is the set of terminals and non terminals.  In other words,
890 /// FOLLOW(r) is the set of all tokens that can possibly follow
891 /// references to r in *any* sentential form (context).  At
892 /// runtime, however, we know precisely which context applies as
893 /// we have the call chain.  We may compute the exact (rather
894 /// than covering superset) set of following tokens.
895 ///
896 /// For example, consider grammar:
897 ///
898 /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
899 ///      | "return" expr '.'
900 ///      ;
901 /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
902 /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
903 ///      | '(' expr ')'
904 ///      ;
905 ///
906 /// The FOLLOW sets are all inclusive whereas context-sensitive
907 /// FOLLOW sets are precisely what could follow a rule reference.
908 /// For input "i=(3);", here is the derivation:
909 ///
910 /// stat => ID '=' expr ';'
911 ///      => ID '=' atom ('+' atom)* ';'
912 ///      => ID '=' '(' expr ')' ('+' atom)* ';'
913 ///      => ID '=' '(' atom ')' ('+' atom)* ';'
914 ///      => ID '=' '(' INT ')' ('+' atom)* ';'
915 ///      => ID '=' '(' INT ')' ';'
916 ///
917 /// At the "3" token, you'd have a call chain of
918 ///
919 ///   stat -> expr -> atom -> expr -> atom
920 ///
921 /// What can follow that specific nested ref to atom?  Exactly ')'
922 /// as you can see by looking at the derivation of this specific
923 /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
924 ///
925 /// You want the exact viable token set when recovering from a
926 /// token mismatch.  Upon token mismatch, if LA(1) is member of
927 /// the viable next token set, then you know there is most likely
928 /// a missing token in the input stream.  "Insert" one by just not
929 /// throwing an exception.
930 ///
931 static pANTLR3_BITSET           
932 computeCSRuleFollow         (pANTLR3_BASE_RECOGNIZER recognizer)
933 {
934     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
935 }
936
937 /// Compute the current followset for the input stream.
938 ///
939 static pANTLR3_BITSET           
940 combineFollows              (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
941 {
942     pANTLR3_BITSET      followSet;
943     pANTLR3_BITSET      localFollowSet;
944     ANTLR3_UINT32       top;
945     ANTLR3_UINT32       i;
946
947     top = recognizer->state->following->size(recognizer->state->following);
948
949     followSet       = antlr3BitsetNew(0);
950         localFollowSet  = NULL;
951
952     for (i = top; i>0; i--)
953     {
954                 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
955
956                 if  (localFollowSet != NULL)
957                 {
958                         followSet->borInPlace(followSet, localFollowSet);
959
960                         if      (exact == ANTLR3_TRUE)
961                         {
962                                 if      (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
963                                 {
964                                         // Only leave EOR in the set if at top (start rule); this lets us know
965                                         // if we have to include the follow(start rule); I.E., EOF
966                                         //
967                                         if      (i>1)
968                                         {
969                                                 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
970                                         }
971                                 }
972                                 else
973                                 {
974                                         break;  // Cannot see End Of Rule from here, just drop out
975                                 }
976                         }
977                         localFollowSet->free(localFollowSet);
978                         localFollowSet = NULL;
979                 }
980     }
981
982         if      (localFollowSet != NULL)
983         {
984                 localFollowSet->free(localFollowSet);
985         }
986     return  followSet;
987 }
988
989 /// Standard/Example error display method.
990 /// No generic error message display funciton coudl possibly do everything correctly
991 /// for all possible parsers. Hence you are provided with this example routine, which
992 /// you should override in your parser/tree parser to do as you will.
993 ///
994 /// Here we depart somewhat from the Java runtime as that has now split up a lot
995 /// of the error display routines into spearate units. However, ther is little advantage
996 /// to this in the C version as you will probably implement all such routines as a 
997 /// separate translation unit, rather than install them all as pointers to functions
998 /// in the base recognizer.
999 ///
1000 static void                     
1001 displayRecognitionError     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1002 {
1003         pANTLR3_PARSER                  parser;
1004         pANTLR3_TREE_PARSER         tparser;
1005         pANTLR3_INT_STREAM          is;
1006         pANTLR3_STRING                  ttext;
1007         pANTLR3_STRING                  ftext;
1008         pANTLR3_EXCEPTION           ex;
1009         pANTLR3_COMMON_TOKEN    theToken;
1010         pANTLR3_BASE_TREE           theBaseTree;
1011         pANTLR3_COMMON_TREE         theCommonTree;
1012
1013         // Retrieve some info for easy reading.
1014         //
1015         ex          =           recognizer->state->exception;
1016         ttext   =               NULL;
1017
1018         // See if there is a 'filename' we can use
1019         //
1020         if      (ex->streamName == NULL)
1021         {
1022                 if      (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1023                 {
1024                         ANTLR3_FPRINTF(stderr, "-end of input-(");
1025                 }
1026                 else
1027                 {
1028                         ANTLR3_FPRINTF(stderr, "-unknown source-(");
1029                 }
1030         }
1031         else
1032         {
1033                 ftext = ex->streamName->to8(ex->streamName);
1034                 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1035         }
1036
1037         // Next comes the line number
1038         //
1039
1040         ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1041         ANTLR3_FPRINTF(stderr, " : error %d : %s", 
1042                                                                                 recognizer->state->exception->type,
1043                                         (pANTLR3_UINT8)    (recognizer->state->exception->message));
1044
1045
1046         // How we determine the next piece is dependent on which thing raised the
1047         // error.
1048         //
1049         switch  (recognizer->type)
1050         {
1051         case    ANTLR3_TYPE_PARSER:
1052
1053                 // Prepare the knowledge we know we have
1054                 //
1055                 parser      = (pANTLR3_PARSER) (recognizer->super);
1056                 tparser     = NULL;
1057                 is                      = parser->tstream->istream;
1058                 theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1059                 ttext       = theToken->toString(theToken);
1060
1061                 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1062                 if  (theToken != NULL)
1063                 {
1064                         if (theToken->type == ANTLR3_TOKEN_EOF)
1065                         {
1066                                 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1067                         }
1068                         else
1069                         {
1070                                 // Guard against null text in a token
1071                                 //
1072                                 ANTLR3_FPRINTF(stderr, "\n    near %s\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1073                         }
1074                 }
1075                 break;
1076
1077         case    ANTLR3_TYPE_TREE_PARSER:
1078
1079                 tparser         = (pANTLR3_TREE_PARSER) (recognizer->super);
1080                 parser          = NULL;
1081                 is                      = tparser->ctnstream->tnstream->istream;
1082                 theBaseTree     = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1083                 ttext           = theBaseTree->toStringTree(theBaseTree);
1084
1085                 if  (theBaseTree != NULL)
1086                 {
1087                         theCommonTree   = (pANTLR3_COMMON_TREE)     theBaseTree->super;
1088
1089                         if      (theCommonTree != NULL)
1090                         {
1091                                 theToken        = (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
1092                         }
1093                         ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1094                         ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1095                 }
1096                 break;
1097
1098         default:
1099
1100                 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1101                 return;
1102                 break;
1103         }
1104
1105         // Although this function should generally be provided by the implementation, this one
1106         // should be as helpful as possible for grammar developers and serve as an example
1107         // of what you can do with each exception type. In general, when you make up your
1108         // 'real' handler, you should debug the routine with all possible errors you expect
1109         // which will then let you be as specific as possible about all circumstances.
1110         //
1111         // Note that in the general case, errors thrown by tree parsers indicate a problem
1112         // with the output of the parser or with the tree grammar itself. The job of the parser
1113         // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1114         // at that stage should really be semantic errors that your own code determines and handles
1115         // in whatever way is appropriate.
1116         //
1117         switch  (ex->type)
1118         {
1119         case    ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1120
1121                 // Indicates that the recognizer was fed a token which seesm to be
1122                 // spurious input. We can detect this when the token that follows
1123                 // this unwanted token would normally be part of the syntactically
1124                 // correct stream. Then we can see that the token we are looking at
1125                 // is just something that should not be there and throw this exception.
1126                 //
1127                 if      (tokenNames == NULL)
1128                 {
1129                         ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1130                 }
1131                 else
1132                 {
1133                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1134                         {
1135                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1136                         }
1137                         else
1138                         {
1139                                 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1140                         }
1141                 }
1142                 break;
1143
1144         case    ANTLR3_MISSING_TOKEN_EXCEPTION:
1145
1146                 // Indicates that the recognizer detected that the token we just
1147                 // hit would be valid syntactically if preceeded by a particular 
1148                 // token. Perhaps a missing ';' at line end or a missing ',' in an
1149                 // expression list, and such like.
1150                 //
1151                 if      (tokenNames == NULL)
1152                 {
1153                         ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1154                 }
1155                 else
1156                 {
1157                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1158                         {
1159                                 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1160                         }
1161                         else
1162                         {
1163                                 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1164                         }
1165                 }
1166                 break;
1167
1168         case    ANTLR3_RECOGNITION_EXCEPTION:
1169
1170                 // Indicates that the recognizer received a token
1171                 // in the input that was not predicted. This is the basic exception type 
1172                 // from which all others are derived. So we assume it was a syntax error.
1173                 // You may get this if there are not more tokens and more are needed
1174                 // to complete a parse for instance.
1175                 //
1176                 ANTLR3_FPRINTF(stderr, " : syntax error...\n");    
1177                 break;
1178
1179         case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1180
1181                 // We were expecting to see one thing and got another. This is the
1182                 // most common error if we coudl not detect a missing or unwanted token.
1183                 // Here you can spend your efforts to
1184                 // derive more useful error messages based on the expected
1185                 // token set and the last token and so on. The error following
1186                 // bitmaps do a good job of reducing the set that we were looking
1187                 // for down to something small. Knowing what you are parsing may be
1188                 // able to allow you to be even more specific about an error.
1189                 //
1190                 if      (tokenNames == NULL)
1191                 {
1192                         ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1193                 }
1194                 else
1195                 {
1196                         if      (ex->expecting == ANTLR3_TOKEN_EOF)
1197                         {
1198                                 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1199                         }
1200                         else
1201                         {
1202                                 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1203                         }
1204                 }
1205                 break;
1206
1207         case    ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1208
1209                 // We could not pick any alt decision from the input given
1210                 // so god knows what happened - however when you examine your grammar,
1211                 // you should. It means that at the point where the current token occurred
1212                 // that the DFA indicates nowhere to go from here.
1213                 //
1214                 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1215
1216                 break;
1217
1218         case    ANTLR3_MISMATCHED_SET_EXCEPTION:
1219
1220                 {
1221                         ANTLR3_UINT32     count;
1222                         ANTLR3_UINT32     bit;
1223                         ANTLR3_UINT32     size;
1224                         ANTLR3_UINT32     numbits;
1225                         pANTLR3_BITSET    errBits;
1226
1227                         // This means we were able to deal with one of a set of
1228                         // possible tokens at this point, but we did not see any
1229                         // member of that set.
1230                         //
1231                         ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
1232
1233                         // What tokens could we have accepted at this point in the
1234                         // parse?
1235                         //
1236                         count   = 0;
1237                         errBits = antlr3BitsetLoad              (ex->expectingSet);
1238                         numbits = errBits->numBits              (errBits);
1239                         size    = errBits->size                 (errBits);
1240
1241                         if  (size > 0)
1242                         {
1243                                 // However many tokens we could have dealt with here, it is usually
1244                                 // not useful to print ALL of the set here. I arbitrarily chose 8
1245                                 // here, but you should do whatever makes sense for you of course.
1246                                 // No token number 0, so look for bit 1 and on.
1247                                 //
1248                                 for     (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1249                                 {
1250                                         // TODO: This doesn;t look right - should be asking if the bit is set!!
1251                                         //
1252                                         if  (tokenNames[bit])
1253                                         {
1254                                                 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]); 
1255                                                 count++;
1256                                         }
1257                                 }
1258                                 ANTLR3_FPRINTF(stderr, "\n");
1259                         }
1260                         else
1261                         {
1262                                 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1263                                 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1264                         }
1265                 }
1266                 break;
1267
1268         case    ANTLR3_EARLY_EXIT_EXCEPTION:
1269
1270                 // We entered a loop requiring a number of token sequences
1271                 // but found a token that ended that sequence earlier than
1272                 // we should have done.
1273                 //
1274                 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1275                 break;
1276
1277         default:
1278
1279                 // We don't handle any other exceptions here, but you can
1280                 // if you wish. If we get an exception that hits this point
1281                 // then we are just going to report what we know about the
1282                 // token.
1283                 //
1284                 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1285                 break;
1286         }
1287
1288         // Here you have the token that was in error which if this is
1289         // the standard implementation will tell you the line and offset
1290         // and also record the address of the start of the line in the
1291         // input stream. You could therefore print the source line and so on.
1292         // Generally though, I would expect that your lexer/parser will keep
1293         // its own map of lines and source pointers or whatever as there
1294         // are a lot of specific things you need to know about the input
1295         // to do something like that.
1296         // Here is where you do it though :-).
1297         //
1298 }
1299
1300 /// Return how many syntax errors were detected by this recognizer
1301 ///
1302 static ANTLR3_UINT32
1303 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1304 {
1305         return  recognizer->state->errorCount;
1306 }
1307
1308 /// Recover from an error found on the input stream.  Mostly this is
1309 /// NoViableAlt exceptions, but could be a mismatched token that
1310 /// the match() routine could not recover from.
1311 ///
1312 static void                     
1313 recover                     (pANTLR3_BASE_RECOGNIZER recognizer)
1314 {
1315     // Used to compute the follow set of tokens
1316     //
1317     pANTLR3_BITSET                      followSet;
1318     pANTLR3_PARSER                      parser;
1319     pANTLR3_TREE_PARSER     tparser;
1320     pANTLR3_INT_STREAM      is;
1321
1322     switch      (recognizer->type)
1323     {
1324                 case    ANTLR3_TYPE_PARSER:
1325
1326                 parser  = (pANTLR3_PARSER) (recognizer->super);
1327                 tparser = NULL;
1328                 is              = parser->tstream->istream;
1329
1330         break;
1331
1332     case        ANTLR3_TYPE_TREE_PARSER:
1333
1334                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1335                 parser  = NULL;
1336                 is              = tparser->ctnstream->tnstream->istream;
1337
1338         break;
1339
1340     default:
1341             
1342                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1343                 return;
1344
1345         break;
1346     }
1347
1348         // Are we about to repeat the same error?
1349         //
1350     if  (recognizer->state->lastErrorIndex == is->index(is))
1351     {
1352                 // The last error was at the same token index point. This must be a case
1353                 // where LT(1) is in the recovery token set so nothing is
1354                 // consumed. Consume a single token so at least to prevent
1355                 // an infinite loop; this is a failsafe.
1356                 //
1357                 is->consume(is);
1358     }
1359
1360     // Record error index position
1361     //
1362     recognizer->state->lastErrorIndex    = is->index(is);
1363     
1364     // Work out the follows set for error recovery
1365     //
1366     followSet   = recognizer->computeErrorRecoverySet(recognizer);
1367
1368     // Call resync hook (for debuggers and so on)
1369     //
1370     recognizer->beginResync(recognizer);
1371
1372     // Consume tokens until we have resynced to something in the follows set
1373     //
1374     recognizer->consumeUntilSet(recognizer, followSet);
1375
1376     // End resync hook 
1377     //
1378     recognizer->endResync(recognizer);
1379
1380     // Destroy the temporary bitset we produced.
1381     //
1382     followSet->free(followSet);
1383
1384     // Reset the inError flag so we don't re-report the exception
1385     //
1386     recognizer->state->error    = ANTLR3_FALSE;
1387     recognizer->state->failed   = ANTLR3_FALSE;
1388 }
1389
1390
1391 /// Attempt to recover from a single missing or extra token.
1392 ///
1393 /// EXTRA TOKEN
1394 ///
1395 /// LA(1) is not what we are looking for.  If LA(2) has the right token,
1396 /// however, then assume LA(1) is some extra spurious token.  Delete it
1397 /// and LA(2) as if we were doing a normal match(), which advances the
1398 /// input.
1399 ///
1400 /// MISSING TOKEN
1401 ///
1402 /// If current token is consistent with what could come after
1403 /// ttype then it is ok to "insert" the missing token, else throw
1404 /// exception For example, Input "i=(3;" is clearly missing the
1405 /// ')'.  When the parser returns from the nested call to expr, it
1406 /// will have call chain:
1407 ///
1408 ///    stat -> expr -> atom
1409 ///
1410 /// and it will be trying to match the ')' at this point in the
1411 /// derivation:
1412 ///
1413 ///       => ID '=' '(' INT ')' ('+' atom)* ';'
1414 ///                          ^
1415 /// match() will see that ';' doesn't match ')' and report a
1416 /// mismatched token error.  To recover, it sees that LA(1)==';'
1417 /// is in the set of tokens that can follow the ')' token
1418 /// reference in rule atom.  It can assume that you forgot the ')'.
1419 ///
1420 /// The exception that was passed in, in the java implementation is
1421 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1422 /// error flag and rules cascade back when this is set.
1423 ///
1424 static void *   
1425 recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1426 {
1427         pANTLR3_PARSER                    parser;
1428         pANTLR3_TREE_PARSER           tparser;
1429         pANTLR3_INT_STREAM            is;
1430         void                                    * matchedSymbol;
1431
1432         // Invoke the debugger event if there is a debugger listening to us
1433         //
1434         if      (recognizer->debugger != NULL)
1435         {
1436                 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
1437         }
1438
1439         switch  (recognizer->type)
1440         {
1441         case    ANTLR3_TYPE_PARSER:
1442
1443                 parser  = (pANTLR3_PARSER) (recognizer->super);
1444                 tparser = NULL;
1445                 is      = parser->tstream->istream;
1446
1447                 break;
1448
1449         case    ANTLR3_TYPE_TREE_PARSER:
1450
1451                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1452                 parser  = NULL;
1453                 is      = tparser->ctnstream->tnstream->istream;
1454
1455                 break;
1456
1457         default:
1458
1459                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1460                 return NULL;
1461
1462                 break;
1463         }
1464
1465         // Create an exception if we need one
1466         //
1467         if      (recognizer->state->exception == NULL)
1468         {
1469                 antlr3RecognitionExceptionNew(recognizer);
1470         }
1471
1472         // If the next token after the one we are looking at in the input stream
1473         // is what we are looking for then we remove the one we have discovered
1474         // from the stream by consuming it, then consume this next one along too as
1475         // if nothing had happened.
1476         //
1477         if      ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1478         {
1479                 recognizer->state->exception->type              = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1480                 recognizer->state->exception->message   = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1481
1482                 // Call resync hook (for debuggers and so on)
1483                 //
1484                 if      (recognizer->debugger != NULL)
1485                 {
1486                         recognizer->debugger->beginResync(recognizer->debugger);
1487                 }
1488
1489                 recognizer->beginResync(recognizer);
1490
1491                 // "delete" the extra token
1492                 //
1493                 recognizer->beginResync(recognizer);
1494                 is->consume(is);
1495                 recognizer->endResync(recognizer);
1496                 // End resync hook 
1497                 //
1498                 if      (recognizer->debugger != NULL)
1499                 {
1500                         recognizer->debugger->endResync(recognizer->debugger);
1501                 }
1502
1503                 // Print out the error after we consume so that ANTLRWorks sees the
1504                 // token in the exception.
1505                 //
1506                 recognizer->reportError(recognizer);
1507
1508                 // Return the token we are actually matching
1509                 //
1510                 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511
1512                 // Consume the token that the rule actually expected to get as if everything
1513                 // was hunky dory.
1514                 //
1515                 is->consume(is);
1516
1517                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1518
1519                 return  matchedSymbol;
1520         }
1521
1522         // Single token deletion (Unwanted above) did not work
1523         // so we see if we can insert a token instead by calculating which
1524         // token would be missing
1525         //
1526         if      (mismatchIsMissingToken(recognizer, is, follow))
1527         {
1528                 // We can fake the missing token and proceed
1529                 //
1530                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531                 recognizer->state->exception->type              = ANTLR3_MISSING_TOKEN_EXCEPTION;
1532                 recognizer->state->exception->message   = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533                 recognizer->state->exception->token             = matchedSymbol;
1534                 recognizer->state->exception->expecting = ttype;
1535
1536                 // Print out the error after we insert so that ANTLRWorks sees the
1537                 // token in the exception.
1538                 //
1539                 recognizer->reportError(recognizer);
1540
1541                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1542
1543                 return  matchedSymbol;
1544         }
1545
1546
1547         // Neither deleting nor inserting tokens allows recovery
1548         // must just report the exception.
1549         //
1550         recognizer->state->error            = ANTLR3_TRUE;
1551         return NULL;
1552 }
1553
1554 static void *
1555 recoverFromMismatchedSet            (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556 {
1557     pANTLR3_PARSER                      parser;
1558     pANTLR3_TREE_PARSER     tparser;
1559     pANTLR3_INT_STREAM      is;
1560         pANTLR3_COMMON_TOKEN    matchedSymbol;
1561
1562     switch      (recognizer->type)
1563     {
1564     case        ANTLR3_TYPE_PARSER:
1565
1566                 parser  = (pANTLR3_PARSER) (recognizer->super);
1567                 tparser = NULL;
1568                 is      = parser->tstream->istream;
1569
1570         break;
1571
1572     case        ANTLR3_TYPE_TREE_PARSER:
1573
1574                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575                 parser  = NULL;
1576                 is      = tparser->ctnstream->tnstream->istream;
1577
1578         break;
1579
1580     default:
1581             
1582                 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583                 return NULL;
1584
1585         break;
1586     }
1587
1588         if      (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589         {
1590                 // We can fake the missing token and proceed
1591                 //
1592                 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593                 recognizer->state->exception->type      = ANTLR3_MISSING_TOKEN_EXCEPTION;
1594                 recognizer->state->exception->token     = matchedSymbol;
1595
1596                 // Print out the error after we insert so that ANTLRWorks sees the
1597                 // token in the exception.
1598                 //
1599                 recognizer->reportError(recognizer);
1600
1601                 recognizer->state->error  = ANTLR3_FALSE;       // Exception is not outstanding any more
1602
1603                 return  matchedSymbol;
1604         }
1605
1606     // TODO - Single token deletion like in recoverFromMismatchedToken()
1607     //
1608     recognizer->state->error    = ANTLR3_TRUE;
1609         recognizer->state->failed       = ANTLR3_TRUE;
1610         return NULL;
1611 }
1612
1613 /// This code is factored out from mismatched token and mismatched set
1614 ///  recovery.  It handles "single token insertion" error recovery for
1615 /// both.  No tokens are consumed to recover from insertions.  Return
1616 /// true if recovery was possible else return false.
1617 ///
1618 static ANTLR3_BOOLEAN   
1619 recoverFromMismatchedElement        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620 {
1621     pANTLR3_BITSET          viableToksFollowingRule;
1622     pANTLR3_BITSET          follow;
1623     pANTLR3_PARSER          parser;
1624     pANTLR3_TREE_PARSER     tparser;
1625     pANTLR3_INT_STREAM      is;
1626
1627     switch      (recognizer->type)
1628     {
1629     case        ANTLR3_TYPE_PARSER:
1630
1631                 parser  = (pANTLR3_PARSER) (recognizer->super);
1632                 tparser = NULL;
1633                 is      = parser->tstream->istream;
1634
1635         break;
1636
1637     case        ANTLR3_TYPE_TREE_PARSER:
1638
1639                 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640                 parser  = NULL;
1641                 is      = tparser->ctnstream->tnstream->istream;
1642
1643         break;
1644
1645     default:
1646             
1647                 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648                 return ANTLR3_FALSE;
1649
1650         break;
1651     }
1652
1653     follow      = antlr3BitsetLoad(followBits);
1654
1655     if  (follow == NULL)
1656     {
1657                 /* The follow set is NULL, which means we don't know what can come 
1658                  * next, so we "hit and hope" by just signifying that we cannot
1659                  * recover, which will just cause the next token to be consumed,
1660                  * which might dig us out.
1661                  */
1662                 return  ANTLR3_FALSE;
1663     }
1664
1665     /* We have a bitmap for the follow set, hence we can compute 
1666      * what can follow this grammar element reference.
1667      */
1668     if  (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669     {
1670                 /* First we need to know which of the available tokens are viable
1671                  * to follow this reference.
1672                  */
1673                 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1674
1675                 /* Remove the EOR token, which we do not wish to compute with
1676                  */
1677                 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678                 viableToksFollowingRule->free(viableToksFollowingRule);
1679                 /* We now have the computed set of what can follow the current token
1680                  */
1681     }
1682
1683     /* We can now see if the current token works with the set of tokens
1684      * that could follow the current grammar reference. If it looks like it
1685      * is consistent, then we can "insert" that token by not throwing
1686      * an exception and assuming that we saw it. 
1687      */
1688     if  ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689     {
1690                 /* report the error, but don't cause any rules to abort and stuff
1691                  */
1692                 recognizer->reportError(recognizer);
1693                 if      (follow != NULL)
1694                 {
1695                         follow->free(follow);
1696                 }
1697                 recognizer->state->error                        = ANTLR3_FALSE;
1698                 recognizer->state->failed                       = ANTLR3_FALSE;
1699                 return ANTLR3_TRUE;     /* Success in recovery  */
1700     }
1701
1702     if  (follow != NULL)
1703     {
1704                 follow->free(follow);
1705     }
1706
1707     /* We could not find anything viable to do, so this is going to 
1708      * cause an exception.
1709      */
1710     return  ANTLR3_FALSE;
1711 }
1712
1713 /// Eat tokens from the input stream until we get one of JUST the right type
1714 ///
1715 static void             
1716 consumeUntil    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717 {
1718     ANTLR3_UINT32                       ttype;
1719     pANTLR3_PARSER                      parser;
1720     pANTLR3_TREE_PARSER     tparser;
1721     pANTLR3_INT_STREAM      is;
1722
1723     switch      (recognizer->type)
1724     {
1725                 case    ANTLR3_TYPE_PARSER:
1726
1727                         parser  = (pANTLR3_PARSER) (recognizer->super);
1728                         tparser = NULL;
1729                         is      = parser->tstream->istream;
1730
1731                         break;
1732
1733                 case    ANTLR3_TYPE_TREE_PARSER:
1734
1735                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736                         parser  = NULL;
1737                         is      = tparser->ctnstream->tnstream->istream;
1738
1739                         break;
1740
1741                 default:
1742                     
1743                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744                         return;
1745
1746                         break;
1747     }
1748
1749     // What do have at the moment?
1750     //
1751     ttype       = is->_LA(is, 1);
1752
1753     // Start eating tokens until we get to the one we want.
1754     //
1755     while   (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756     {
1757                 is->consume(is);
1758                 ttype   = is->_LA(is, 1);
1759     }
1760 }
1761
1762 /// Eat tokens from the input stream until we find one that
1763 /// belongs to the supplied set.
1764 ///
1765 static void             
1766 consumeUntilSet                     (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767 {
1768     ANTLR3_UINT32           ttype;
1769     pANTLR3_PARSER          parser;
1770     pANTLR3_TREE_PARSER     tparser;
1771     pANTLR3_INT_STREAM      is;
1772
1773     switch      (recognizer->type)
1774     {
1775                 case    ANTLR3_TYPE_PARSER:
1776
1777                         parser  = (pANTLR3_PARSER) (recognizer->super);
1778                         tparser = NULL;
1779                         is      = parser->tstream->istream;
1780
1781                         break;
1782
1783                 case    ANTLR3_TYPE_TREE_PARSER:
1784
1785                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786                         parser  = NULL;
1787                         is      = tparser->ctnstream->tnstream->istream;
1788
1789                         break;
1790
1791                 default:
1792                     
1793                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794                         return;
1795
1796                         break;
1797     }
1798
1799     // What do have at the moment?
1800     //
1801     ttype       = is->_LA(is, 1);
1802
1803     // Start eating tokens until we get to one we want.
1804     //
1805     while   (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806     {
1807                 is->consume(is);
1808                 ttype   = is->_LA(is, 1);
1809     }
1810 }
1811
1812 /** Return the rule invocation stack (how we got here in the parse.
1813  *  In the java version Ter just asks the JVM for all the information
1814  *  but in C we don't get this information, so I am going to do nothing 
1815  *  right now.
1816  */
1817 static pANTLR3_STACK    
1818 getRuleInvocationStack              (pANTLR3_BASE_RECOGNIZER recognizer)
1819 {
1820     return NULL;
1821 }
1822
1823 static pANTLR3_STACK    
1824 getRuleInvocationStackNamed         (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825 {
1826     return NULL;
1827 }
1828
1829 /** Convenience method for template rewrites - NYI.
1830  */
1831 static pANTLR3_HASH_TABLE       
1832 toStrings                           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833 {
1834     return NULL;
1835 }
1836
1837 static  void ANTLR3_CDECL
1838 freeIntTrie    (void * trie)
1839 {
1840     ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841 }
1842
1843
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied 
1845  *  start index before. If the rule has not parsed input starting from the supplied start index,
1846  *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847  *  then it will return the point where it last stopped parsing after that start point.
1848  *
1849  * \remark
1850  * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851  * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852  * version of the table.
1853  */
1854 static ANTLR3_MARKER    
1855 getRuleMemoization                  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856 {
1857     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858      */
1859     pANTLR3_INT_TRIE    ruleList;
1860     ANTLR3_MARKER       stopIndex;
1861     pANTLR3_TRIE_ENTRY  entry;
1862
1863     /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864      * as we will need it eventually if we are being asked for the memo here.
1865      */
1866     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867
1868     if  (entry == NULL)
1869     {
1870                 /* Did not find it, so create a new one for it, with a bit depth based on the 
1871                  * size of the input stream. We need the bit depth to incorporate the number if
1872                  * bits required to represent the largest possible stop index in the input, which is the
1873                  * last character. An int stream is free to return the largest 64 bit offset if it has
1874                  * no idea of the size, but you should remember that this will cause the leftmost
1875                  * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876                  */
1877                 ruleList    = antlr3IntTrieNew(63);     /* Depth is theoretically 64 bits, but probably not ;-) */
1878
1879                 if (ruleList != NULL)
1880                 {
1881                         recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882                 }
1883
1884                 /* We cannot have a stopIndex in a trie we have just created of course
1885                  */
1886                 return  MEMO_RULE_UNKNOWN;
1887     }
1888
1889     ruleList    = (pANTLR3_INT_TRIE) (entry->data.ptr);
1890
1891     /* See if there is a stop index associated with the supplied start index.
1892      */
1893     stopIndex   = 0;
1894
1895     entry = ruleList->get(ruleList, ruleParseStart);
1896     if (entry != NULL)
1897     {
1898                 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899     }
1900
1901     if  (stopIndex == 0)
1902     {
1903                 return MEMO_RULE_UNKNOWN;
1904     }
1905
1906     return  stopIndex;
1907 }
1908
1909 /** Has this rule already parsed input at the current index in the
1910  *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911  *  if we have not.
1912  *
1913  *  This method has a side-effect: if we have seen this input for
1914  *  this rule and successfully parsed before, then seek ahead to
1915  *  1 past the stop token matched for this rule last time.
1916  */
1917 static ANTLR3_BOOLEAN   
1918 alreadyParsedRule                   (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919 {
1920     ANTLR3_MARKER                       stopIndex;
1921     pANTLR3_LEXER                       lexer;
1922     pANTLR3_PARSER                      parser;
1923     pANTLR3_TREE_PARSER     tparser;
1924     pANTLR3_INT_STREAM      is;
1925
1926     switch      (recognizer->type)
1927     {
1928                 case    ANTLR3_TYPE_PARSER:
1929
1930                         parser  = (pANTLR3_PARSER) (recognizer->super);
1931                         tparser = NULL;
1932                         lexer   = NULL;
1933                         is      = parser->tstream->istream;
1934
1935                         break;
1936
1937                 case    ANTLR3_TYPE_TREE_PARSER:
1938
1939                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940                         parser  = NULL;
1941                         lexer   = NULL;
1942                         is      = tparser->ctnstream->tnstream->istream;
1943
1944                         break;
1945
1946                 case    ANTLR3_TYPE_LEXER:
1947
1948                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
1949                         parser  = NULL;
1950                         tparser = NULL;
1951                         is      = lexer->input->istream;
1952                         break;
1953
1954                 default:
1955                     
1956                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957                         return ANTLR3_FALSE;
1958
1959                         break;
1960     }
1961
1962     /* See if we have a memo marker for this.
1963      */
1964     stopIndex       = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965
1966     if  (stopIndex  == MEMO_RULE_UNKNOWN)
1967     {
1968                 return ANTLR3_FALSE;
1969     }
1970
1971     if  (stopIndex == MEMO_RULE_FAILED)
1972     {
1973                 recognizer->state->failed = ANTLR3_TRUE;
1974     }
1975     else
1976     {
1977                 is->seek(is, stopIndex+1);
1978     }
1979
1980     /* If here then the rule was executed for this input already
1981      */
1982     return  ANTLR3_TRUE;
1983 }
1984
1985 /** Record whether or not this rule parsed the input at this position
1986  *  successfully.
1987  */
1988 static void             
1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990 {
1991     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992      */
1993     pANTLR3_INT_TRIE        ruleList;
1994     pANTLR3_TRIE_ENTRY      entry;
1995     ANTLR3_MARKER           stopIndex;
1996     pANTLR3_LEXER           lexer;
1997     pANTLR3_PARSER          parser;
1998     pANTLR3_TREE_PARSER     tparser;
1999     pANTLR3_INT_STREAM      is;
2000
2001     switch      (recognizer->type)
2002     {
2003                 case    ANTLR3_TYPE_PARSER:
2004
2005                         parser  = (pANTLR3_PARSER) (recognizer->super);
2006                         tparser = NULL;
2007                         is      = parser->tstream->istream;
2008
2009                         break;
2010
2011                 case    ANTLR3_TYPE_TREE_PARSER:
2012
2013                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014                         parser  = NULL;
2015                         is      = tparser->ctnstream->tnstream->istream;
2016
2017                         break;
2018
2019                 case    ANTLR3_TYPE_LEXER:
2020
2021                         lexer   = (pANTLR3_LEXER)   (recognizer->super);
2022                         parser  = NULL;
2023                         tparser = NULL;
2024                         is              = lexer->input->istream;
2025                         break;
2026
2027                 default:
2028                     
2029                         ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030                         return;
2031
2032                         break;
2033     }
2034     
2035     stopIndex   = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036
2037     entry       = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038
2039     if  (entry != NULL)
2040     {
2041                 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042
2043                 /* If we don't already have this entry, append it. The memoize trie does not
2044                  * accept duplicates so it won't add it if already there and we just ignore the
2045                  * return code as we don't care if it is there already.
2046                  */
2047                 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048     }
2049 }
2050 /** A syntactic predicate.  Returns true/false depending on whether
2051  *  the specified grammar fragment matches the current input stream.
2052  *  This resets the failed instance var afterwards.
2053  */
2054 static ANTLR3_BOOLEAN   
2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056 {
2057     ANTLR3_MARKER   start;
2058     pANTLR3_PARSER          parser;
2059     pANTLR3_TREE_PARSER     tparser;
2060     pANTLR3_INT_STREAM      is;
2061
2062     switch      (recognizer->type)
2063     {
2064                 case    ANTLR3_TYPE_PARSER:
2065
2066                         parser  = (pANTLR3_PARSER) (recognizer->super);
2067                         tparser = NULL;
2068                         is      = parser->tstream->istream;
2069
2070                         break;
2071
2072                 case    ANTLR3_TYPE_TREE_PARSER:
2073
2074                         tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075                         parser  = NULL;
2076                         is      = tparser->ctnstream->tnstream->istream;
2077
2078                         break;
2079
2080                 default:
2081                     
2082                         ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083                         return ANTLR3_FALSE;
2084
2085                         break;
2086     }
2087
2088     /* Begin backtracking so we can get back to where we started after trying out
2089      * the syntactic predicate.
2090      */
2091     start   = is->mark(is);
2092     recognizer->state->backtracking++;
2093
2094     /* Try the syntactical predicate
2095      */
2096     predicate(ctx);
2097
2098     /* Reset
2099      */
2100     is->rewind(is, start);
2101     recognizer->state->backtracking--;
2102
2103     if  (recognizer->state->failed == ANTLR3_TRUE)
2104     {
2105                 /* Predicate failed
2106                  */
2107                 recognizer->state->failed = ANTLR3_FALSE;
2108                 return  ANTLR3_FALSE;
2109     }
2110     else
2111     {
2112                 /* Predicate was successful
2113                  */
2114                 recognizer->state->failed       = ANTLR3_FALSE;
2115                 return  ANTLR3_TRUE;
2116     }
2117 }
2118
2119 static void
2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121 {
2122     if  (recognizer->state->following != NULL)
2123     {
2124                 recognizer->state->following->free(recognizer->state->following);
2125     }
2126
2127         // Reset the state flags
2128         //
2129         recognizer->state->errorRecovery        = ANTLR3_FALSE;
2130         recognizer->state->lastErrorIndex       = -1;
2131         recognizer->state->failed                       = ANTLR3_FALSE;
2132         recognizer->state->errorCount           = 0;
2133         recognizer->state->backtracking         = 0;
2134         recognizer->state->following            = NULL;
2135
2136         if      (recognizer->state != NULL)
2137         {
2138                 if      (recognizer->state->ruleMemo != NULL)
2139                 {
2140                         recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141                         recognizer->state->ruleMemo = antlr3IntTrieNew(15);     /* 16 bit depth is enough for 32768 rules! */
2142                 }
2143         }
2144         
2145
2146     // Install a new following set
2147     //
2148     recognizer->state->following   = antlr3StackNew(8);
2149
2150 }
2151
2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2153 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2154 //
2155 static void *                           
2156 getCurrentInputSymbol           (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2157 {
2158         return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2159 }
2160
2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2162 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2163 //
2164 static void *                           
2165 getMissingSymbol                        (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION      e,
2166                                                                         ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2167 {
2168         pANTLR3_TOKEN_STREAM                    ts;
2169         pANTLR3_COMMON_TOKEN_STREAM             cts;
2170         pANTLR3_COMMON_TOKEN                    token;
2171         pANTLR3_COMMON_TOKEN                    current;
2172         pANTLR3_STRING                                  text;
2173
2174         // Dereference the standard pointers
2175         //
2176         ts              = (pANTLR3_TOKEN_STREAM)istream->super;
2177         cts             = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2178         
2179         // Work out what to use as the current symbol to make a line and offset etc
2180         // If we are at EOF, we use the token before EOF
2181         //
2182         current = ts->_LT(ts, 1);
2183         if      (current->getType(current) == ANTLR3_TOKEN_EOF)
2184         {
2185                 current = ts->_LT(ts, -1);
2186         }
2187
2188         // Create a new empty token
2189         //
2190         if      (recognizer->state->tokFactory == NULL)
2191         {
2192                 // We don't yet have a token factory for making tokens
2193                 // we just need a fake one using the input stream of the current
2194                 // token.
2195                 //
2196                 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2197         }
2198         token   = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2199
2200         // Set some of the token properties based on the current token
2201         //
2202         token->setLine                                  (token, current->getLine(current));
2203         token->setCharPositionInLine    (token, current->getCharPositionInLine(current));
2204         token->setChannel                               (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2205         token->setType                                  (token, expectedTokenType);
2206     token->user1                    = current->user1;
2207     token->user2                    = current->user2;
2208     token->user3                    = current->user3;
2209     token->custom                   = current->custom;
2210     token->lineStart                = current->lineStart;
2211     
2212         // Create the token text that shows it has been inserted
2213         //
2214         token->setText8(token, (pANTLR3_UINT8)"<missing ");
2215         text = token->getText(token);
2216
2217         if      (text != NULL)
2218         {
2219                 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2220                 text->append8(text, (const char *)">");
2221         }
2222         
2223         // Finally return the pointer to our new token
2224         //
2225         return  token;
2226 }
2227
2228
2229 #ifdef  ANTLR3_WINDOWS
2230 #pragma warning( default : 4100 )
2231 #endif
2232
2233 /// @}
2234 ///
2235