/** \file
 * Contains the base functions that all recognizers require.
 * Any function can be overridden by a lexer/parser/tree parser or by the
 * ANTLR3 programmer.
 *
 * \addtogroup pANTLR3_BASE_RECOGNIZER
 * @{
 */
\r
9 #include <antlr3baserecognizer.h>
\r
// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
#ifdef ANTLR3_WINDOWS
/* Many of the default implementations below do not use every parameter,
 * so warning 4100 is switched off for Windows builds.
 */
#pragma warning( disable : 4100 )
#endif
\r
44 /* Interface functions -standard implementations cover parser and treeparser
\r
45 * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
\r
46 * most of these functions.
\r
48 static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
\r
49 static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
\r
50 static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
\r
51 static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
\r
52 static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
\r
54 static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
\r
55 static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
\r
56 static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
\r
57 static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
\r
58 static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
\r
59 static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
\r
60 static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
\r
61 static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
\r
62 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
\r
63 static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
\r
64 static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
\r
65 static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
\r
66 static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
\r
67 static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
\r
68 static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
\r
69 static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
\r
70 static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
\r
71 static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
\r
72 static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
\r
73 static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
\r
74 static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
\r
75 static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
\r
76 static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
\r
77 static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
\r
78 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
\r
79 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
\r
80 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
\r
81 static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
\r
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
\r
84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
\r
86 pANTLR3_BASE_RECOGNIZER recognizer;
\r
88 // Allocate memory for the structure
\r
90 recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
\r
92 if (recognizer == NULL)
\r
94 // Allocation failed
\r
100 // If we have been supplied with a pre-existing recognizer state
\r
101 // then we just install it, otherwise we must create one from scratch
\r
105 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
\r
107 if (recognizer->state == NULL)
\r
109 ANTLR3_FREE(recognizer);
\r
113 // Initialize any new recognizer state
\r
115 recognizer->state->errorRecovery = ANTLR3_FALSE;
\r
116 recognizer->state->lastErrorIndex = -1;
\r
117 recognizer->state->failed = ANTLR3_FALSE;
\r
118 recognizer->state->errorCount = 0;
\r
119 recognizer->state->backtracking = 0;
\r
120 recognizer->state->following = NULL;
\r
121 recognizer->state->ruleMemo = NULL;
\r
122 recognizer->state->tokenNames = NULL;
\r
123 recognizer->state->sizeHint = sizeHint;
\r
124 recognizer->state->tokSource = NULL;
\r
125 recognizer->state->tokFactory = NULL;
\r
127 // Rather than check to see if we must initialize
\r
128 // the stack every time we are asked for an new rewrite stream
\r
129 // we just always create an empty stack and then just
\r
130 // free it when the base recognizer is freed.
\r
132 recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
\r
134 if (recognizer->state->rStreams == NULL)
\r
138 ANTLR3_FREE(recognizer->state);
\r
139 ANTLR3_FREE(recognizer);
\r
145 // Install the one we were given, and do not reset it here
\r
146 // as it will either already have been initialized or will
\r
147 // be in a state that needs to be preserved.
\r
149 recognizer->state = state;
\r
152 // Install the BR API
\r
154 recognizer->alreadyParsedRule = alreadyParsedRule;
\r
155 recognizer->beginResync = beginResync;
\r
156 recognizer->combineFollows = combineFollows;
\r
157 recognizer->beginBacktrack = beginBacktrack;
\r
158 recognizer->endBacktrack = endBacktrack;
\r
159 recognizer->computeCSRuleFollow = computeCSRuleFollow;
\r
160 recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
\r
161 recognizer->consumeUntil = consumeUntil;
\r
162 recognizer->consumeUntilSet = consumeUntilSet;
\r
163 recognizer->displayRecognitionError = displayRecognitionError;
\r
164 recognizer->endResync = endResync;
\r
165 recognizer->exConstruct = antlr3MTExceptionNew;
\r
166 recognizer->getRuleInvocationStack = getRuleInvocationStack;
\r
167 recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
\r
168 recognizer->getRuleMemoization = getRuleMemoization;
\r
169 recognizer->match = match;
\r
170 recognizer->matchAny = matchAny;
\r
171 recognizer->memoize = memoize;
\r
172 recognizer->mismatch = mismatch;
\r
173 recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
\r
174 recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
\r
175 recognizer->recover = recover;
\r
176 recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
\r
177 recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
\r
178 recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
\r
179 recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
\r
180 recognizer->reportError = reportError;
\r
181 recognizer->reset = reset;
\r
182 recognizer->synpred = synpred;
\r
183 recognizer->toStrings = toStrings;
\r
184 recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
\r
185 recognizer->getMissingSymbol = getMissingSymbol;
\r
186 recognizer->debugger = NULL;
\r
188 recognizer->free = freeBR;
\r
190 /* Initialize variables
\r
192 recognizer->type = type;
\r
198 freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
\r
200 pANTLR3_EXCEPTION thisE;
\r
202 // Did we have a state allocated?
\r
204 if (recognizer->state != NULL)
\r
206 // Free any rule memoization we set up
\r
208 if (recognizer->state->ruleMemo != NULL)
\r
210 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
\r
211 recognizer->state->ruleMemo = NULL;
\r
214 // Free any exception space we have left around
\r
216 thisE = recognizer->state->exception;
\r
219 thisE->freeEx(thisE);
\r
222 // Free any rewrite streams we have allocated
\r
224 if (recognizer->state->rStreams != NULL)
\r
226 recognizer->state->rStreams->free(recognizer->state->rStreams);
\r
229 // Free up any token factory we created (error recovery for instance)
\r
231 if (recognizer->state->tokFactory != NULL)
\r
233 recognizer->state->tokFactory->close(recognizer->state->tokFactory);
\r
235 // Free the shared state memory
\r
237 ANTLR3_FREE(recognizer->state);
\r
240 // Free the actual recognizer space
\r
242 ANTLR3_FREE(recognizer);
\r
246 * Creates a new Mismatched Token Exception and inserts in the recognizer
\r
249 * \param recognizer
\r
250 * Context pointer for this recognizer
\r
254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
\r
256 /* Create a basic recognition exception structure
\r
258 antlr3RecognitionExceptionNew(recognizer);
\r
260 /* Now update it to indicate this is a Mismatched token exception
\r
262 recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
\r
263 recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
\r
269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
\r
271 pANTLR3_EXCEPTION ex;
\r
272 pANTLR3_LEXER lexer;
\r
273 pANTLR3_PARSER parser;
\r
274 pANTLR3_TREE_PARSER tparser;
\r
276 pANTLR3_INPUT_STREAM ins;
\r
277 pANTLR3_INT_STREAM is;
\r
278 pANTLR3_COMMON_TOKEN_STREAM cts;
\r
279 pANTLR3_TREE_NODE_STREAM tns;
\r
289 switch (recognizer->type)
\r
291 case ANTLR3_TYPE_LEXER:
\r
293 lexer = (pANTLR3_LEXER) (recognizer->super);
\r
294 ins = lexer->input;
\r
299 case ANTLR3_TYPE_PARSER:
\r
301 parser = (pANTLR3_PARSER) (recognizer->super);
\r
302 cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
\r
303 is = parser->tstream->istream;
\r
307 case ANTLR3_TYPE_TREE_PARSER:
\r
309 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
310 tns = tparser->ctnstream->tnstream;
\r
317 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
\r
323 /* Create a basic exception structure
\r
325 ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
\r
326 (void *)ANTLR3_RECOGNITION_EX_NAME,
\r
330 /* Rest of information depends on the base type of the
\r
333 switch (is->type & ANTLR3_INPUT_MASK)
\r
335 case ANTLR3_CHARSTREAM:
\r
337 ex->c = is->_LA (is, 1); /* Current input character */
\r
338 ex->line = ins->getLine (ins); /* Line number comes from stream */
\r
339 ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
\r
340 ex->index = is->index (is);
\r
341 ex->streamName = ins->fileName;
\r
342 ex->message = "Unexpected character";
\r
345 case ANTLR3_TOKENSTREAM:
\r
347 ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */
\r
348 ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine (ex->token);
\r
349 ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine (ex->token);
\r
350 ex->index = cts->tstream->istream->index (cts->tstream->istream);
\r
351 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
\r
353 ex->streamName = NULL;
\r
357 ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
\r
359 ex->message = "Unexpected token";
\r
362 case ANTLR3_COMMONTREENODE:
\r
364 ex->token = tns->_LT (tns, 1); /* Current input tree node */
\r
365 ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token);
\r
366 ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token);
\r
367 ex->index = tns->istream->index (tns->istream);
\r
369 // Are you ready for this? Deep breath now...
\r
372 pANTLR3_COMMON_TREE tnode;
\r
374 tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
\r
376 if (tnode->token == NULL)
\r
378 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
\r
382 if (tnode->token->input == NULL)
\r
384 ex->streamName = NULL;
\r
388 ex->streamName = tnode->token->input->fileName;
\r
391 ex->message = "Unexpected node";
\r
397 ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
\r
398 recognizer->state->exception = ex;
\r
399 recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
\r
405 /// Match current input symbol against ttype. Upon error, do one token
\r
406 /// insertion or deletion if possible.
\r
407 /// To turn off single token insertion or deletion error
\r
408 /// recovery, override mismatchRecover() and have it call
\r
409 /// plain mismatch(), which does not recover. Then any error
\r
410 /// in a rule will cause an exception and immediate exit from
\r
411 /// rule. Rule would recover by resynchronizing to the set of
\r
412 /// symbols that can follow rule ref.
\r
415 match( pANTLR3_BASE_RECOGNIZER recognizer,
\r
416 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
\r
418 pANTLR3_PARSER parser;
\r
419 pANTLR3_TREE_PARSER tparser;
\r
420 pANTLR3_INT_STREAM is;
\r
421 void * matchedSymbol;
\r
423 switch (recognizer->type)
\r
425 case ANTLR3_TYPE_PARSER:
\r
427 parser = (pANTLR3_PARSER) (recognizer->super);
\r
429 is = parser->tstream->istream;
\r
433 case ANTLR3_TYPE_TREE_PARSER:
\r
435 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
437 is = tparser->ctnstream->tnstream->istream;
\r
443 ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
\r
444 return ANTLR3_FALSE;
\r
449 // Pick up the current input token/node for assignment to labels
\r
451 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
\r
453 if (is->_LA(is, 1) == ttype)
\r
455 // The token was the one we were told to expect
\r
457 is->consume(is); // Consume that token from the stream
\r
458 recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
\r
459 recognizer->state->failed = ANTLR3_FALSE; // The match was a success
\r
460 return matchedSymbol; // We are done
\r
463 // We did not find the expected token type, if we are backtracking then
\r
464 // we just set the failed flag and return.
\r
466 if (recognizer->state->backtracking > 0)
\r
468 // Backtracking is going on
\r
470 recognizer->state->failed = ANTLR3_TRUE;
\r
471 return matchedSymbol;
\r
474 // We did not find the expected token and there is no backtracking
\r
475 // going on, so we mismatch, which creates an exception in the recognizer exception
\r
478 matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
\r
479 return matchedSymbol;
\r
482 /// Consumes the next token, whatever it is, and resets the recognizer state
\r
483 /// so that it is not in error.
\r
485 /// \param recognizer
\r
486 /// Recognizer context pointer
\r
489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
\r
491 pANTLR3_PARSER parser;
\r
492 pANTLR3_TREE_PARSER tparser;
\r
493 pANTLR3_INT_STREAM is;
\r
495 switch (recognizer->type)
\r
497 case ANTLR3_TYPE_PARSER:
\r
499 parser = (pANTLR3_PARSER) (recognizer->super);
\r
501 is = parser->tstream->istream;
\r
505 case ANTLR3_TYPE_TREE_PARSER:
\r
507 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
509 is = tparser->ctnstream->tnstream->istream;
\r
515 ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
\r
520 recognizer->state->errorRecovery = ANTLR3_FALSE;
\r
521 recognizer->state->failed = ANTLR3_FALSE;
\r
528 static ANTLR3_BOOLEAN
\r
529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
\r
531 ANTLR3_UINT32 nextt;
\r
533 nextt = is->_LA(is, 2);
\r
535 if (nextt == ttype)
\r
537 if (recognizer->state->exception != NULL)
\r
539 recognizer->state->exception->expecting = nextt;
\r
541 return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
\r
545 return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
\r
551 static ANTLR3_BOOLEAN
\r
552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
\r
554 ANTLR3_BOOLEAN retcode;
\r
555 pANTLR3_BITSET followClone;
\r
556 pANTLR3_BITSET viableTokensFollowingThisRule;
\r
558 if (follow == NULL)
\r
560 // There is no information about the tokens that can follow the last one
\r
561 // hence we must say that the current one we found is not a member of the
\r
562 // follow set and does not indicate a missing token. We will just consume this
\r
563 // single token and see if the parser works it out from there.
\r
565 return ANTLR3_FALSE;
\r
568 followClone = NULL;
\r
569 viableTokensFollowingThisRule = NULL;
\r
571 // The C bitset maps are laid down at compile time by the
\r
572 // C code generation. Hence we cannot remove things from them
\r
573 // and so on. So, in order to remove EOR (if we need to) then
\r
574 // we clone the static bitset.
\r
576 followClone = antlr3BitsetLoad(follow);
\r
577 if (followClone == NULL)
\r
579 return ANTLR3_FALSE;
\r
582 // Compute what can follow this grammar reference
\r
584 if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
\r
586 // EOR can follow, but if we are not the start symbol, we
\r
587 // need to remove it.
\r
589 if (recognizer->state->following->vector->count >= 0)
\r
591 followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
\r
594 // Now compute the visiable tokens that can follow this rule, according to context
\r
595 // and make them part of the follow set.
\r
597 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
\r
598 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
\r
601 /// if current token is consistent with what could come after set
\r
602 /// then we know we're missing a token; error recovery is free to
\r
603 /// "insert" the missing token
\r
605 /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
\r
606 /// in follow set to indicate that the fall of the start symbol is
\r
607 /// in the set (EOF can follow).
\r
609 if ( followClone->isMember(followClone, is->_LA(is, 1))
\r
610 || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
\r
613 retcode = ANTLR3_TRUE;
\r
617 retcode = ANTLR3_FALSE;
\r
620 if (viableTokensFollowingThisRule != NULL)
\r
622 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
\r
624 if (followClone != NULL)
\r
626 followClone->free(followClone);
\r
633 /// Factor out what to do upon token mismatch so tree parsers can behave
\r
634 /// differently. Override and call mismatchRecover(input, ttype, follow)
\r
635 /// to get single token insertion and deletion. Use this to turn off
\r
636 /// single token insertion and deletion. Override mismatchRecover
\r
637 /// to call this instead.
\r
639 /// \remark mismatch only works for parsers and must be overridden for anything else.
\r
642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
\r
644 pANTLR3_PARSER parser;
\r
645 pANTLR3_TREE_PARSER tparser;
\r
646 pANTLR3_INT_STREAM is;
\r
648 // Install a mismatched token exception in the exception stack
\r
650 antlr3MTExceptionNew(recognizer);
\r
651 recognizer->state->exception->expecting = ttype;
\r
653 switch (recognizer->type)
\r
655 case ANTLR3_TYPE_PARSER:
\r
657 parser = (pANTLR3_PARSER) (recognizer->super);
\r
659 is = parser->tstream->istream;
\r
665 ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
\r
671 if (mismatchIsUnwantedToken(recognizer, is, ttype))
\r
673 // Create a basic recognition exception structure
\r
675 antlr3RecognitionExceptionNew(recognizer);
\r
677 // Now update it to indicate this is an unwanted token exception
\r
679 recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
\r
680 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
\r
685 if (mismatchIsMissingToken(recognizer, is, follow))
\r
687 // Create a basic recognition exception structure
\r
689 antlr3RecognitionExceptionNew(recognizer);
\r
691 // Now update it to indicate this is an unwanted token exception
\r
693 recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
\r
694 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
\r
699 // Just a mismatched token is all we can dtermine
\r
701 antlr3MTExceptionNew(recognizer);
\r
705 /// Report a recognition problem.
\r
707 /// This method sets errorRecovery to indicate the parser is recovering
\r
708 /// not parsing. Once in recovery mode, no errors are generated.
\r
709 /// To get out of recovery mode, the parser must successfully match
\r
710 /// a token (after a resync). So it will go:
\r
712 /// 1. error occurs
\r
713 /// 2. enter recovery mode, report error
\r
714 /// 3. consume until token found in resynch set
\r
715 /// 4. try to resume parsing
\r
716 /// 5. next match() will reset errorRecovery mode
\r
718 /// If you override, make sure to update errorCount if you care about that.
\r
721 reportError (pANTLR3_BASE_RECOGNIZER recognizer)
\r
723 if (recognizer->state->errorRecovery == ANTLR3_TRUE)
\r
725 // Already in error recovery so don't display another error while doing so
\r
730 // Signal we are in error recovery now
\r
732 recognizer->state->errorRecovery = ANTLR3_TRUE;
\r
734 // Indicate this recognizer had an error while processing.
\r
736 recognizer->state->errorCount++;
\r
738 // Call the error display routine
\r
740 recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
\r
744 beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
\r
746 if (recognizer->debugger != NULL)
\r
748 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
\r
753 endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
\r
755 if (recognizer->debugger != NULL)
\r
757 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
\r
761 beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
\r
763 if (recognizer->debugger != NULL)
\r
765 recognizer->debugger->beginResync(recognizer->debugger);
\r
770 endResync (pANTLR3_BASE_RECOGNIZER recognizer)
\r
772 if (recognizer->debugger != NULL)
\r
774 recognizer->debugger->endResync(recognizer->debugger);
\r
778 /// Compute the error recovery set for the current rule.
\r
779 /// Documentation below is from the Java implementation.
\r
781 /// During rule invocation, the parser pushes the set of tokens that can
\r
782 /// follow that rule reference on the stack; this amounts to
\r
783 /// computing FIRST of what follows the rule reference in the
\r
784 /// enclosing rule. This local follow set only includes tokens
\r
785 /// from within the rule; i.e., the FIRST computation done by
\r
786 /// ANTLR stops at the end of a rule.
\r
790 /// When you find a "no viable alt exception", the input is not
\r
791 /// consistent with any of the alternatives for rule r. The best
\r
792 /// thing to do is to consume tokens until you see something that
\r
793 /// can legally follow a call to r *or* any rule that called r.
\r
794 /// You don't want the exact set of viable next tokens because the
\r
795 /// input might just be missing a token--you might consume the
\r
796 /// rest of the input looking for one of the missing tokens.
\r
798 /// Consider grammar:
\r
803 /// b : c '^' INT ;
\r
808 /// At each rule invocation, the set of tokens that could follow
\r
809 /// that rule is pushed on a stack. Here are the various "local"
\r
812 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
\r
813 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
\r
814 /// FOLLOW(c_in_b) = FIRST('^') = '^'
\r
816 /// Upon erroneous input "[]", the call chain is
\r
820 /// and, hence, the follow context stack is:
\r
822 /// depth local follow set after call to rule
\r
823 /// 0 <EOF> a (from main())
\r
827 /// Notice that ')' is not included, because b would have to have
\r
828 /// been called from a different context in rule a for ')' to be
\r
831 /// For error recovery, we cannot consider FOLLOW(c)
\r
832 /// (context-sensitive or otherwise). We need the combined set of
\r
833 /// all context-sensitive FOLLOW sets--the set of all tokens that
\r
834 /// could follow any reference in the call chain. We need to
\r
835 /// resync to one of those tokens. Note that FOLLOW(c)='^' and if
\r
836 /// we resync'd to that token, we'd consume until EOF. We need to
\r
837 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
\r
838 /// In this case, for input "[]", LA(1) is in this set so we would
\r
839 /// not consume anything and after printing an error rule c would
\r
840 /// return normally. It would not find the required '^' though.
\r
841 /// At this point, it gets a mismatched token error and throws an
\r
842 /// exception (since LA(1) is not in the viable following token
\r
843 /// set). The rule exception handler tries to recover, but finds
\r
844 /// the same recovery set and doesn't consume anything. Rule b
\r
845 /// exits normally returning to rule a. Now it finds the ']' (and
\r
846 /// with the successful match exits errorRecovery mode).
\r
848 /// So, you can see that the parser walks up call chain looking
\r
849 /// for the token that was a member of the recovery set.
\r
851 /// Errors are not generated in errorRecovery mode.
\r
853 /// ANTLR's error recovery mechanism is based upon original ideas:
\r
855 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
\r
859 /// "A note on error recovery in recursive descent parsers":
\r
860 /// http://portal.acm.org/citation.cfm?id=947902.947905
\r
862 /// Later, Josef Grosch had some good ideas:
\r
864 /// "Efficient and Comfortable Error Recovery in Recursive Descent
\r
866 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
\r
868 /// Like Grosch I implemented local FOLLOW sets that are combined
\r
869 /// at run-time upon error to avoid overhead during parsing.
\r
871 static pANTLR3_BITSET
\r
872 computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
\r
874 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
\r
877 /// Compute the context-sensitive FOLLOW set for current rule.
\r
878 /// Documentation below is from the Java runtime.
\r
880 /// This is the set of token types that can follow a specific rule
\r
881 /// reference given a specific call chain. You get the set of
\r
882 /// viable tokens that can possibly come next (look ahead depth 1)
\r
883 /// given the current call chain. Contrast this with the
\r
884 /// definition of plain FOLLOW for rule r:
\r
886 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
\r
888 /// where x in T* and alpha, beta in V*; T is set of terminals and
\r
889 /// V is the set of terminals and non terminals. In other words,
\r
890 /// FOLLOW(r) is the set of all tokens that can possibly follow
\r
891 /// references to r in///any* sentential form (context). At
\r
892 /// runtime, however, we know precisely which context applies as
\r
893 /// we have the call chain. We may compute the exact (rather
\r
894 /// than covering superset) set of following tokens.
\r
896 /// For example, consider grammar:
\r
898 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
\r
899 /// | "return" expr '.'
\r
901 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
\r
902 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
\r
906 /// The FOLLOW sets are all inclusive whereas context-sensitive
\r
907 /// FOLLOW sets are precisely what could follow a rule reference.
\r
908 /// For input input "i=(3);", here is the derivation:
\r
910 /// stat => ID '=' expr ';'
\r
911 /// => ID '=' atom ('+' atom)* ';'
\r
912 /// => ID '=' '(' expr ')' ('+' atom)* ';'
\r
913 /// => ID '=' '(' atom ')' ('+' atom)* ';'
\r
914 /// => ID '=' '(' INT ')' ('+' atom)* ';'
\r
915 /// => ID '=' '(' INT ')' ';'
\r
917 /// At the "3" token, you'd have a call chain of
\r
919 /// stat -> expr -> atom -> expr -> atom
\r
921 /// What can follow that specific nested ref to atom? Exactly ')'
\r
922 /// as you can see by looking at the derivation of this specific
\r
923 /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
\r
925 /// You want the exact viable token set when recovering from a
\r
926 /// token mismatch. Upon token mismatch, if LA(1) is member of
\r
927 /// the viable next token set, then you know there is most likely
\r
928 /// a missing token in the input stream. "Insert" one by just not
\r
929 /// throwing an exception.
\r
931 static pANTLR3_BITSET
\r
932 computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
\r
934 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
\r
937 /// Compute the current followset for the input stream.
\r
939 static pANTLR3_BITSET
\r
940 combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
\r
942 pANTLR3_BITSET followSet;
\r
943 pANTLR3_BITSET localFollowSet;
\r
947 top = recognizer->state->following->size(recognizer->state->following);
\r
949 followSet = antlr3BitsetNew(0);
\r
950 localFollowSet = NULL;
\r
952 for (i = top; i>0; i--)
\r
954 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
\r
956 if (localFollowSet != NULL)
\r
958 followSet->borInPlace(followSet, localFollowSet);
\r
960 if (exact == ANTLR3_TRUE)
\r
962 if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
\r
964 // Only leave EOR in the set if at top (start rule); this lets us know
\r
965 // if we have to include the follow(start rule); I.E., EOF
\r
969 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
\r
974 break; // Cannot see End Of Rule from here, just drop out
\r
977 localFollowSet->free(localFollowSet);
\r
978 localFollowSet = NULL;
\r
982 if (localFollowSet != NULL)
\r
984 localFollowSet->free(localFollowSet);
\r
/// Standard/Example error display method.
/// No generic error message display function could possibly do everything correctly
/// for all possible parsers. Hence you are provided with this example routine, which
/// you should override in your parser/tree parser to do as you will.
///
/// Here we depart somewhat from the Java runtime as that has now split up a lot
/// of the error display routines into separate units. However, there is little advantage
/// to this in the C version as you will probably implement all such routines as a
/// separate translation unit, rather than install them all as pointers to functions
/// in the base recognizer.
///
\r
1001 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
\r
1003 pANTLR3_PARSER parser;
\r
1004 pANTLR3_TREE_PARSER tparser;
\r
1005 pANTLR3_INT_STREAM is;
\r
1006 pANTLR3_STRING ttext;
\r
1007 pANTLR3_STRING ftext;
\r
1008 pANTLR3_EXCEPTION ex;
\r
1009 pANTLR3_COMMON_TOKEN theToken;
\r
1010 pANTLR3_BASE_TREE theBaseTree;
\r
1011 pANTLR3_COMMON_TREE theCommonTree;
\r
1013 // Retrieve some info for easy reading.
\r
1015 ex = recognizer->state->exception;
\r
1018 // See if there is a 'filename' we can use
\r
1020 if (ex->streamName == NULL)
\r
1022 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
\r
1024 ANTLR3_FPRINTF(stderr, "-end of input-(");
\r
1028 ANTLR3_FPRINTF(stderr, "-unknown source-(");
\r
1033 ftext = ex->streamName->to8(ex->streamName);
\r
1034 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
\r
1037 // Next comes the line number
\r
1040 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
\r
1041 ANTLR3_FPRINTF(stderr, " : error %d : %s",
\r
1042 recognizer->state->exception->type,
\r
1043 (pANTLR3_UINT8) (recognizer->state->exception->message));
\r
1046 // How we determine the next piece is dependent on which thing raised the
\r
1049 switch (recognizer->type)
\r
1051 case ANTLR3_TYPE_PARSER:
\r
1053 // Prepare the knowledge we know we have
\r
1055 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1057 is = parser->tstream->istream;
\r
1058 theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
\r
1059 ttext = theToken->toString(theToken);
\r
1061 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
\r
1062 if (theToken != NULL)
\r
1064 if (theToken->type == ANTLR3_TOKEN_EOF)
\r
1066 ANTLR3_FPRINTF(stderr, ", at <EOF>");
\r
1070 // Guard against null text in a token
\r
1072 ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
\r
1077 case ANTLR3_TYPE_TREE_PARSER:
\r
1079 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1081 is = tparser->ctnstream->tnstream->istream;
\r
1082 theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
\r
1083 ttext = theBaseTree->toStringTree(theBaseTree);
\r
1085 if (theBaseTree != NULL)
\r
1087 theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
\r
1089 if (theCommonTree != NULL)
\r
1091 theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
\r
1093 ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
\r
1094 ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
\r
1100 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
\r
1105 // Although this function should generally be provided by the implementation, this one
\r
1106 // should be as helpful as possible for grammar developers and serve as an example
\r
1107 // of what you can do with each exception type. In general, when you make up your
\r
1108 // 'real' handler, you should debug the routine with all possible errors you expect
\r
1109 // which will then let you be as specific as possible about all circumstances.
\r
1111 // Note that in the general case, errors thrown by tree parsers indicate a problem
\r
1112 // with the output of the parser or with the tree grammar itself. The job of the parser
\r
1113 // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
\r
1114 // at that stage should really be semantic errors that your own code determines and handles
\r
1115 // in whatever way is appropriate.
\r
1119 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
\r
1121 // Indicates that the recognizer was fed a token which seesm to be
\r
1122 // spurious input. We can detect this when the token that follows
\r
1123 // this unwanted token would normally be part of the syntactically
\r
1124 // correct stream. Then we can see that the token we are looking at
\r
1125 // is just something that should not be there and throw this exception.
\r
1127 if (tokenNames == NULL)
\r
1129 ANTLR3_FPRINTF(stderr, " : Extraneous input...");
\r
1133 if (ex->expecting == ANTLR3_TOKEN_EOF)
\r
1135 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
\r
1139 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
\r
1144 case ANTLR3_MISSING_TOKEN_EXCEPTION:
\r
1146 // Indicates that the recognizer detected that the token we just
\r
1147 // hit would be valid syntactically if preceeded by a particular
\r
1148 // token. Perhaps a missing ';' at line end or a missing ',' in an
\r
1149 // expression list, and such like.
\r
1151 if (tokenNames == NULL)
\r
1153 ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
\r
1157 if (ex->expecting == ANTLR3_TOKEN_EOF)
\r
1159 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
\r
1163 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
\r
1168 case ANTLR3_RECOGNITION_EXCEPTION:
\r
1170 // Indicates that the recognizer received a token
\r
1171 // in the input that was not predicted. This is the basic exception type
\r
1172 // from which all others are derived. So we assume it was a syntax error.
\r
1173 // You may get this if there are not more tokens and more are needed
\r
1174 // to complete a parse for instance.
\r
1176 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
\r
1179 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
\r
1181 // We were expecting to see one thing and got another. This is the
\r
1182 // most common error if we coudl not detect a missing or unwanted token.
\r
1183 // Here you can spend your efforts to
\r
1184 // derive more useful error messages based on the expected
\r
1185 // token set and the last token and so on. The error following
\r
1186 // bitmaps do a good job of reducing the set that we were looking
\r
1187 // for down to something small. Knowing what you are parsing may be
\r
1188 // able to allow you to be even more specific about an error.
\r
1190 if (tokenNames == NULL)
\r
1192 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
\r
1196 if (ex->expecting == ANTLR3_TOKEN_EOF)
\r
1198 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
\r
1202 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
\r
1207 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
\r
1209 // We could not pick any alt decision from the input given
\r
1210 // so god knows what happened - however when you examine your grammar,
\r
1211 // you should. It means that at the point where the current token occurred
\r
1212 // that the DFA indicates nowhere to go from here.
\r
1214 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
\r
1218 case ANTLR3_MISMATCHED_SET_EXCEPTION:
\r
1221 ANTLR3_UINT32 count;
\r
1222 ANTLR3_UINT32 bit;
\r
1223 ANTLR3_UINT32 size;
\r
1224 ANTLR3_UINT32 numbits;
\r
1225 pANTLR3_BITSET errBits;
\r
1227 // This means we were able to deal with one of a set of
\r
1228 // possible tokens at this point, but we did not see any
\r
1229 // member of that set.
\r
1231 ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
\r
1233 // What tokens could we have accepted at this point in the
\r
1237 errBits = antlr3BitsetLoad (ex->expectingSet);
\r
1238 numbits = errBits->numBits (errBits);
\r
1239 size = errBits->size (errBits);
\r
1243 // However many tokens we could have dealt with here, it is usually
\r
1244 // not useful to print ALL of the set here. I arbitrarily chose 8
\r
1245 // here, but you should do whatever makes sense for you of course.
\r
1246 // No token number 0, so look for bit 1 and on.
\r
1248 for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
\r
1250 // TODO: This doesn;t look right - should be asking if the bit is set!!
\r
1252 if (tokenNames[bit])
\r
1254 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
\r
1258 ANTLR3_FPRINTF(stderr, "\n");
\r
1262 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
\r
1263 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
\r
1268 case ANTLR3_EARLY_EXIT_EXCEPTION:
\r
1270 // We entered a loop requiring a number of token sequences
\r
1271 // but found a token that ended that sequence earlier than
\r
1272 // we should have done.
\r
1274 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
\r
1279 // We don't handle any other exceptions here, but you can
\r
1280 // if you wish. If we get an exception that hits this point
\r
1281 // then we are just going to report what we know about the
\r
1284 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
\r
1288 // Here you have the token that was in error which if this is
\r
1289 // the standard implementation will tell you the line and offset
\r
1290 // and also record the address of the start of the line in the
\r
1291 // input stream. You could therefore print the source line and so on.
\r
1292 // Generally though, I would expect that your lexer/parser will keep
\r
1293 // its own map of lines and source pointers or whatever as there
\r
1294 // are a lot of specific things you need to know about the input
\r
1295 // to do something like that.
\r
1296 // Here is where you do it though :-).
\r
1300 /// Return how many syntax errors were detected by this recognizer
\r
1302 static ANTLR3_UINT32
\r
1303 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
\r
1305 return recognizer->state->errorCount;
\r
1308 /// Recover from an error found on the input stream. Mostly this is
\r
1309 /// NoViableAlt exceptions, but could be a mismatched token that
\r
1310 /// the match() routine could not recover from.
\r
1313 recover (pANTLR3_BASE_RECOGNIZER recognizer)
\r
1315 // Used to compute the follow set of tokens
\r
1317 pANTLR3_BITSET followSet;
\r
1318 pANTLR3_PARSER parser;
\r
1319 pANTLR3_TREE_PARSER tparser;
\r
1320 pANTLR3_INT_STREAM is;
\r
1322 switch (recognizer->type)
\r
1324 case ANTLR3_TYPE_PARSER:
\r
1326 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1328 is = parser->tstream->istream;
\r
1332 case ANTLR3_TYPE_TREE_PARSER:
\r
1334 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1336 is = tparser->ctnstream->tnstream->istream;
\r
1342 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
\r
1348 // Are we about to repeat the same error?
\r
1350 if (recognizer->state->lastErrorIndex == is->index(is))
\r
1352 // The last error was at the same token index point. This must be a case
\r
1353 // where LT(1) is in the recovery token set so nothing is
\r
1354 // consumed. Consume a single token so at least to prevent
\r
1355 // an infinite loop; this is a failsafe.
\r
1360 // Record error index position
\r
1362 recognizer->state->lastErrorIndex = is->index(is);
\r
1364 // Work out the follows set for error recovery
\r
1366 followSet = recognizer->computeErrorRecoverySet(recognizer);
\r
1368 // Call resync hook (for debuggers and so on)
\r
1370 recognizer->beginResync(recognizer);
\r
1372 // Consume tokens until we have resynced to something in the follows set
\r
1374 recognizer->consumeUntilSet(recognizer, followSet);
\r
1376 // End resync hook
\r
1378 recognizer->endResync(recognizer);
\r
1380 // Destroy the temporary bitset we produced.
\r
1382 followSet->free(followSet);
\r
1384 // Reset the inError flag so we don't re-report the exception
\r
1386 recognizer->state->error = ANTLR3_FALSE;
\r
1387 recognizer->state->failed = ANTLR3_FALSE;
\r
1391 /// Attempt to recover from a single missing or extra token.
\r
1395 /// LA(1) is not what we are looking for. If LA(2) has the right token,
\r
1396 /// however, then assume LA(1) is some extra spurious token. Delete it
\r
1397 /// and LA(2) as if we were doing a normal match(), which advances the
\r
1402 /// If current token is consistent with what could come after
\r
1403 /// ttype then it is ok to "insert" the missing token, else throw
\r
1404 /// exception For example, Input "i=(3;" is clearly missing the
\r
1405 /// ')'. When the parser returns from the nested call to expr, it
\r
1406 /// will have call chain:
\r
1408 /// stat -> expr -> atom
\r
1410 /// and it will be trying to match the ')' at this point in the
\r
1413 /// => ID '=' '(' INT ')' ('+' atom)* ';'
\r
1415 /// match() will see that ';' doesn't match ')' and report a
\r
1416 /// mismatched token error. To recover, it sees that LA(1)==';'
\r
1417 /// is in the set of tokens that can follow the ')' token
\r
1418 /// reference in rule atom. It can assume that you forgot the ')'.
\r
1420 /// The exception that was passed in, in the java implementation is
\r
1421 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
\r
1422 /// error flag and rules cascade back when this is set.
\r
1425 recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
\r
1427 pANTLR3_PARSER parser;
\r
1428 pANTLR3_TREE_PARSER tparser;
\r
1429 pANTLR3_INT_STREAM is;
\r
1430 void * matchedSymbol;
\r
1432 // Invoke the debugger event if there is a debugger listening to us
\r
1434 if (recognizer->debugger != NULL)
\r
1436 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
\r
1439 switch (recognizer->type)
\r
1441 case ANTLR3_TYPE_PARSER:
\r
1443 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1445 is = parser->tstream->istream;
\r
1449 case ANTLR3_TYPE_TREE_PARSER:
\r
1451 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1453 is = tparser->ctnstream->tnstream->istream;
\r
1459 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
\r
1465 // Create an exception if we need one
\r
1467 if (recognizer->state->exception == NULL)
\r
1469 antlr3RecognitionExceptionNew(recognizer);
\r
1472 // If the next token after the one we are looking at in the input stream
\r
1473 // is what we are looking for then we remove the one we have discovered
\r
1474 // from the stream by consuming it, then consume this next one along too as
\r
1475 // if nothing had happened.
\r
1477 if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
\r
1479 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
\r
1480 recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
\r
1482 // Call resync hook (for debuggers and so on)
\r
1484 if (recognizer->debugger != NULL)
\r
1486 recognizer->debugger->beginResync(recognizer->debugger);
\r
1489 recognizer->beginResync(recognizer);
\r
1491 // "delete" the extra token
\r
1493 recognizer->beginResync(recognizer);
\r
1495 recognizer->endResync(recognizer);
\r
1496 // End resync hook
\r
1498 if (recognizer->debugger != NULL)
\r
1500 recognizer->debugger->endResync(recognizer->debugger);
\r
1503 // Print out the error after we consume so that ANTLRWorks sees the
\r
1504 // token in the exception.
\r
1506 recognizer->reportError(recognizer);
\r
1508 // Return the token we are actually matching
\r
1510 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
\r
1512 // Consume the token that the rule actually expected to get as if everything
\r
1513 // was hunky dory.
\r
1517 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
\r
1519 return matchedSymbol;
\r
1522 // Single token deletion (Unwanted above) did not work
\r
1523 // so we see if we can insert a token instead by calculating which
\r
1524 // token would be missing
\r
1526 if (mismatchIsMissingToken(recognizer, is, follow))
\r
1528 // We can fake the missing token and proceed
\r
1530 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
\r
1531 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
\r
1532 recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
\r
1533 recognizer->state->exception->token = matchedSymbol;
\r
1534 recognizer->state->exception->expecting = ttype;
\r
1536 // Print out the error after we insert so that ANTLRWorks sees the
\r
1537 // token in the exception.
\r
1539 recognizer->reportError(recognizer);
\r
1541 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
\r
1543 return matchedSymbol;
\r
1547 // Neither deleting nor inserting tokens allows recovery
\r
1548 // must just report the exception.
\r
1550 recognizer->state->error = ANTLR3_TRUE;
\r
1555 recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
\r
1557 pANTLR3_PARSER parser;
\r
1558 pANTLR3_TREE_PARSER tparser;
\r
1559 pANTLR3_INT_STREAM is;
\r
1560 pANTLR3_COMMON_TOKEN matchedSymbol;
\r
1562 switch (recognizer->type)
\r
1564 case ANTLR3_TYPE_PARSER:
\r
1566 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1568 is = parser->tstream->istream;
\r
1572 case ANTLR3_TYPE_TREE_PARSER:
\r
1574 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1576 is = tparser->ctnstream->tnstream->istream;
\r
1582 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
\r
1588 if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
\r
1590 // We can fake the missing token and proceed
\r
1592 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
\r
1593 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
\r
1594 recognizer->state->exception->token = matchedSymbol;
\r
1596 // Print out the error after we insert so that ANTLRWorks sees the
\r
1597 // token in the exception.
\r
1599 recognizer->reportError(recognizer);
\r
1601 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
\r
1603 return matchedSymbol;
\r
1606 // TODO - Single token deletion like in recoverFromMismatchedToken()
\r
1608 recognizer->state->error = ANTLR3_TRUE;
\r
1609 recognizer->state->failed = ANTLR3_TRUE;
\r
1613 /// This code is factored out from mismatched token and mismatched set
\r
1614 /// recovery. It handles "single token insertion" error recovery for
\r
1615 /// both. No tokens are consumed to recover from insertions. Return
\r
1616 /// true if recovery was possible else return false.
\r
1618 static ANTLR3_BOOLEAN
\r
1619 recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
\r
1621 pANTLR3_BITSET viableToksFollowingRule;
\r
1622 pANTLR3_BITSET follow;
\r
1623 pANTLR3_PARSER parser;
\r
1624 pANTLR3_TREE_PARSER tparser;
\r
1625 pANTLR3_INT_STREAM is;
\r
1627 switch (recognizer->type)
\r
1629 case ANTLR3_TYPE_PARSER:
\r
1631 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1633 is = parser->tstream->istream;
\r
1637 case ANTLR3_TYPE_TREE_PARSER:
\r
1639 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1641 is = tparser->ctnstream->tnstream->istream;
\r
1647 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
\r
1648 return ANTLR3_FALSE;
\r
1653 follow = antlr3BitsetLoad(followBits);
\r
1655 if (follow == NULL)
\r
1657 /* The follow set is NULL, which means we don't know what can come
\r
1658 * next, so we "hit and hope" by just signifying that we cannot
\r
1659 * recover, which will just cause the next token to be consumed,
\r
1660 * which might dig us out.
\r
1662 return ANTLR3_FALSE;
\r
1665 /* We have a bitmap for the follow set, hence we can compute
\r
1666 * what can follow this grammar element reference.
\r
1668 if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
\r
1670 /* First we need to know which of the available tokens are viable
\r
1671 * to follow this reference.
\r
1673 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
\r
1675 /* Remove the EOR token, which we do not wish to compute with
\r
1677 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
\r
1678 viableToksFollowingRule->free(viableToksFollowingRule);
\r
1679 /* We now have the computed set of what can follow the current token
\r
1683 /* We can now see if the current token works with the set of tokens
\r
1684 * that could follow the current grammar reference. If it looks like it
\r
1685 * is consistent, then we can "insert" that token by not throwing
\r
1686 * an exception and assuming that we saw it.
\r
1688 if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
\r
1690 /* report the error, but don't cause any rules to abort and stuff
\r
1692 recognizer->reportError(recognizer);
\r
1693 if (follow != NULL)
\r
1695 follow->free(follow);
\r
1697 recognizer->state->error = ANTLR3_FALSE;
\r
1698 recognizer->state->failed = ANTLR3_FALSE;
\r
1699 return ANTLR3_TRUE; /* Success in recovery */
\r
1702 if (follow != NULL)
\r
1704 follow->free(follow);
\r
1707 /* We could not find anything viable to do, so this is going to
\r
1708 * cause an exception.
\r
1710 return ANTLR3_FALSE;
\r
1713 /// Eat tokens from the input stream until we get one of JUST the right type
\r
1716 consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
\r
1718 ANTLR3_UINT32 ttype;
\r
1719 pANTLR3_PARSER parser;
\r
1720 pANTLR3_TREE_PARSER tparser;
\r
1721 pANTLR3_INT_STREAM is;
\r
1723 switch (recognizer->type)
\r
1725 case ANTLR3_TYPE_PARSER:
\r
1727 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1729 is = parser->tstream->istream;
\r
1733 case ANTLR3_TYPE_TREE_PARSER:
\r
1735 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1737 is = tparser->ctnstream->tnstream->istream;
\r
1743 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
\r
1749 // What do have at the moment?
\r
1751 ttype = is->_LA(is, 1);
\r
1753 // Start eating tokens until we get to the one we want.
\r
1755 while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
\r
1758 ttype = is->_LA(is, 1);
\r
1762 /// Eat tokens from the input stream until we find one that
\r
1763 /// belongs to the supplied set.
\r
1766 consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
\r
1768 ANTLR3_UINT32 ttype;
\r
1769 pANTLR3_PARSER parser;
\r
1770 pANTLR3_TREE_PARSER tparser;
\r
1771 pANTLR3_INT_STREAM is;
\r
1773 switch (recognizer->type)
\r
1775 case ANTLR3_TYPE_PARSER:
\r
1777 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1779 is = parser->tstream->istream;
\r
1783 case ANTLR3_TYPE_TREE_PARSER:
\r
1785 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1787 is = tparser->ctnstream->tnstream->istream;
\r
1793 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
\r
1799 // What do have at the moment?
\r
1801 ttype = is->_LA(is, 1);
\r
1803 // Start eating tokens until we get to one we want.
\r
1805 while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
\r
1808 ttype = is->_LA(is, 1);
\r
1812 /** Return the rule invocation stack (how we got here in the parse.
\r
1813 * In the java version Ter just asks the JVM for all the information
\r
1814 * but in C we don't get this information, so I am going to do nothing
\r
1817 static pANTLR3_STACK
\r
1818 getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
\r
1823 static pANTLR3_STACK
\r
1824 getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
\r
1829 /** Convenience method for template rewrites - NYI.
\r
1831 static pANTLR3_HASH_TABLE
\r
1832 toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
\r
1837 static void ANTLR3_CDECL
\r
1838 freeIntTrie (void * trie)
\r
1840 ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
\r
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
\r
1845 * start index before. If the rule has not parsed input starting from the supplied start index,
\r
1846 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
\r
1847 * then it will return the point where it last stopped parsing after that start point.
\r
1850 * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
\r
1851 * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
\r
1852 * version of the table.
\r
1854 static ANTLR3_MARKER
\r
1855 getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
\r
1857 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
\r
1859 pANTLR3_INT_TRIE ruleList;
\r
1860 ANTLR3_MARKER stopIndex;
\r
1861 pANTLR3_TRIE_ENTRY entry;
\r
1863 /* See if we have a list in the ruleMemos for this rule, and if not, then create one
\r
1864 * as we will need it eventually if we are being asked for the memo here.
\r
1866 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
\r
1868 if (entry == NULL)
\r
1870 /* Did not find it, so create a new one for it, with a bit depth based on the
\r
1871 * size of the input stream. We need the bit depth to incorporate the number if
\r
1872 * bits required to represent the largest possible stop index in the input, which is the
\r
1873 * last character. An int stream is free to return the largest 64 bit offset if it has
\r
1874 * no idea of the size, but you should remember that this will cause the leftmost
\r
1875 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
\r
1877 ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
\r
1879 if (ruleList != NULL)
\r
1881 recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
\r
1884 /* We cannot have a stopIndex in a trie we have just created of course
\r
1886 return MEMO_RULE_UNKNOWN;
\r
1889 ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
\r
1891 /* See if there is a stop index associated with the supplied start index.
\r
1895 entry = ruleList->get(ruleList, ruleParseStart);
\r
1896 if (entry != NULL)
\r
1898 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
\r
1901 if (stopIndex == 0)
\r
1903 return MEMO_RULE_UNKNOWN;
\r
1909 /** Has this rule already parsed input at the current index in the
\r
1910 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
\r
1913 * This method has a side-effect: if we have seen this input for
\r
1914 * this rule and successfully parsed before, then seek ahead to
\r
1915 * 1 past the stop token matched for this rule last time.
\r
1917 static ANTLR3_BOOLEAN
\r
1918 alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
\r
1920 ANTLR3_MARKER stopIndex;
\r
1921 pANTLR3_LEXER lexer;
\r
1922 pANTLR3_PARSER parser;
\r
1923 pANTLR3_TREE_PARSER tparser;
\r
1924 pANTLR3_INT_STREAM is;
\r
1926 switch (recognizer->type)
\r
1928 case ANTLR3_TYPE_PARSER:
\r
1930 parser = (pANTLR3_PARSER) (recognizer->super);
\r
1933 is = parser->tstream->istream;
\r
1937 case ANTLR3_TYPE_TREE_PARSER:
\r
1939 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
1942 is = tparser->ctnstream->tnstream->istream;
\r
1946 case ANTLR3_TYPE_LEXER:
\r
1948 lexer = (pANTLR3_LEXER) (recognizer->super);
\r
1951 is = lexer->input->istream;
\r
1956 ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
\r
1957 return ANTLR3_FALSE;
\r
1962 /* See if we have a memo marker for this.
\r
1964 stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
\r
1966 if (stopIndex == MEMO_RULE_UNKNOWN)
\r
1968 return ANTLR3_FALSE;
\r
1971 if (stopIndex == MEMO_RULE_FAILED)
\r
1973 recognizer->state->failed = ANTLR3_TRUE;
\r
1977 is->seek(is, stopIndex+1);
\r
1980 /* If here then the rule was executed for this input already
\r
1982 return ANTLR3_TRUE;
\r
1985 /** Record whether or not this rule parsed the input at this position
\r
1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
\r
1991 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
\r
1993 pANTLR3_INT_TRIE ruleList;
\r
1994 pANTLR3_TRIE_ENTRY entry;
\r
1995 ANTLR3_MARKER stopIndex;
\r
1996 pANTLR3_LEXER lexer;
\r
1997 pANTLR3_PARSER parser;
\r
1998 pANTLR3_TREE_PARSER tparser;
\r
1999 pANTLR3_INT_STREAM is;
\r
2001 switch (recognizer->type)
\r
2003 case ANTLR3_TYPE_PARSER:
\r
2005 parser = (pANTLR3_PARSER) (recognizer->super);
\r
2007 is = parser->tstream->istream;
\r
2011 case ANTLR3_TYPE_TREE_PARSER:
\r
2013 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
2015 is = tparser->ctnstream->tnstream->istream;
\r
2019 case ANTLR3_TYPE_LEXER:
\r
2021 lexer = (pANTLR3_LEXER) (recognizer->super);
\r
2024 is = lexer->input->istream;
\r
2029 ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
\r
2035 stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
\r
2037 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
\r
2039 if (entry != NULL)
\r
2041 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
\r
2043 /* If we don't already have this entry, append it. The memoize trie does not
\r
2044 * accept duplicates so it won't add it if already there and we just ignore the
\r
2045 * return code as we don't care if it is there already.
\r
2047 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
\r
2050 /** A syntactic predicate. Returns true/false depending on whether
\r
2051 * the specified grammar fragment matches the current input stream.
\r
2052 * This resets the failed instance var afterwards.
\r
2054 static ANTLR3_BOOLEAN
\r
2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
\r
2057 ANTLR3_MARKER start;
\r
2058 pANTLR3_PARSER parser;
\r
2059 pANTLR3_TREE_PARSER tparser;
\r
2060 pANTLR3_INT_STREAM is;
\r
2062 switch (recognizer->type)
\r
2064 case ANTLR3_TYPE_PARSER:
\r
2066 parser = (pANTLR3_PARSER) (recognizer->super);
\r
2068 is = parser->tstream->istream;
\r
2072 case ANTLR3_TYPE_TREE_PARSER:
\r
2074 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
\r
2076 is = tparser->ctnstream->tnstream->istream;
\r
2082 ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
\r
2083 return ANTLR3_FALSE;
\r
2088 /* Begin backtracking so we can get back to where we started after trying out
\r
2089 * the syntactic predicate.
\r
2091 start = is->mark(is);
\r
2092 recognizer->state->backtracking++;
\r
2094 /* Try the syntactical predicate
\r
2100 is->rewind(is, start);
\r
2101 recognizer->state->backtracking--;
\r
2103 if (recognizer->state->failed == ANTLR3_TRUE)
\r
2105 /* Predicate failed
\r
2107 recognizer->state->failed = ANTLR3_FALSE;
\r
2108 return ANTLR3_FALSE;
\r
2112 /* Predicate was successful
\r
2114 recognizer->state->failed = ANTLR3_FALSE;
\r
2115 return ANTLR3_TRUE;
\r
2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
\r
2122 if (recognizer->state->following != NULL)
\r
2124 recognizer->state->following->free(recognizer->state->following);
\r
2127 // Reset the state flags
\r
2129 recognizer->state->errorRecovery = ANTLR3_FALSE;
\r
2130 recognizer->state->lastErrorIndex = -1;
\r
2131 recognizer->state->failed = ANTLR3_FALSE;
\r
2132 recognizer->state->errorCount = 0;
\r
2133 recognizer->state->backtracking = 0;
\r
2134 recognizer->state->following = NULL;
\r
2136 if (recognizer->state != NULL)
\r
2138 if (recognizer->state->ruleMemo != NULL)
\r
2140 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
\r
2141 recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
\r
2146 // Install a new following set
\r
2148 recognizer->state->following = antlr3StackNew(8);
\r
2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
\r
2153 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
\r
2156 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
\r
2158 return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
\r
2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
\r
2162 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
\r
2165 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
\r
2166 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
\r
2168 pANTLR3_TOKEN_STREAM ts;
\r
2169 pANTLR3_COMMON_TOKEN_STREAM cts;
\r
2170 pANTLR3_COMMON_TOKEN token;
\r
2171 pANTLR3_COMMON_TOKEN current;
\r
2172 pANTLR3_STRING text;
\r
2174 // Dereference the standard pointers
\r
2176 ts = (pANTLR3_TOKEN_STREAM)istream->super;
\r
2177 cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
\r
2179 // Work out what to use as the current symbol to make a line and offset etc
\r
2180 // If we are at EOF, we use the token before EOF
\r
2182 current = ts->_LT(ts, 1);
\r
2183 if (current->getType(current) == ANTLR3_TOKEN_EOF)
\r
2185 current = ts->_LT(ts, -1);
\r
2188 // Create a new empty token
\r
2190 if (recognizer->state->tokFactory == NULL)
\r
2192 // We don't yet have a token factory for making tokens
\r
2193 // we just need a fake one using the input stream of the current
\r
2196 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
\r
2198 token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
\r
2200 // Set some of the token properties based on the current token
\r
2202 token->setLine (token, current->getLine(current));
\r
2203 token->setCharPositionInLine (token, current->getCharPositionInLine(current));
\r
2204 token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
\r
2205 token->setType (token, expectedTokenType);
\r
2206 token->user1 = current->user1;
\r
2207 token->user2 = current->user2;
\r
2208 token->user3 = current->user3;
\r
2209 token->custom = current->custom;
\r
2210 token->lineStart = current->lineStart;
\r
2212 // Create the token text that shows it has been inserted
\r
2214 token->setText8(token, (pANTLR3_UINT8)"<missing ");
\r
2215 text = token->getText(token);
\r
2219 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
\r
2220 text->append8(text, (const char *)">");
\r
2223 // Finally return the pointer to our new token
\r
2229 #ifdef ANTLR3_WINDOWS
\r
2230 #pragma warning( default : 4100 )
\r