2 * Defines the basic structure to support recognizing by either a lexer,
\r
3 * parser, or tree parser.
\r
4 * \addtogroup ANTLR3_BASE_RECOGNIZER
\r
7 #ifndef _ANTLR3_BASERECOGNIZER_H
\r
8 #define _ANTLR3_BASERECOGNIZER_H
\r
10 // [The "BSD licence"]
\r
11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
12 // http://www.temporal-wave.com
\r
13 // http://www.linkedin.com/in/jimidle
\r
15 // All rights reserved.
\r
17 // Redistribution and use in source and binary forms, with or without
\r
18 // modification, are permitted provided that the following conditions
\r
20 // 1. Redistributions of source code must retain the above copyright
\r
21 // notice, this list of conditions and the following disclaimer.
\r
22 // 2. Redistributions in binary form must reproduce the above copyright
\r
23 // notice, this list of conditions and the following disclaimer in the
\r
24 // documentation and/or other materials provided with the distribution.
\r
25 // 3. The name of the author may not be used to endorse or promote products
\r
26 // derived from this software without specific prior written permission.
\r
28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
39 #include <antlr3defs.h>
\r
40 #include <antlr3exception.h>
\r
41 #include <antlr3input.h>
\r
42 #include <antlr3tokenstream.h>
\r
43 #include <antlr3commontoken.h>
\r
44 #include <antlr3commontreenodestream.h>
\r
45 #include <antlr3debugeventlistener.h>
\r
46 #include <antlr3recognizersharedstate.h>
\r
48 /** Type indicator for a lexer recognizer
\r
50 #define ANTLR3_TYPE_LEXER 0x0001
\r
52 /** Type indicator for a parser recognizer
\r
54 #define ANTLR3_TYPE_PARSER 0x0002
\r
56 /** Type indicator for a tree parser recognizer
\r
58 #define ANTLR3_TYPE_TREE_PARSER 0x0004
\r
64 /** \brief Base tracking context structure for all types of
\r
67 typedef struct ANTLR3_BASE_RECOGNIZER_struct
\r
69 /// Whatever super structure is providing this interface needs a pointer to itself
\r
70 /// so that this can be passed back to it whenever the api functions
\r
71 /// are called back from here.
\r
75 /// Indicates the type of recognizer that we are an instance of.
\r
76 /// The programmer may set this to anything of course, but the default
\r
77 /// implementations of the interface only really understand the built in
\r
78 /// types, so new error handlers etc. would probably be required as well.
\r
80 /// Valid types are:
\r
82 /// - #ANTLR3_TYPE_LEXER
\r
83 /// - #ANTLR3_TYPE_PARSER
\r
84 /// - #ANTLR3_TYPE_TREE_PARSER
\r
88 /// A pointer to the shared recognizer state, such that multiple
\r
89 /// recognizers can use the same input streams and so on (in
\r
90 /// the case of grammar inheritance for instance).
\r
92 pANTLR3_RECOGNIZER_SHARED_STATE state;
\r
94 /// If set to something other than NULL, then this member
\r
95 /// points to an instance of the debugger interface. In general, the
\r
96 /// debugger is only referenced internally in recovery/error operations
\r
97 /// so that it does not cause overhead by having to check this pointer
\r
98 /// in every function/method
\r
100 pANTLR3_DEBUG_EVENT_LISTENER debugger;
\r
103 /// Pointer to a function that matches the current input symbol
\r
104 /// against the supplied type. The function causes an error if a
\r
105 /// match is not found and the default implementation will also
\r
106 /// attempt to perform one token insertion or deletion if that is
\r
107 /// possible with the input stream. You can override the default
\r
108 /// implementation by installing a pointer to your own function
\r
109 /// in this interface after the recognizer has initialized. This can
\r
110 /// perform different recovery options or not recover at all and so on.
\r
111 /// To ignore recovery altogether, see the comments in the default
\r
112 /// implementation of this function in antlr3baserecognizer.c
\r
114 /// Note that errors are signalled by setting the error flag below
\r
115 /// and creating a new exception structure and installing it in the
\r
116 /// exception pointer below (you can chain these if you like and handle them
\r
117 /// in some customized way).
\r
119 void * (*match) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
120 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
\r
122 /// Pointer to a function that matches the next token/char in the input stream
\r
123 /// regardless of what it actually is.
\r
125 void (*matchAny) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
127 /// Pointer to a function that decides if the token ahead of the current one is the
\r
128 /// one we were looking for, in which case the current one is very likely extraneous
\r
129 /// and can be reported that way.
\r
132 (*mismatchIsUnwantedToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype);
\r
134 /// Pointer to a function that decides if the current token is one that can logically
\r
135 /// follow the one we were looking for, in which case the one we were looking for is
\r
136 /// probably missing from the input.
\r
139 (*mismatchIsMissingToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow);
\r
141 /** Pointer to a function that works out what to do when a token mismatch
\r
142 * occurs, so that Tree parsers can behave differently to other recognizers.
\r
144 void (*mismatch) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
145 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
\r
147 /** Pointer to a function to call to report a recognition problem. You may override
\r
148 * this function with your own function, but refer to the standard implementation
\r
149 * in antlr3baserecognizer.c for guidance. The function should recognize whether
\r
150 * error recovery is in force, so that it does not print out more than one error message
\r
151 * for the same error. From the java comments in BaseRecognizer.java:
\r
153 * This method sets errorRecovery to indicate the parser is recovering
\r
154 * not parsing. Once in recovery mode, no errors are generated.
\r
155 * To get out of recovery mode, the parser must successfully match
\r
156 * a token (after a resync). So it will go:
\r
159 * 2. enter recovery mode, report error
\r
160 * 3. consume until token found in resynch set
\r
161 * 4. try to resume parsing
\r
162 * 5. next match() will reset errorRecovery mode
\r
164 void (*reportError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
166 /** Pointer to a function that is called to display a recognition error message. You may
\r
167 * override this function independently of (*reportError)() above as that function calls
\r
168 * this one to do the actual exception printing.
\r
170 void (*displayRecognitionError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames);
\r
172 /// Get number of recognition errors (lexer, parser, tree parser). Each
\r
173 /// recognizer tracks its own number. So parser and lexer each have
\r
174 /// separate count. Does not count the spurious errors found between
\r
175 /// an error and next valid token match
\r
177 /// \see reportError()
\r
180 (*getNumberOfSyntaxErrors) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
182 /** Pointer to a function that recovers from an error found in the input stream.
\r
183 * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
\r
184 * be from a mismatched token that the (*match)() could not recover from.
\r
186 void (*recover) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
188 /** Pointer to a function that is a hook to listen to token consumption during error recovery.
\r
189 * This is mainly used by the debug parser to send events to the listener.
\r
191 void (*beginResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
193 /** Pointer to a function that is a hook to listen to token consumption during error recovery.
\r
194 * This is mainly used by the debug parser to send events to the listener.
\r
196 void (*endResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
198 /** Pointer to a function that is a hook to listen to token consumption during error recovery.
\r
199 * This is mainly used by the debug parser to send events to the listener.
\r
201 void (*beginBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level);
\r
203 /** Pointer to a function that is a hook to listen to token consumption during error recovery.
\r
204 * This is mainly used by the debug parser to send events to the listener.
\r
206 void (*endBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
\r
208 /** Pointer to a function to compute the error recovery set for the current rule.
\r
209 * \see antlr3ComputeErrorRecoverySet() for details.
\r
211 pANTLR3_BITSET (*computeErrorRecoverySet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
213 /** Pointer to a function that computes the context-sensitive FOLLOW set for the
\r
215 * \see antlr3ComputeCSRuleFollow() for details.
\r
217 pANTLR3_BITSET (*computeCSRuleFollow) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
219 /** Pointer to a function to combine follow bitsets.
\r
220 * \see antlr3CombineFollows() for details.
\r
222 pANTLR3_BITSET (*combineFollows) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
223 ANTLR3_BOOLEAN exact);
\r
225 /** Pointer to a function that recovers from a mismatched token in the input stream.
\r
226 * \see antlr3RecoverMismatch() for details.
\r
228 void * (*recoverFromMismatchedToken)
\r
229 (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
230 ANTLR3_UINT32 ttype,
\r
231 pANTLR3_BITSET_LIST follow);
\r
233 /** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner
\r
234 * to (*recoverFromMismatchedToken)
\r
236 void * (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
237 pANTLR3_BITSET_LIST follow);
\r
239 /** Pointer to common routine to handle single token insertion for recovery functions.
\r
241 ANTLR3_BOOLEAN (*recoverFromMismatchedElement)
\r
242 (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
243 pANTLR3_BITSET_LIST follow);
\r
245 /** Pointer to function that consumes input until the next token matches
\r
248 void (*consumeUntil) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
249 ANTLR3_UINT32 tokenType);
\r
251 /** Pointer to function that consumes input until the next token matches
\r
252 * one in the given set.
\r
254 void (*consumeUntilSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
255 pANTLR3_BITSET set);
\r
257 /** Pointer to function that returns an ANTLR3_LIST of the strings that identify
\r
258 * the rules in the parser that got you to this point. Can be overridden by installing your
\r
259 * own function set.
\r
261 * \todo Document how to override invocation stack functions.
\r
263 pANTLR3_STACK (*getRuleInvocationStack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
264 pANTLR3_STACK (*getRuleInvocationStackNamed) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
265 pANTLR3_UINT8 name);
\r
267 /** Pointer to a function that converts an ANTLR3_LIST of tokens to an ANTLR3_LIST of
\r
268 * string token names. As this is mostly used in string template processing it may not be useful
\r
269 * in the C runtime.
\r
271 pANTLR3_HASH_TABLE (*toStrings) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
272 pANTLR3_HASH_TABLE);
\r
274 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
\r
275 * start index before. If the rule has not parsed input starting from the supplied start index,
\r
276 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the supplied start point
\r
277 * then it will return the point where it last stopped parsing after that start point.
\r
279 ANTLR3_MARKER (*getRuleMemoization) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
280 ANTLR3_INTKEY ruleIndex,
\r
281 ANTLR3_MARKER ruleParseStart);
\r
283 /** Pointer to function that determines whether the rule has parsed input at the current index
\r
284 * in the input stream
\r
286 ANTLR3_BOOLEAN (*alreadyParsedRule) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
287 ANTLR3_MARKER ruleIndex);
\r
289 /** Pointer to function that records whether the rule has parsed the input at a
\r
290 * current position successfully or not.
\r
292 void (*memoize) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
293 ANTLR3_MARKER ruleIndex,
\r
294 ANTLR3_MARKER ruleParseStart);
\r
296 /// Pointer to a function that returns the current input symbol.
\r
297 /// This is placed into any label for the associated token ref; e.g., x=ID. Token
\r
298 /// and tree parsers need to return different objects. Rather than test
\r
299 /// for input stream type or change the IntStream interface, I use
\r
300 /// a simple method to ask the recognizer to tell me what the current
\r
301 /// input symbol is.
\r
303 /// This is ignored for lexers and the lexer implementation of this
\r
304 /// function should return NULL.
\r
306 void * (*getCurrentInputSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
307 pANTLR3_INT_STREAM istream);
\r
309 /// Conjure up a missing token during error recovery.
\r
311 /// The recognizer attempts to recover from single missing
\r
312 /// symbols. But, actions might refer to that missing symbol.
\r
313 /// For example, x=ID {f($x);}. The action clearly assumes
\r
314 /// that there has been an identifier matched previously and that
\r
315 /// $x points at that token. If that token is missing, but
\r
316 /// the next token in the stream is what we want we assume that
\r
317 /// this token is missing and we keep going. Because we
\r
318 /// have to return some token to replace the missing token,
\r
319 /// we have to conjure one up. This method gives the user control
\r
320 /// over the tokens returned for missing tokens. Mostly,
\r
321 /// you will want to create something special for identifier
\r
322 /// tokens. For literals such as '{' and ',', the default
\r
323 /// action in the parser or tree parser works. It simply creates
\r
324 /// a CommonToken of the appropriate type. The text will be the token.
\r
325 /// If you change what tokens must be created by the lexer,
\r
326 /// override this method to create the appropriate tokens.
\r
328 void * (*getMissingSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
\r
329 pANTLR3_INT_STREAM istream,
\r
330 pANTLR3_EXCEPTION e,
\r
331 ANTLR3_UINT32 expectedTokenType,
\r
332 pANTLR3_BITSET_LIST follow);
\r
334 /** Pointer to a function that returns whether the supplied grammar function
\r
335 * will parse the current input stream or not. This is the way that syntactic
\r
336 * predicates are evaluated. Unlike java, C is perfectly happy to invoke code
\r
337 * via a pointer to a function (hence that's what all the ANTLR3 C interfaces
\r
340 ANTLR3_BOOLEAN (*synpred) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, void * ctx,
\r
341 void (*predicate)(void * ctx));
\r
343 /** Pointer to a function that can construct a generic exception structure
\r
344 * with such information as the input stream can provide.
\r
346 void (*exConstruct) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
348 /** Reset the recognizer
\r
350 void (*reset) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
352 /** Pointer to a function that knows how to free the resources of a base recognizer.
\r
354 void (*free) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
\r
357 ANTLR3_BASE_RECOGNIZER;
\r
363 #include <antlr3lexer.h>
\r
364 #include <antlr3parser.h>
\r
365 #include <antlr3treeparser.h>
\r
370 #endif /* _ANTLR3_BASERECOGNIZER_H */
\r