2 * While the C runtime does not need to model the state of
\r
3 * multiple lexers and parsers in the same way as the Java runtime does
\r
4 * it is no overhead to reflect that model. In fact the
\r
5 * C runtime has always been able to share recognizer state.
\r
7 * This 'class' therefore defines all the elements of a recognizer
\r
8 * (either lexer, parser or tree parser) that are needed to
\r
9 * track the current recognition state. Multiple recognizers
\r
10 * may then share this state, for instance when one grammar
\r
14 #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_H
\r
15 #define _ANTLR3_RECOGNIZER_SHARED_STATE_H
\r
17 // [The "BSD licence"]
\r
18 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
19 // http://www.temporal-wave.com
\r
20 // http://www.linkedin.com/in/jimidle
\r
22 // All rights reserved.
\r
24 // Redistribution and use in source and binary forms, with or without
\r
25 // modification, are permitted provided that the following conditions
\r
27 // 1. Redistributions of source code must retain the above copyright
\r
28 // notice, this list of conditions and the following disclaimer.
\r
29 // 2. Redistributions in binary form must reproduce the above copyright
\r
30 // notice, this list of conditions and the following disclaimer in the
\r
31 // documentation and/or other materials provided with the distribution.
\r
32 // 3. The name of the author may not be used to endorse or promote products
\r
33 // derived from this software without specific prior written permission.
\r
35 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
36 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
37 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
38 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
39 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
40 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
41 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
42 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
43 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
44 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
46 #include <antlr3defs.h>
\r
52 /** All the data elements required to track the current state
\r
53 * of any recognizer (lexer, parser, tree parser).
\r
54 * May be shared between multiple recognizers such that
\r
55 * grammar inheritance is easily supported.
\r
57 typedef struct ANTLR3_RECOGNIZER_SHARED_STATE_struct
\r
59 /** If set to ANTLR3_TRUE then the recognizer has an exception
\r
60 * condition (this is tested by the generated code for the rules of the grammar).
\r
63 ANTLR3_BOOLEAN error;
\r
65 /** Points to the first in a possible chain of exceptions that the
\r
66 * recognizer has discovered.
\r
68 pANTLR3_EXCEPTION exception;
\r
70 /** Track around a hint from the creator of the recognizer as to how big this
\r
71 * thing is going to get, as the actress said to the bishop. This allows us
\r
72 * to tune hash tables accordingly. This might not be the best place for this
\r
73 * in the end but we will see.
\r
75 ANTLR3_UINT32 sizeHint;
\r
77 /** Track the set of token types that can follow any rule invocation.
\r
78 * Stack structure, to support: List<BitSet>.
\r
80 pANTLR3_STACK following;
\r
83 /** This is true when we see an error and before having successfully
\r
84 * matched a token. Prevents generation of more than one error message
\r
87 ANTLR3_BOOLEAN errorRecovery;
\r
89 /** The index into the input stream where the last error occurred.
\r
90 * This is used to prevent infinite loops where an error is found
\r
91 * but no token is consumed during recovery...another error is found,
\r
92 * ad nauseam. This is a failsafe mechanism to guarantee that at least
\r
93 * one token/tree node is consumed for two errors.
\r
95 ANTLR3_MARKER lastErrorIndex;
\r
97 /** In lieu of a return value, this indicates that a rule or token
\r
98 * has failed to match. Reset to false upon valid token match.
\r
100 ANTLR3_BOOLEAN failed;
\r
102 /** When the recognizer terminates, the error handling functions
\r
103 * will have incremented this value if any error occurred (that was displayed). It can then be
\r
104 * used by the grammar programmer without having to use static globals.
\r
106 ANTLR3_UINT32 errorCount;
\r
108 /** If 0, no backtracking is going on. Safe to exec actions etc...
\r
109 * If >0 then it's the level of backtracking.
\r
111 ANTLR3_INT32 backtracking;
\r
113 /** Integer trie (pANTLR3_INT_TRIE) used for rule memoizing.
\r
114 * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is
\r
115 * the memoization table for ruleIndex. For key ruleStartIndex, you
\r
116 * get back the stop token for associated rule or MEMO_RULE_FAILED.
\r
118 * This is only used if rule memoization is on.
\r
120 pANTLR3_INT_TRIE ruleMemo;
\r
122 /** Pointer to an array of token names
\r
123 * that are generally useful in error reporting. The generated parsers install
\r
124 * this pointer. The table it points to is statically allocated as 8 bit ascii
\r
125 * at parser compile time - grammar token names are thus restricted in character
\r
126 * sets, which does not seem too terrible.
\r
128 pANTLR3_UINT8 * tokenNames;
\r
130 /** User programmable pointer that can be used for instance as a place to
\r
131 * store some tracking structure specific to the grammar that would not normally
\r
132 * be available to the error handling functions.
\r
136 /** The goal of all lexer rules/methods is to create a token object.
\r
137 * This is an instance variable as multiple rules may collaborate to
\r
138 * create a single token. For example, NUM : INT | FLOAT ;
\r
139 * In this case, you want the INT or FLOAT rule to set token and not
\r
140 * have it reset to a NUM token in rule NUM.
\r
142 pANTLR3_COMMON_TOKEN token;
\r
144 /** The goal of all lexer rules being to create a token, then a lexer
\r
145 * needs to build a token factory to create them.
\r
147 pANTLR3_TOKEN_FACTORY tokFactory;
\r
149 /** A lexer is a source of tokens, produced by all the generated (or
\r
150 * hand crafted if you like) matching rules. As such it needs to provide
\r
151 * a token source interface implementation.
\r
153 pANTLR3_TOKEN_SOURCE tokSource;
\r
155 /** The channel number for the current token
\r
157 ANTLR3_UINT32 channel;
\r
159 /** The token type for the current token
\r
161 ANTLR3_UINT32 type;
\r
163 /** The input line (where it makes sense) on which the first character of the current token resides.
\r
166 ANTLR3_INT32 tokenStartLine;
\r
168 /** The character position of the first character of the current token
\r
169 * within the line specified by tokenStartLine
\r
171 ANTLR3_INT32 tokenStartCharPositionInLine;
\r
173 /** What character index in the stream did the current token start at?
\r
174 * Needed, for example, to get the text for current token. Set at
\r
175 * the start of nextToken.
\r
177 ANTLR3_MARKER tokenStartCharIndex;
\r
179 /** Text for the current token. This can be overridden by setting this
\r
180 * variable directly or by using the SETTEXT() macro (preferred) in your
\r
183 pANTLR3_STRING text;
\r
185 /** User controlled variables that will be installed in a newly created
\r
188 ANTLR3_UINT32 user1, user2, user3;
\r
191 /** Input stream stack, which allows the C programmer to switch input streams
\r
192 * easily and allow the standard nextToken() implementation to deal with it
\r
193 * as this is a common requirement.
\r
195 pANTLR3_STACK streams;
\r
197 /// A stack of token/tree rewrite streams that are available for use
\r
198 /// by a parser or tree parser that is using rewrites to generate
\r
199 /// an AST. This saves each rule in the recognizer from having to
\r
200 /// allocate and deallocate rewrite streams on entry and exit. As
\r
201 /// the parser recurses through the rules it will reach a steady state
\r
202 /// of the maximum number of allocated streams, which instead of
\r
203 /// deallocating them at rule exit, it will place on this stack for
\r
204 /// reuse. The streams are then all finally freed when this stack is freed.
\r
207 pANTLR3_VECTOR rStreams;
\r
210 ANTLR3_RECOGNIZER_SHARED_STATE;
\r