2 * Defines the interface for an ANTLR3 common token stream. Custom token streams should create
\r
3 * one of these and then override any functions by installing their own pointers
\r
4 * to implement the various functions.
\r
6 #ifndef _ANTLR3_TOKENSTREAM_H
\r
7 #define _ANTLR3_TOKENSTREAM_H
\r
9 // [The "BSD licence"]
\r
10 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
11 // http://www.temporal-wave.com
\r
12 // http://www.linkedin.com/in/jimidle
\r
14 // All rights reserved.
\r
16 // Redistribution and use in source and binary forms, with or without
\r
17 // modification, are permitted provided that the following conditions
\r
19 // 1. Redistributions of source code must retain the above copyright
\r
20 // notice, this list of conditions and the following disclaimer.
\r
21 // 2. Redistributions in binary form must reproduce the above copyright
\r
22 // notice, this list of conditions and the following disclaimer in the
\r
23 // documentation and/or other materials provided with the distribution.
\r
24 // 3. The name of the author may not be used to endorse or promote products
\r
25 // derived from this software without specific prior written permission.
\r
27 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
28 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
29 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
30 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
31 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
32 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
36 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
38 #include <antlr3defs.h>
\r
39 #include <antlr3string.h>
\r
40 #include <antlr3collections.h>
\r
41 #include <antlr3input.h>
\r
42 #include <antlr3commontoken.h>
\r
43 #include <antlr3bitset.h>
\r
44 #include <antlr3debugeventlistener.h>
\r
50 /** Definition of a token source, which has a pointer to a function that
\r
51 * returns the next token (using a token factory if it is going to be
\r
52 * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly
\r
53 * different to the Java interface because we have no way to implement
\r
54 * multiple interfaces without defining them in the interface structure
\r
55 * or casting (void *), which is too convoluted.
\r
57 typedef struct ANTLR3_TOKEN_SOURCE_struct
\r
59 /** Pointer to a function that returns the next token in the stream.
\r
61 pANTLR3_COMMON_TOKEN (*nextToken)(struct ANTLR3_TOKEN_SOURCE_struct * tokenSource);
\r
63 /** Whoever is providing tokens needs to provide a string factory too.
\r
65 pANTLR3_STRING_FACTORY strFactory;
\r
67 /** A special pre-allocated token, which signifies End Of Tokens. Because this must
\r
68 * be set up with the current input index and so on, we embed the structure and
\r
69 * return the address of it. It is marked as factoryMade, so that it is never
\r
70 * attempted to be freed.
\r
72 ANTLR3_COMMON_TOKEN eofToken;
\r
74 /// A special pre-allocated token, which is returned by mTokens() if the
\r
75 /// lexer rule said to just skip the generated token altogether.
\r
76 /// Having this single token stops us wasting memory by having the token factory
\r
77 /// actually create something that we are going to SKIP(); anyway.
\r
79 ANTLR3_COMMON_TOKEN skipToken;
\r
81 /** Whatever is supplying the token source interface, needs a pointer to
\r
82 * itself so that this pointer can be passed to it when the nextToken
\r
83 * function is called.
// NOTE(review): no member declaration follows this comment in this listing; the self-pointer it describes appears to have been dropped - confirm against the upstream header.
\r
87 /** When the token source is constructed, it is populated with the file
\r
88 * name from whence the tokens were produced by the lexer. This pointer is a
\r
89 * copy of the one supplied by the CharStream (and may be NULL) so should
\r
90 * not be manipulated other than to copy or print it.
\r
92 pANTLR3_STRING fileName;
\r
94 ANTLR3_TOKEN_SOURCE;
\r
96 /** Definition of the ANTLR3 common token stream interface.
\r
98 * Much of the documentation for this interface is stolen from Ter's Java implementation.
\r
100 typedef struct ANTLR3_TOKEN_STREAM_struct
\r
102 /** Pointer to the token source for this stream
\r
104 pANTLR3_TOKEN_SOURCE tokenSource;
\r
106 /** Whatever is providing this interface needs a pointer to itself
\r
107 * so that this can be passed back to it whenever the api functions
// NOTE(review): this comment is cut off and no member declaration follows it in this listing; the self-pointer it describes appears to have been dropped - confirm against the upstream header.
\r
112 /** All input streams implement the ANTLR3_INT_STREAM interface...
\r
114 pANTLR3_INT_STREAM istream;
\r
116 /// Debugger interface, if this is a debugging token stream
\r
118 pANTLR3_DEBUG_EVENT_LISTENER debugger;
\r
120 /// Indicates the initial stream state for dbgConsume()
\r
122 ANTLR3_BOOLEAN initialStreamState;
\r
124 /** Get Token at current input pointer + k ahead where k=1 is next Token.
\r
125 * k<0 indicates tokens in the past. So -1 is previous token and -2 is
\r
126 * two tokens ago. LT(0) is undefined. For k>=n, return Token.EOFToken.
\r
127 * Return null for LT(0) and any index that results in an absolute address
\r
128 * that is negative.
\r
130 pANTLR3_COMMON_TOKEN (*_LT) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 k);
\r
132 /** Get a token at an absolute index i; 0..n-1. This is really only
\r
133 * needed for profiling and debugging and token stream rewriting.
\r
134 * If you don't want to buffer up tokens, then this method makes no
\r
135 * sense for you. Naturally you can't use the rewrite stream feature.
\r
136 * I believe DebugTokenStream can easily be altered to not use
\r
137 * this method, removing the dependency.
\r
139 pANTLR3_COMMON_TOKEN (*get) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 i);
\r
141 /** Where is this stream pulling tokens from? This is not the name, but
\r
142 * a pointer into an interface that contains an ANTLR3_TOKEN_SOURCE interface.
\r
143 * The Token Source interface contains a pointer to the input stream and a pointer
\r
144 * to a function that returns the next token.
\r
146 pANTLR3_TOKEN_SOURCE (*getTokenSource) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
\r
148 /** Function that installs a token source for the stream
\r
150 void (*setTokenSource) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream,
\r
151 pANTLR3_TOKEN_SOURCE tokenSource);
\r
153 /** Return the text of all the tokens in the stream, as the old tramp in
\r
154 * Leeds market used to say; "Get the lot!"
\r
156 pANTLR3_STRING (*toString) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
\r
158 /** Return the text of all tokens from start to stop, inclusive.
\r
159 * If the stream does not buffer all the tokens then it can just
\r
160 * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in
\r
161 * an action in that case.
\r
163 pANTLR3_STRING (*toStringSS) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop);
\r
165 /** Because the user is not required to use a token with an index stored
\r
166 * in it, we must provide a means for two token objects themselves to
\r
167 * indicate the start/end location. Most often this will just delegate
\r
168 * to the other toString(int,int). This is also parallel with
\r
169 * the pTREENODE_STREAM->toString(Object,Object).
\r
171 pANTLR3_STRING (*toStringTT) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop);
\r
174 /** Function that sets the token stream into debugging mode
\r
176 void (*setDebugListener) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_DEBUG_EVENT_LISTENER debugger);
\r
178 /** Function that knows how to free the memory for an ANTLR3_TOKEN_STREAM
\r
180 void (*free) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
\r
182 ANTLR3_TOKEN_STREAM;
\r
184 /** Common token stream is an implementation of ANTLR3_TOKEN_STREAM for the default
\r
185 * parsers and recognizers. You may of course build your own implementation if
\r
186 * you are so inclined.
\r
188 typedef struct ANTLR3_COMMON_TOKEN_STREAM_struct
\r
190 /** The ANTLR3_TOKEN_STREAM interface implementation, which also includes
\r
191 * the intstream implementation. We could duplicate the pANTLR3_INT_STREAM
\r
192 * in this interface and initialize it to a copy, but this could be confusing;
\r
193 * it just results in one more level of indirection and I think that with
\r
194 * judicious use of 'const' later, the optimizer will do a decent job.
\r
196 pANTLR3_TOKEN_STREAM tstream;
\r
198 /** Whatever is supplying the COMMON_TOKEN_STREAM needs a pointer to itself
\r
199 * so that this can be accessed by any of the API functions which it implements.
// NOTE(review): no member declaration follows this comment in this listing; the self-pointer it describes appears to have been dropped - confirm against the upstream header.
\r
203 /** Records every single token pulled from the source indexed by the token index.
\r
204 * There might be more efficient ways to do this, such as referencing directly in to
\r
205 * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not
\r
206 * a huge overhead as it only stores pointers anyway, but allows for iterations and
// NOTE(review): this comment is cut off in this listing, and it mentions ANTLR3_LIST while the member below is declared as pANTLR3_VECTOR - confirm against the upstream header.
\r
209 pANTLR3_VECTOR tokens;
\r
211 /** Override map of tokens. If a token type has an entry in here, then
\r
212 * the pointer in the table points to an int, being the override channel number
\r
213 * that should always be used for this token type.
\r
215 pANTLR3_LIST channelOverrides;
\r
217 /** Discarded set. If a token has an entry in this table, then it is thrown
\r
218 * away (data pointer is always NULL).
\r
220 pANTLR3_LIST discardSet;
\r
222 /* The channel number that this token stream is tuned to. For instance, whitespace
\r
223 * is usually tuned to channel 99, which no token stream would normally tune to and
\r
224 * so it is thrown away.
\r
226 ANTLR3_UINT32 channel;
\r
228 /** If this flag is set to ANTLR3_TRUE, then tokens that the stream sees that are not
\r
229 * in the channel that this stream is tuned to, are not tracked in the
\r
230 * tokens table. When set to false, ALL tokens are added to the tracking.
\r
232 ANTLR3_BOOLEAN discardOffChannel;
\r
234 /** The index into the tokens list of the current token (the next one that will be
\r
235 * consumed). p = -1 indicates that the token list is empty.
// NOTE(review): the member 'p' described above has no declaration in this listing - confirm its presence and type against the upstream header.
\r
239 /** A simple filter mechanism whereby you can tell this token stream
\r
240 * to force all tokens of type ttype to be on channel. For example,
\r
241 * when interpreting, we cannot exec actions so we need to tell
\r
242 * the stream to force all WS and NEWLINE to be a different, ignored
\r
245 void (*setTokenTypeChannel) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
\r
246 ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel);
\r
248 /** Add a particular token type to the discard set. If a token is found to belong
\r
249 * to this set, then it is skipped/thrown away
\r
251 void (*discardTokenType) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 ttype);
\r
253 /** Signal to discard off channel tokens from here on in.
\r
255 void (*discardOffChannelToks)(struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_BOOLEAN discard);
\r
257 /** Function that returns a pointer to the ANTLR3_VECTOR of all tokens
\r
258 * in the stream (this causes the buffer to fill if we have not got any yet)
\r
260 pANTLR3_VECTOR (*getTokens) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream);
\r
262 /** Function that returns all the tokens between a start and a stop index.
\r
263 * TODO: This is a new list (Ack! Maybe this is a reason to have factories for LISTS and HASHTABLES etc :-( come back to this)
\r
265 pANTLR3_LIST (*getTokenRange) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop);
\r
267 /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens
\r
269 pANTLR3_LIST (*getTokensSet) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
\r
270 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types);
\r
272 /** Function that returns all the tokens indicated by being a member of the supplied List
\r
274 pANTLR3_LIST (*getTokensList) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
\r
275 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list);
\r
277 /** Function that returns all tokens of a certain type within a range.
\r
279 pANTLR3_LIST (*getTokensType) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
\r
280 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type);
\r
283 /** Function that knows how to free an ANTLR3_COMMON_TOKEN_STREAM
\r
285 void (*free) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream);
\r
287 ANTLR3_COMMON_TOKEN_STREAM;
\r