2 * \brief Defines the interface for a common token.
\r
4 * All token streams should provide their tokens using an instance
\r
5 * of this common token. A custom pointer is provided, where you may attach
\r
6 * a further structure to enhance the common token if you feel the need
\r
7 * to do so. The C runtime will assume that a token provides implementations
\r
8 * of the interface functions, but all of them may be replaced by your own
\r
9 * implementation if you require it.
\r
11 #ifndef _ANTLR3_COMMON_TOKEN_H
\r
12 #define _ANTLR3_COMMON_TOKEN_H
\r
14 // [The "BSD licence"]
\r
15 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
16 // http://www.temporal-wave.com
\r
17 // http://www.linkedin.com/in/jimidle
\r
19 // All rights reserved.
\r
21 // Redistribution and use in source and binary forms, with or without
\r
22 // modification, are permitted provided that the following conditions
\r
24 // 1. Redistributions of source code must retain the above copyright
\r
25 // notice, this list of conditions and the following disclaimer.
\r
26 // 2. Redistributions in binary form must reproduce the above copyright
\r
27 // notice, this list of conditions and the following disclaimer in the
\r
28 // documentation and/or other materials provided with the distribution.
\r
29 // 3. The name of the author may not be used to endorse or promote products
\r
30 // derived from this software without specific prior written permission.
\r
32 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
33 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
34 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
35 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
36 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
37 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
38 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
39 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
40 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
41 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
43 #include <antlr3defs.h>
\r
45 /** How many tokens to allocate at once in the token factory
\r
47 #define ANTLR3_FACTORY_POOL_SIZE 1024
\r
49 /* Base token types, which all lexer/parser tokens come after in sequence.
\r
52 /** Indicator of an invalid token
\r
54 #define ANTLR3_TOKEN_INVALID 0
\r
56 #define ANTLR3_EOR_TOKEN_TYPE 1
\r
58 /** Imaginary token type to cause a traversal of child nodes in a tree parser
\r
60 #define ANTLR3_TOKEN_DOWN 2
\r
62 /** Imaginary token type to signal the end of a stream of child nodes.
\r
64 #define ANTLR3_TOKEN_UP 3
\r
66 /** First token that can be used by users/generated code
\r
69 #define ANTLR3_MIN_TOKEN_TYPE (ANTLR3_TOKEN_UP + 1) /* Parenthesized so the expansion stays a single operand inside larger expressions. */
\r
71 /** End of file token
\r
73 #define ANTLR3_TOKEN_EOF (ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)
\r
75 /** Default channel for a token
\r
77 #define ANTLR3_TOKEN_DEFAULT_CHANNEL 0
\r
79 /** Reserved channel number for a HIDDEN token - a token that
\r
80 * is hidden from the parser.
\r
88 // Indicates whether this token is carrying:
\r
91 // ------+--------------------------------------
\r
92 // 0 | Nothing (neither rewrite text, nor setText)
\r
93 // 1 | char * to user supplied rewrite text
\r
94 // 2 | pANTLR3_STRING because of setText or similar action
\r
96 #define ANTLR3_TEXT_NONE 0
\r
97 #define ANTLR3_TEXT_CHARP 1
\r
98 #define ANTLR3_TEXT_STRING 2
\r
100 /** The definition of an ANTLR3 common token structure, which all implementations
\r
101 * of a token stream should provide, installing any further structures in the
\r
102 * custom pointer element of this structure.
\r
105 * Token streams are in essence provided by lexers or other programs that serve
\r
108 typedef struct ANTLR3_COMMON_TOKEN_struct
\r
110 /** The actual type of this token
\r
112 ANTLR3_UINT32 type;
\r
114 /** Indicates that a token was produced from the token factory and therefore
\r
115 * the freeToken() method should not do anything itself because
\r
116 * the token factory is responsible for deleting it.
\r
118 ANTLR3_BOOLEAN factoryMade;
\r
120 /// A string factory that we can use if we ever need the text of a token
\r
121 /// and need to manufacture a pANTLR3_STRING
\r
123 pANTLR3_STRING_FACTORY strFactory;
\r
125 /** The line number in the input stream where this token was derived from
\r
127 ANTLR3_UINT32 line;
\r
129 /** The offset into the input stream that the line in which this
\r
130 * token resides starts.
\r
134 /** The character position in the line that this token was derived from
\r
136 ANTLR3_INT32 charPosition;
\r
138 /** The virtual channel that this token exists in.
\r
140 ANTLR3_UINT32 channel;
\r
142 /** Pointer to the input stream that this token originated in.
\r
144 pANTLR3_INPUT_STREAM input;
\r
146 /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
\r
148 ANTLR3_MARKER index;
\r
150 /** The character offset in the input stream where the text for this token
\r
153 ANTLR3_MARKER start;
\r
155 /** The character offset in the input stream where the text for this token
\r
158 ANTLR3_MARKER stop;
\r
160 /// Indicates whether this token is carrying:
\r
162 /// State | Meaning
\r
163 /// ------+--------------------------------------
\r
164 /// 0 | Nothing (neither rewrite text, nor setText)
\r
165 /// 1 | char * to user supplied rewrite text
\r
166 /// 2 | pANTLR3_STRING because of setText or similar action
\r
168 /// Affects the union structure tokText below
\r
169 /// (uses 32 bit so alignment is always good)
\r
171 ANTLR3_UINT32 textState;
\r
175 /// Pointer that is used when the token just has a pointer to
\r
176 /// a char *, such as when a rewrite of an imaginary token supplies
\r
177 /// a string in the grammar. No sense in constructing a pANTLR3_STRING just
\r
178 /// for that, as mostly the text will not be accessed - if it is, then
\r
179 /// we will build a pANTLR3_STRING for it at that point.
\r
181 pANTLR3_UCHAR chars;
\r
183 /// Some token types actually do carry around their associated text, hence
\r
184 /// (*getText)() will return this pointer if it is not NULL
\r
186 pANTLR3_STRING text;
\r
190 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
\r
191 * as the standard structure for a token, a number of user programmable
\r
192 * elements are allowed in a token. This is one of them.
\r
194 ANTLR3_UINT32 user1;
\r
196 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
\r
197 * as the standard structure for a token, a number of user programmable
\r
198 * elements are allowed in a token. This is one of them.
\r
200 ANTLR3_UINT32 user2;
\r
202 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
\r
203 * as the standard structure for a token, a number of user programmable
\r
204 * elements are allowed in a token. This is one of them.
\r
206 ANTLR3_UINT32 user3;
\r
208 /** Pointer to a custom element that the ANTLR3 programmer may define and install
\r
212 /** Pointer to a function that knows how to free the custom structure when the
\r
213 * token is destroyed.
\r
215 void (*freeCustom)(void * custom);
\r
217 /* ==============================
\r
221 /** Pointer to function that returns the text pointer of a token, use
\r
222 * toString() if you want a pANTLR3_STRING version of the token.
\r
224 pANTLR3_STRING (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);
\r
226 /** Pointer to a function that 'might' be able to set the text associated
\r
227 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
\r
228 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actually have
\r
229 * strings associated with them but just point into the current input stream. These
\r
230 * tokens will implement this function with a function that errors out (probably
\r
233 void (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);
\r
235 /** Pointer to a function that 'might' be able to set the text associated
\r
236 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
\r
237 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actually have
\r
238 * strings associated with them but just point into the current input stream. These
\r
239 * tokens will implement this function with a function that errors out (probably
\r
242 void (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);
\r
244 /** Pointer to a function that returns the token type of this token
\r
246 ANTLR3_UINT32 (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);
\r
248 /** Pointer to a function that sets the type of this token
\r
250 void (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);
\r
252 /** Pointer to a function that gets the 'line' number where this token resides
\r
254 ANTLR3_UINT32 (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);
\r
256 /** Pointer to a function that sets the 'line' number where this token resides
\r
258 void (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);
\r
260 /** Pointer to a function that gets the offset in the line where this token exists
\r
262 ANTLR3_INT32 (*getCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
264 /** Pointer to a function that sets the offset in the line where this token exists
\r
266 void (*setCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);
\r
268 /** Pointer to a function that gets the channel that this token was placed in (parsers
\r
269 * can 'tune' to these channels).
\r
271 ANTLR3_UINT32 (*getChannel) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
273 /** Pointer to a function that sets the channel that this token should belong to
\r
275 void (*setChannel) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);
\r
277 /** Pointer to a function that returns an index 0...n-1 of the token in the token
\r
280 ANTLR3_MARKER (*getTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
282 /** Pointer to a function that can set the token index of this token in the token
\r
285 void (*setTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);
\r
287 /** Pointer to a function that gets the start index in the input stream for this token.
\r
289 ANTLR3_MARKER (*getStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
291 /** Pointer to a function that sets the start index in the input stream for this token.
\r
293 void (*setStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
\r
295 /** Pointer to a function that gets the stop index in the input stream for this token.
\r
297 ANTLR3_MARKER (*getStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
299 /** Pointer to a function that sets the stop index in the input stream for this token.
\r
301 void (*setStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
\r
303 /** Pointer to a function that returns this token as a text representation that can be
\r
304 * printed with embedded control codes such as \n replaced with the printable sequence "\\n"
\r
305 * This also yields a string structure that can be used more easily than the pointer to
\r
306 * the input stream in certain situations.
\r
308 pANTLR3_STRING (*toString) (struct ANTLR3_COMMON_TOKEN_struct * token);
\r
310 ANTLR3_COMMON_TOKEN;
\r
312 /** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
\r
313 * rather than creating and freeing lots of little bits of memory.
\r
315 typedef struct ANTLR3_TOKEN_FACTORY_struct
\r
317 /** Pointers to the array of tokens that this factory has produced so far
\r
319 pANTLR3_COMMON_TOKEN *pools;
\r
321 /** Current pool tokens we are allocating from
\r
323 ANTLR3_INT32 thisPool;
\r
325 /** The next token to throw out from the pool, will cause a new pool allocation
\r
326 * if this exceeds the available tokenCount
\r
328 ANTLR3_UINT32 nextToken;
\r
330 /** Trick to initialize tokens and their API quickly, we set up this token when the
\r
331 * factory is created, then just copy the memory it uses into the new token.
\r
333 ANTLR3_COMMON_TOKEN unTruc;
\r
335 /** Pointer to an input stream that is using this token factory (may be NULL)
\r
336 * which will be assigned to the tokens automatically.
\r
338 pANTLR3_INPUT_STREAM input;
\r
340 /** Pointer to a function that returns a new token
\r
342 pANTLR3_COMMON_TOKEN (*newToken) (struct ANTLR3_TOKEN_FACTORY_struct * factory);
\r
344 /** Pointer to a function that changes the current input stream so that
\r
345 * new tokens are created with reference to their originating text.
\r
347 void (*setInputStream) (struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
\r
348 /** Pointer to a function that destroys the factory
\r
350 void (*close) (struct ANTLR3_TOKEN_FACTORY_struct * factory);
\r
352 ANTLR3_TOKEN_FACTORY;
\r