2 * Simple string interface allows indiscriminate allocation of strings
\r
3 * such that they can be allocated all over the place and released in
\r
4 * one chunk via a string factory - saves lots of hassle in remembering what
\r
5 * strings were allocated where.
\r
7 #ifndef _ANTLR3_STRING_H
\r
8 #define _ANTLR3_STRING_H
\r
10 // [The "BSD licence"]
\r
11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
12 // http://www.temporal-wave.com
\r
13 // http://www.linkedin.com/in/jimidle
\r
15 // All rights reserved.
\r
17 // Redistribution and use in source and binary forms, with or without
\r
18 // modification, are permitted provided that the following conditions
\r
20 // 1. Redistributions of source code must retain the above copyright
\r
21 // notice, this list of conditions and the following disclaimer.
\r
22 // 2. Redistributions in binary form must reproduce the above copyright
\r
23 // notice, this list of conditions and the following disclaimer in the
\r
24 // documentation and/or other materials provided with the distribution.
\r
25 // 3. The name of the author may not be used to endorse or promote products
\r
26 // derived from this software without specific prior written permission.
\r
28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
39 #include <antlr3defs.h>
\r
40 #include <antlr3collections.h>
\r
46 /** Base string class tracks the allocations and provides simple string
\r
47 * tracking functions. Mostly you can work directly on the string for things
\r
48 * that don't reallocate it, like strchr() etc. Perhaps someone will want to provide implementations for UTF8
\r
51 typedef struct ANTLR3_STRING_struct
\r
54 /** The factory that created this string
\r
56 pANTLR3_STRING_FACTORY factory;
\r
58 /** Pointer to the current string value (starts at NULL unless
\r
59 * the string allocator is told to create it with a pre known size.
\r
61 pANTLR3_UINT8 chars;
\r
63 /** Current length of the string up to, but not including, the trailing '\0'
\r
64 * Note that the actual allocation (->size)
\r
65 * is always at least one byte more than this to accommodate trailing '\0'
\r
69 /** Current size of the string in bytes including the trailing '\0'
\r
73 /** Index of string (allocation number) in case someone wants
\r
74 * to explicitly release it.
\r
76 ANTLR3_UINT32 index;
\r
78 /** Occasionally it is useful to know what the encoding of the string
\r
79 * actually is, hence it is stored here as one of the ANTLR3_ENCODING_ values
\r
81 ANTLR3_UINT8 encoding;
\r
83 /** Pointer to function that sets the string value to a specific string in the default encoding
\r
84 * for this string. For instance, if this is ASCII 8 bit, then this function is the same as set8
\r
85 * but if the encoding is 16 bit, then the pointer is assumed to point to 16 bit characters not
\r
88 pANTLR3_UINT8 (*set) (struct ANTLR3_STRING_struct * string, const char * chars);
\r
90 /** Pointer to function that sets the string value to a specific 8 bit string in the default encoding
\r
91 * for this string. For instance, if this is an 8 bit string, then this function is the same as set
\r
92 * but if the encoding is 16 bit, then the pointer is assumed to point to 8 bit characters that must
\r
93 * be converted to 16 bit characters on the fly.
\r
95 pANTLR3_UINT8 (*set8) (struct ANTLR3_STRING_struct * string, const char * chars);
\r
97 /** Pointer to function that appends a raw char * type pointer in the default encoding
\r
98 * for this string. For instance, if this is ASCII 8 bit, then this function is the same as append8
\r
99 * but if the encoding is 16 bit, then the pointer is assumed to point to 16 bit characters not
\r
102 pANTLR3_UINT8 (*append) (struct ANTLR3_STRING_struct * string, const char * newbit);
\r
104 /** Pointer to function that appends a raw char * type pointer in the default encoding
\r
105 * for this string. For instance, if this is a 16 bit string, then this function assumes the pointer
\r
106 * points to 8 bit characters that must be converted on the fly.
\r
108 pANTLR3_UINT8 (*append8) (struct ANTLR3_STRING_struct * string, const char * newbit);
\r
110 /** Pointer to function that inserts the supplied string at the specified
\r
111 * offset in the current string in the default encoding for this string. For instance, if this is an 8
\r
112 * bit string, then this is the same as insert8, but if this is a 16 bit string, then the pointer
\r
113 * must point to 16 bit characters.
\r
116 pANTLR3_UINT8 (*insert) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
\r
118 /** Pointer to function that inserts the supplied string at the specified
\r
119 * offset in the current string in the default encoding for this string. For instance, if this is a 16 bit string
\r
120 * then the pointer is assumed to point at 8 bit characters that must be converted on the fly.
\r
122 pANTLR3_UINT8 (*insert8) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
\r
124 /** Pointer to function that sets the string value to a copy of the supplied string (strings must be in the
\r
127 pANTLR3_UINT8 (*setS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * chars);
\r
129 /** Pointer to function that appends a copy of the characters contained in another string. Strings must be in the
\r
132 pANTLR3_UINT8 (*appendS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * newbit);
\r
134 /** Pointer to function that inserts a copy of the characters in the supplied string at the specified
\r
135 * offset in the current string. strings must be in the same encoding.
\r
137 pANTLR3_UINT8 (*insertS) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, struct ANTLR3_STRING_struct * newbit);
\r
139 /** Pointer to function that inserts the supplied integer in string form at the specified
\r
140 * offset in the current string.
\r
142 pANTLR3_UINT8 (*inserti) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
\r
144 /** Pointer to function that adds a single character to the end of the string, in the encoding of the
\r
145 * string - 8 bit, 16 bit, utf-8 etc. Input is a single UTF32 (32 bits wide integer) character.
\r
147 pANTLR3_UINT8 (*addc) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 c);
\r
149 /** Pointer to function that adds the stringified representation of an integer
\r
152 pANTLR3_UINT8 (*addi) (struct ANTLR3_STRING_struct * string, ANTLR3_INT32 i);
\r
154 /** Pointer to function that compares the text of a string to the supplied
\r
155 * 8 bit character string and returns a result a la strcmp()
\r
157 ANTLR3_UINT32 (*compare8) (struct ANTLR3_STRING_struct * string, const char * compStr);
\r
159 /** Pointer to a function that compares the text of a string with the supplied character string
\r
160 * (which is assumed to be in the same encoding as the string itself) and returns a result
\r
163 ANTLR3_UINT32 (*compare) (struct ANTLR3_STRING_struct * string, const char * compStr);
\r
165 /** Pointer to a function that compares the text of a string with the supplied string
\r
166 * (which is assumed to be in the same encoding as the string itself) and returns a result
\r
169 ANTLR3_UINT32 (*compareS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * compStr);
\r
171 /** Pointer to a function that returns the character indexed at the supplied
\r
172 * offset as a 32 bit character.
\r
174 ANTLR3_UCHAR (*charAt) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 offset);
\r
176 /** Pointer to a function that returns a substring of the supplied string a la .subString(s,e)
\r
177 * in the Java language.
\r
179 struct ANTLR3_STRING_struct *
\r
180 (*subString) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
\r
182 /** Pointer to a function that returns the integer representation of any numeric characters
\r
183 * at the beginning of the string
\r
185 ANTLR3_INT32 (*toInt32) (struct ANTLR3_STRING_struct * string);
\r
187 /** Pointer to a function that yields an 8 bit string regardless of the encoding of the supplied
\r
188 * string. This is useful when you want to use the text of a token in some way that requires an 8 bit
\r
189 * value, such as the key for a hashtable. The function is required to produce a usable string even
\r
190 * if the text given as input has characters that do not fit in 8 bit space, it will replace them
\r
191 * with some arbitrary character such as '?'
\r
193 struct ANTLR3_STRING_struct *
\r
194 (*to8) (struct ANTLR3_STRING_struct * string);
\r
196 /// Pointer to a function that yields a UTF8 encoded string of the current string,
\r
197 /// regardless of the current encoding of the string. Because there is currently no UTF8
\r
198 /// handling in the string class, it therefore creates a string that is useful only for read only
\r
199 /// applications as it will not contain methods that deal with UTF8 at the moment.
\r
201 struct ANTLR3_STRING_struct *
\r
202 (*toUTF8) (struct ANTLR3_STRING_struct * string);
\r
207 /** Definition of the string factory interface, which creates and tracks
\r
208 * strings for you of various shapes and sizes.
\r
210 typedef struct ANTLR3_STRING_FACTORY_struct
\r
212 /** List of all the strings that have been allocated by the factory
\r
214 pANTLR3_VECTOR strings;
\r
216 /* Index of next string that we allocate
\r
218 ANTLR3_UINT32 index;
\r
220 /** Pointer to function that manufactures an empty string
\r
222 pANTLR3_STRING (*newRaw) (struct ANTLR3_STRING_FACTORY_struct * factory);
\r
224 /** Pointer to function that manufactures a raw string with no text in it but space for size
\r
227 pANTLR3_STRING (*newSize) (struct ANTLR3_STRING_FACTORY_struct * factory, ANTLR3_UINT32 size);
\r
229 /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed
\r
230 * to point to characters in the same encoding as the string type, hence if this is a 16 bit string the
\r
231 * pointer should point to 16 bit characters.
\r
233 pANTLR3_STRING (*newPtr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
\r
235 /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed to
\r
236 * point at 8 bit characters which must be converted on the fly to the encoding of the actual string.
\r
238 pANTLR3_STRING (*newPtr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
\r
240 /** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer is
\r
241 * assumed to point to characters in the same encoding as the string itself, i.e. 16 bit if a 16 bit
\r
242 * string and so on.
\r
244 pANTLR3_STRING (*newStr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
\r
246 /** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer should
\r
247 * point to 8 bit characters regardless of the actual encoding of the string. The 8 bit characters
\r
248 * will be converted to the actual string encoding on the fly.
\r
250 pANTLR3_STRING (*newStr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
\r
252 /** Pointer to function that deletes the string altogether
\r
254 void (*destroy) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
\r
256 /** Pointer to function that returns a copy of the string in printable form without any control
\r
257 * characters in it.
\r
259 pANTLR3_STRING (*printable)(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
\r
261 /** Pointer to function that closes the factory
\r
263 void (*close) (struct ANTLR3_STRING_FACTORY_struct * factory);
\r
266 ANTLR3_STRING_FACTORY;
\r