2 * Implementation of the ANTLR3 string and string factory classes
\r
5 // [The "BSD licence"]
\r
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
7 // http://www.temporal-wave.com
\r
8 // http://www.linkedin.com/in/jimidle
\r
10 // All rights reserved.
\r
12 // Redistribution and use in source and binary forms, with or without
\r
13 // modification, are permitted provided that the following conditions
\r
15 // 1. Redistributions of source code must retain the above copyright
\r
16 // notice, this list of conditions and the following disclaimer.
\r
17 // 2. Redistributions in binary form must reproduce the above copyright
\r
18 // notice, this list of conditions and the following disclaimer in the
\r
19 // documentation and/or other materials provided with the distribution.
\r
20 // 3. The name of the author may not be used to endorse or promote products
\r
21 // derived from this software without specific prior written permission.
\r
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
34 #include <antlr3string.h>
\r
38 static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
\r
39 static pANTLR3_STRING newRaw16 (pANTLR3_STRING_FACTORY factory);
\r
40 static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
\r
41 static pANTLR3_STRING newSize16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
\r
42 static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
\r
43 static pANTLR3_STRING newPtr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
\r
44 static pANTLR3_STRING newPtr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
\r
45 static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
\r
46 static pANTLR3_STRING newStr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
\r
47 static pANTLR3_STRING newStr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
\r
48 static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
\r
49 static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
\r
50 static pANTLR3_STRING printable16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
\r
51 static void closeFactory(pANTLR3_STRING_FACTORY factory);
\r
55 static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
\r
56 static pANTLR3_UINT8 set16_8 (pANTLR3_STRING string, const char * chars);
\r
57 static pANTLR3_UINT8 set16_16 (pANTLR3_STRING string, const char * chars);
\r
58 static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
\r
59 static pANTLR3_UINT8 append16_8 (pANTLR3_STRING string, const char * newbit);
\r
60 static pANTLR3_UINT8 append16_16 (pANTLR3_STRING string, const char * newbit);
\r
61 static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
\r
62 static pANTLR3_UINT8 insert16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
\r
63 static pANTLR3_UINT8 insert16_16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
\r
65 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
\r
66 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
\r
67 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
\r
69 static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
\r
70 static pANTLR3_UINT8 addc16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
\r
71 static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
\r
72 static pANTLR3_UINT8 addi16 (pANTLR3_STRING string, ANTLR3_INT32 i);
\r
73 static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
\r
74 static pANTLR3_UINT8 inserti16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
\r
76 static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
\r
77 static ANTLR3_UINT32 compare16_8 (pANTLR3_STRING string, const char * compStr);
\r
78 static ANTLR3_UINT32 compare16_16(pANTLR3_STRING string, const char * compStr);
\r
79 static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
\r
80 static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
\r
81 static ANTLR3_UCHAR charAt16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
\r
82 static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
\r
83 static pANTLR3_STRING subString16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
\r
84 static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
\r
85 static ANTLR3_INT32 toInt32_16 (pANTLR3_STRING string);
\r
86 static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
\r
87 static pANTLR3_STRING to8_16 (pANTLR3_STRING string);
\r
88 static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
\r
89 static pANTLR3_STRING toUTF8_16 (pANTLR3_STRING string);
\r
93 static void stringInit8 (pANTLR3_STRING string);
\r
94 static void stringInit16 (pANTLR3_STRING string);
\r
95 static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
\r
97 ANTLR3_API pANTLR3_STRING_FACTORY
\r
98 antlr3StringFactoryNew()
\r
100 pANTLR3_STRING_FACTORY factory;
\r
104 factory = (pANTLR3_STRING_FACTORY) ANTLR3_MALLOC(sizeof(ANTLR3_STRING_FACTORY));
\r
106 if (factory == NULL)
\r
111 /* Now we make a new list to track the strings.
\r
113 factory->strings = antlr3VectorNew(0);
\r
114 factory->index = 0;
\r
116 if (factory->strings == NULL)
\r
118 ANTLR3_FREE(factory);
\r
122 /* Install the API (8 bit assumed)
\r
124 factory->newRaw = newRaw8;
\r
125 factory->newSize = newSize8;
\r
127 factory->newPtr = newPtr8;
\r
128 factory->newPtr8 = newPtr8;
\r
129 factory->newStr = newStr8;
\r
130 factory->newStr8 = newStr8;
\r
131 factory->destroy = destroy;
\r
132 factory->printable = printable8;
\r
133 factory->destroy = destroy;
\r
134 factory->close = closeFactory;
\r
139 /** Create a string factory that is UCS2 (16 bit) encoding based
\r
141 ANTLR3_API pANTLR3_STRING_FACTORY
\r
142 antlr3UCS2StringFactoryNew()
\r
144 pANTLR3_STRING_FACTORY factory;
\r
146 /* Allocate an 8 bit factory, then override with 16 bit UCS2 functions where we
\r
149 factory = antlr3StringFactoryNew();
\r
151 if (factory == NULL)
\r
156 /* Override the 8 bit API with the UCS2 (mostly just 16 bit) API
\r
158 factory->newRaw = newRaw16;
\r
159 factory->newSize = newSize16;
\r
161 factory->newPtr = newPtr16_16;
\r
162 factory->newPtr8 = newPtr16_8;
\r
163 factory->newStr = newStr16_16;
\r
164 factory->newStr8 = newStr16_8;
\r
165 factory->printable = printable16;
\r
167 factory->destroy = destroy;
\r
168 factory->destroy = destroy;
\r
169 factory->close = closeFactory;
\r
179 static pANTLR3_STRING
\r
180 newRaw8 (pANTLR3_STRING_FACTORY factory)
\r
182 pANTLR3_STRING string;
\r
184 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
\r
186 if (string == NULL)
\r
191 /* Structure is allocated, now fill in the API etc.
\r
193 stringInit8(string);
\r
194 string->factory = factory;
\r
196 /* Add the string into the allocated list
\r
198 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
\r
199 string->index = factory->index++;
\r
208 static pANTLR3_STRING
\r
209 newRaw16 (pANTLR3_STRING_FACTORY factory)
\r
211 pANTLR3_STRING string;
\r
213 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
\r
215 if (string == NULL)
\r
220 /* Structure is allocated, now fill in the API etc.
\r
222 stringInit16(string);
\r
223 string->factory = factory;
\r
225 /* Add the string into the allocated list
\r
227 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
\r
228 string->index = factory->index++;
\r
233 void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
\r
235 /* First free the string itself if there was anything in it
\r
239 ANTLR3_FREE(string->chars);
\r
242 /* Now free the space for this string
\r
244 ANTLR3_FREE(string);
\r
254 stringInit8 (pANTLR3_STRING string)
\r
258 string->chars = NULL;
\r
259 string->encoding = ANTLR3_ENCODING_LATIN1;
\r
261 /* API for 8 bit strings*/
\r
263 string->set = set8;
\r
264 string->set8 = set8;
\r
265 string->append = append8;
\r
266 string->append8 = append8;
\r
267 string->insert = insert8;
\r
268 string->insert8 = insert8;
\r
269 string->addi = addi8;
\r
270 string->inserti = inserti8;
\r
271 string->addc = addc8;
\r
272 string->charAt = charAt8;
\r
273 string->compare = compare8;
\r
274 string->compare8 = compare8;
\r
275 string->subString = subString8;
\r
276 string->toInt32 = toInt32_8;
\r
277 string->to8 = to8_8;
\r
278 string->toUTF8 = toUTF8_8;
\r
279 string->compareS = compareS;
\r
280 string->setS = setS;
\r
281 string->appendS = appendS;
\r
282 string->insertS = insertS;
\r
291 stringInit16 (pANTLR3_STRING string)
\r
295 string->chars = NULL;
\r
296 string->encoding = ANTLR3_ENCODING_UCS2;
\r
298 /* API for 16 bit strings */
\r
300 string->set = set16_16;
\r
301 string->set8 = set16_8;
\r
302 string->append = append16_16;
\r
303 string->append8 = append16_8;
\r
304 string->insert = insert16_16;
\r
305 string->insert8 = insert16_8;
\r
306 string->addi = addi16;
\r
307 string->inserti = inserti16;
\r
308 string->addc = addc16;
\r
309 string->charAt = charAt16;
\r
310 string->compare = compare16_16;
\r
311 string->compare8 = compare16_8;
\r
312 string->subString = subString16;
\r
313 string->toInt32 = toInt32_16;
\r
314 string->to8 = to8_16;
\r
315 string->toUTF8 = toUTF8_16;
\r
317 string->compareS = compareS;
\r
318 string->setS = setS;
\r
319 string->appendS = appendS;
\r
320 string->insertS = insertS;
\r
326 * TODO: Implement UTF-8
\r
329 stringInitUTF8 (pANTLR3_STRING string)
\r
333 string->chars = NULL;
\r
339 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
\r
340 // a memcpy as we make no assumptions about the 8 bit encoding.
\r
342 static pANTLR3_STRING
\r
343 toUTF8_8 (pANTLR3_STRING string)
\r
345 return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
\r
348 // Convert a 16 bit (UCS2) string into a UTF8 representation using the Unicode.org
\r
349 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
\r
350 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
\r
351 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
\r
353 static pANTLR3_STRING
\r
354 toUTF8_16 (pANTLR3_STRING string)
\r
359 pANTLR3_STRING utf8String;
\r
361 ConversionResult cResult;
\r
363 // Allocate the output buffer, which needs to accommodate potentially
\r
364 // 3X (in bytes) the input size (in chars).
\r
366 utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
\r
368 if (utf8String != NULL)
\r
370 // Free existing allocation
\r
372 ANTLR3_FREE(utf8String->chars);
\r
374 // Reallocate according to maximum expected size
\r
376 utf8String->size = string->len *3;
\r
377 utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
\r
379 if (utf8String->chars != NULL)
\r
381 inputEnd = (UTF16 *) (string->chars);
\r
382 outputEnd = (UTF8 *) (utf8String->chars);
\r
384 // Call the Unicode converter
\r
386 cResult = ConvertUTF16toUTF8
\r
388 (const UTF16**)&inputEnd,
\r
389 ((const UTF16 *)(string->chars)) + string->len,
\r
391 outputEnd + utf8String->size - 1,
\r
395 // We don't really care if things failed or not here, we just converted
\r
396 // everything that was vaguely possible and stopped when it wasn't. It is
\r
397 // up to the grammar programmer to verify that the input is sensible.
\r
399 utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
\r
401 *(outputEnd+1) = '\0'; // Always null terminate
\r
408 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
\r
410 * \param[in] factory - Pointer to the string factory that owns strings
\r
411 * \param[in] size - In characters
\r
412 * \return pointer to the new string.
\r
414 static pANTLR3_STRING
\r
415 newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
\r
417 pANTLR3_STRING string;
\r
419 string = factory->newRaw(factory);
\r
421 if (string == NULL)
\r
426 /* Always add one more byte for a terminator ;-)
\r
428 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
\r
429 *(string->chars) = '\0';
\r
430 string->size = size + 1;
\r
436 * Creates a new string with enough capacity for size 16 bit characters plus a terminator.
\r
438 * \param[in] factory - POitner to the string factory that owns strings
\r
439 * \param[in] size - In characters
\r
440 * \return pointer to the new string.
\r
442 static pANTLR3_STRING
\r
443 newSize16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
\r
445 pANTLR3_STRING string;
\r
447 string = factory->newRaw(factory);
\r
449 if (string == NULL)
\r
454 /* Always add one more byte for a terminator ;-)
\r
456 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
\r
457 *(string->chars) = '\0';
\r
458 string->size = size+1; /* Size is always in characters, as is len */
\r
463 /** Creates a new 8 bit string initialized with the 8 bit characters at the
\r
464 * supplied ptr, of pre-determined size.
\r
465 * \param[in] factory - Pointer to the string factory that owns the strings
\r
466 * \param[in] ptr - Pointer to 8 bit encoded characters
\r
467 * \return pointer to the new string
\r
469 static pANTLR3_STRING
\r
470 newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
\r
472 pANTLR3_STRING string;
\r
474 string = factory->newSize(factory, size);
\r
476 if (string == NULL)
\r
488 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
\r
489 *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */
\r
490 string->len = size;
\r
496 /** Creates a new 16 bit string initialized with the 8 bit characters at the
\r
497 * supplied 8 bit character ptr, of pre-determined size.
\r
498 * \param[in] factory - Pointer to the string factory that owns the strings
\r
499 * \param[in] ptr - Pointer to 8 bit encoded characters
\r
500 * \return pointer to the new string
\r
502 static pANTLR3_STRING
\r
503 newPtr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
\r
505 pANTLR3_STRING string;
\r
507 /* newSize accepts size in characters, not bytes
\r
509 string = factory->newSize(factory, size);
\r
511 if (string == NULL)
\r
523 pANTLR3_UINT16 out;
\r
524 ANTLR3_INT32 inSize;
\r
526 out = (pANTLR3_UINT16)(string->chars);
\r
529 while (inSize-- > 0)
\r
531 *out++ = (ANTLR3_UINT16)(*ptr++);
\r
534 /* Terminate, these strings are usually used for Token streams and printing etc.
\r
536 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
\r
538 string->len = size;
\r
544 /** Creates a new 16 bit string initialized with the 16 bit characters at the
\r
545 * supplied ptr, of pre-determined size.
\r
546 * \param[in] factory - Pointer to the string factory that owns the strings
\r
547 * \param[in] ptr - Pointer to 16 bit encoded characters
\r
548 * \return pointer to the new string
\r
550 static pANTLR3_STRING
\r
551 newPtr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
\r
553 pANTLR3_STRING string;
\r
555 string = factory->newSize(factory, size);
\r
557 if (string == NULL)
\r
569 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
\r
571 /* Terminate, these strings are usually used for Token streams and printing etc.
\r
573 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
\r
574 string->len = size;
\r
580 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
\r
581 * \param[in] factory - Pointer to the string factory that owns strings.
\r
582 * \param[in] ptr - Pointer to the 8 bit encoded string
\r
583 * \return Pointer to the newly initialized string
\r
585 static pANTLR3_STRING
\r
586 newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
\r
588 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
\r
591 /** Create a new 16 bit string from the supplied, null terminated, 8 bit string pointer.
\r
592 * \param[in] factory - Pointer to the string factory that owns strings.
\r
593 * \param[in] ptr - Pointer to the 8 bit encoded string
\r
594 * \return POinter to the newly initialized string
\r
596 static pANTLR3_STRING
\r
597 newStr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
\r
599 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
\r
602 /** Create a new 16 bit string from the supplied, null terminated, 16 bit string pointer.
\r
603 * \param[in] factory - Pointer to the string factory that owns strings.
\r
604 * \param[in] ptr - Pointer to the 16 bit encoded string
\r
605 * \return Pointer to the newly initialized string
\r
607 static pANTLR3_STRING
\r
608 newStr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
\r
611 ANTLR3_UINT32 count;
\r
613 /** First, determine the length of the input string
\r
615 in = (pANTLR3_UINT16)ptr;
\r
618 while (*in++ != '\0')
\r
622 return factory->newPtr(factory, ptr, count);
\r
626 destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
\r
628 // Record which string we are deleting
\r
630 ANTLR3_UINT32 strIndex = string->index;
\r
632 // Ensure that the string was not factory made, or we would try
\r
633 // to delete memory that wasn't allocated outside the factory
\r
635 // Remove the specific indexed string from the vector
\r
637 factory->strings->del(factory->strings, strIndex);
\r
639 // One less string in the vector, so decrement the factory index
\r
640 // so that the next string allocated is indexed correctly with
\r
641 // respect to the vector.
\r
645 // Now we have to reindex the strings in the vector that followed
\r
646 // the one we just deleted. We only do this if the one we just deleted
\r
647 // was not the last one.
\r
649 if (strIndex< factory->index)
\r
651 // We must reindex the strings after the one we just deleted.
\r
652 // The one that follows the one we just deleted is also out
\r
653 // of whack, so we start there.
\r
657 for (i = strIndex; i < factory->index; i++)
\r
659 // Renumber the entry
\r
661 ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
\r
665 // The string has been destroyed and the elements of the factory are reindexed.
\r
670 static pANTLR3_STRING
\r
671 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
\r
673 pANTLR3_STRING string;
\r
675 /* We don't need to be too efficient here, this is mostly for error messages and so on.
\r
677 pANTLR3_UINT8 scannedText;
\r
680 /* Assume we need as much as twice as much space to parse out the control characters
\r
682 string = factory->newSize(factory, instr->len *2 + 1);
\r
684 /* Scan through and replace unprintable (in terms of this routine)
\r
687 scannedText = string->chars;
\r
689 for (i = 0; i < instr->len; i++)
\r
691 if (*(instr->chars + i) == '\n')
\r
693 *scannedText++ = '\\';
\r
694 *scannedText++ = 'n';
\r
696 else if (*(instr->chars + i) == '\r')
\r
698 *scannedText++ = '\\';
\r
699 *scannedText++ = 'r';
\r
701 else if (!isprint(*(instr->chars +i)))
\r
703 *scannedText++ = '?';
\r
707 *scannedText++ = *(instr->chars + i);
\r
710 *scannedText = '\0';
\r
712 string->len = (ANTLR3_UINT32)(scannedText - string->chars);
\r
717 static pANTLR3_STRING
\r
718 printable16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
\r
720 pANTLR3_STRING string;
\r
722 /* We don't need to be too efficient here, this is mostly for error messages and so on.
\r
724 pANTLR3_UINT16 scannedText;
\r
725 pANTLR3_UINT16 inText;
\r
727 ANTLR3_UINT32 outLen;
\r
729 /* Assume we need as much as twice as much space to parse out the control characters
\r
731 string = factory->newSize(factory, instr->len *2 + 1);
\r
733 /* Scan through and replace unprintable (in terms of this routine)
\r
736 scannedText = (pANTLR3_UINT16)(string->chars);
\r
737 inText = (pANTLR3_UINT16)(instr->chars);
\r
740 for (i = 0; i < instr->len; i++)
\r
742 if (*(inText + i) == '\n')
\r
744 *scannedText++ = '\\';
\r
745 *scannedText++ = 'n';
\r
748 else if (*(inText + i) == '\r')
\r
750 *scannedText++ = '\\';
\r
751 *scannedText++ = 'r';
\r
754 else if (!isprint(*(inText +i)))
\r
756 *scannedText++ = '?';
\r
761 *scannedText++ = *(inText + i);
\r
765 *scannedText = '\0';
\r
767 string->len = outLen;
\r
772 /** Fascist Capitalist Pig function created
\r
773 * to oppress the workers comrade.
\r
776 closeFactory (pANTLR3_STRING_FACTORY factory)
\r
778 /* Delete the vector we were tracking the strings with, this will
\r
779 * causes all the allocated strings to be deallocated too
\r
781 factory->strings->free(factory->strings);
\r
783 /* Delete the space for the factory itself
\r
785 ANTLR3_FREE((void *)factory);
\r
788 static pANTLR3_UINT8
\r
789 append8 (pANTLR3_STRING string, const char * newbit)
\r
793 len = (ANTLR3_UINT32)strlen(newbit);
\r
795 if (string->size < (string->len + len + 1))
\r
797 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
\r
798 string->size = string->len + len + 1;
\r
801 /* Note we copy one more byte than the strlen in order to get the trailing
\r
803 ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
\r
804 string->len += len;
\r
806 return string->chars;
\r
809 static pANTLR3_UINT8
\r
810 append16_8 (pANTLR3_STRING string, const char * newbit)
\r
813 pANTLR3_UINT16 apPoint;
\r
814 ANTLR3_UINT32 count;
\r
816 len = (ANTLR3_UINT32)strlen(newbit);
\r
818 if (string->size < (string->len + len + 1))
\r
820 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
\r
821 string->size = string->len + len + 1;
\r
824 apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
\r
825 string->len += len;
\r
827 for (count = 0; count < len; count++)
\r
829 *apPoint++ = *(newbit + count);
\r
833 return string->chars;
\r
836 static pANTLR3_UINT8
\r
837 append16_16 (pANTLR3_STRING string, const char * newbit)
\r
842 /** First, determine the length of the input string
\r
844 in = (pANTLR3_UINT16)newbit;
\r
847 while (*in++ != '\0')
\r
852 if (string->size < (string->len + len + 1))
\r
854 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
\r
855 string->size = string->len + len + 1;
\r
858 /* Note we copy one more byte than the strlen in order to get the trailing delimiter
\r
860 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
\r
861 string->len += len;
\r
863 return string->chars;
\r
866 static pANTLR3_UINT8
\r
867 set8 (pANTLR3_STRING string, const char * chars)
\r
871 len = (ANTLR3_UINT32)strlen(chars);
\r
872 if (string->size < len + 1)
\r
874 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
\r
875 string->size = len + 1;
\r
878 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
\r
880 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
\r
883 return string->chars;
\r
887 static pANTLR3_UINT8
\r
888 set16_8 (pANTLR3_STRING string, const char * chars)
\r
891 ANTLR3_UINT32 count;
\r
892 pANTLR3_UINT16 apPoint;
\r
894 len = (ANTLR3_UINT32)strlen(chars);
\r
895 if (string->size < len + 1)
\r
897 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
\r
898 string->size = len + 1;
\r
900 apPoint = ((pANTLR3_UINT16)string->chars);
\r
903 for (count = 0; count < string->len; count++)
\r
905 *apPoint++ = *(chars + count);
\r
909 return string->chars;
\r
912 static pANTLR3_UINT8
\r
913 set16_16 (pANTLR3_STRING string, const char * chars)
\r
918 /** First, determine the length of the input string
\r
920 in = (pANTLR3_UINT16)chars;
\r
923 while (*in++ != '\0')
\r
928 if (string->size < len + 1)
\r
930 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
\r
931 string->size = len + 1;
\r
934 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
\r
936 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
\r
939 return string->chars;
\r
943 static pANTLR3_UINT8
\r
944 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
\r
946 if (string->size < string->len + 2)
\r
948 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
\r
949 string->size = string->len + 2;
\r
951 *(string->chars + string->len) = (ANTLR3_UINT8)c;
\r
952 *(string->chars + string->len + 1) = '\0';
\r
955 return string->chars;
\r
958 static pANTLR3_UINT8
\r
959 addc16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
\r
961 pANTLR3_UINT16 ptr;
\r
963 if (string->size < string->len + 2)
\r
965 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
\r
966 string->size = string->len + 2;
\r
968 ptr = (pANTLR3_UINT16)(string->chars);
\r
970 *(ptr + string->len) = (ANTLR3_UINT16)c;
\r
971 *(ptr + string->len + 1) = '\0';
\r
974 return string->chars;
\r
977 static pANTLR3_UINT8
\r
978 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
\r
980 ANTLR3_UINT8 newbit[32];
\r
982 sprintf((char *)newbit, "%d", i);
\r
984 return string->append8(string, (const char *)newbit);
\r
986 static pANTLR3_UINT8
\r
987 addi16 (pANTLR3_STRING string, ANTLR3_INT32 i)
\r
989 ANTLR3_UINT8 newbit[32];
\r
991 sprintf((char *)newbit, "%d", i);
\r
993 return string->append8(string, (const char *)newbit);
\r
996 static pANTLR3_UINT8
\r
997 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
\r
999 ANTLR3_UINT8 newbit[32];
\r
1001 sprintf((char *)newbit, "%d", i);
\r
1002 return string->insert8(string, point, (const char *)newbit);
\r
1004 static pANTLR3_UINT8
\r
1005 inserti16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
\r
1007 ANTLR3_UINT8 newbit[32];
\r
1009 sprintf((char *)newbit, "%d", i);
\r
1010 return string->insert8(string, point, (const char *)newbit);
\r
1013 static pANTLR3_UINT8
\r
1014 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
\r
1016 ANTLR3_UINT32 len;
\r
1018 if (point >= string->len)
\r
1020 return string->append(string, newbit);
\r
1023 len = (ANTLR3_UINT32)strlen(newbit);
\r
1027 return string->chars;
\r
1030 if (string->size < (string->len + len + 1))
\r
1032 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
\r
1033 string->size = string->len + len + 1;
\r
1036 /* Move the characters we are inserting before, including the delimiter
\r
1038 ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
\r
1040 /* Note we copy the exact number of bytes
\r
1042 ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
\r
1044 string->len += len;
\r
1046 return string->chars;
\r
1049 static pANTLR3_UINT8
\r
1050 insert16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
\r
1052 ANTLR3_UINT32 len;
\r
1053 ANTLR3_UINT32 count;
\r
1054 pANTLR3_UINT16 inPoint;
\r
1056 if (point >= string->len)
\r
1058 return string->append8(string, newbit);
\r
1061 len = (ANTLR3_UINT32)strlen(newbit);
\r
1065 return string->chars;
\r
1068 if (string->size < (string->len + len + 1))
\r
1070 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
\r
1071 string->size = string->len + len + 1;
\r
1074 /* Move the characters we are inserting before, including the delimiter
\r
1076 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
\r
1078 string->len += len;
\r
1080 inPoint = ((pANTLR3_UINT16)(string->chars))+point;
\r
1081 for (count = 0; count<len; count++)
\r
1083 *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
\r
1086 return string->chars;
\r
1089 static pANTLR3_UINT8
\r
1090 insert16_16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
\r
1092 ANTLR3_UINT32 len;
\r
1093 pANTLR3_UINT16 in;
\r
1095 if (point >= string->len)
\r
1097 return string->append(string, newbit);
\r
1100 /** First, determine the length of the input string
\r
1102 in = (pANTLR3_UINT16)newbit;
\r
1105 while (*in++ != '\0')
\r
1112 return string->chars;
\r
1115 if (string->size < (string->len + len + 1))
\r
1117 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
\r
1118 string->size = string->len + len + 1;
\r
1121 /* Move the characters we are inserting before, including the delimiter
\r
1123 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
\r
1126 /* Note we copy the exact number of characters
\r
1128 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
\r
1130 string->len += len;
\r
1132 return string->chars;
\r
1135 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
\r
1137 return string->set(string, (const char *)(chars->chars));
\r
1140 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
\r
1142 /* We may be passed an empty string, in which case we just return the current pointer
\r
1144 if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
\r
1146 return string->chars;
\r
1150 return string->append(string, (const char *)(newbit->chars));
\r
1154 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
\r
1156 return string->insert(string, point, (const char *)(newbit->chars));
\r
1159 /* Function that compares the text of a string to the supplied
\r
1160 * 8 bit character string and returns a result a la strcmp()
\r
1162 static ANTLR3_UINT32
\r
1163 compare8 (pANTLR3_STRING string, const char * compStr)
\r
1165 return strcmp((const char *)(string->chars), compStr);
\r
1168 /* Function that compares the text of a string with the supplied character string
\r
1169 * (which is assumed to be in the same encoding as the string itself) and returns a result
\r
1172 static ANTLR3_UINT32
\r
1173 compare16_8 (pANTLR3_STRING string, const char * compStr)
\r
1175 pANTLR3_UINT16 ourString;
\r
1176 ANTLR3_UINT32 charDiff;
\r
1178 ourString = (pANTLR3_UINT16)(string->chars);
\r
1180 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
\r
1182 charDiff = *ourString - *compStr;
\r
1183 if (charDiff != 0)
\r
1191 /* At this point, one of the strings was terminated
\r
1193 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
\r
1197 /* Function that compares the text of a string with the supplied character string
\r
1198 * (which is assumed to be in the same encoding as the string itself) and returns a result
\r
1201 static ANTLR3_UINT32
\r
1202 compare16_16 (pANTLR3_STRING string, const char * compStr8)
\r
1204 pANTLR3_UINT16 ourString;
\r
1205 pANTLR3_UINT16 compStr;
\r
1206 ANTLR3_UINT32 charDiff;
\r
1208 ourString = (pANTLR3_UINT16)(string->chars);
\r
1209 compStr = (pANTLR3_UINT16)(compStr8);
\r
1211 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
\r
1213 charDiff = *ourString - *compStr;
\r
1214 if (charDiff != 0)
\r
1222 /* At this point, one of the strings was terminated
\r
1224 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
\r
1227 /* Function that compares the text of a string with the supplied string
\r
1228 * (which is assumed to be in the same encoding as the string itself) and returns a result
\r
1231 static ANTLR3_UINT32
\r
1232 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
\r
1234 return string->compare(string, (const char *)compStr->chars);
\r
1238 /* Function that returns the character indexed at the supplied
\r
1239 * offset as a 32 bit character.
\r
1241 static ANTLR3_UCHAR
\r
1242 charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
\r
1244 if (offset > string->len)
\r
1246 return (ANTLR3_UCHAR)'\0';
\r
1250 return (ANTLR3_UCHAR)(*(string->chars + offset));
\r
1254 /* Function that returns the character indexed at the supplied
\r
1255 * offset as a 32 bit character.
\r
1257 static ANTLR3_UCHAR
\r
1258 charAt16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
\r
1260 if (offset > string->len)
\r
1262 return (ANTLR3_UCHAR)'\0';
\r
1266 return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
\r
1270 /* Function that returns a substring of the supplied string a la .subString(s,e)
\r
1271 * in java runtimes.
\r
1273 static pANTLR3_STRING
\r
1274 subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
\r
1276 pANTLR3_STRING newStr;
\r
1278 if (endIndex > string->len)
\r
1280 endIndex = string->len + 1;
\r
1282 newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
\r
1287 /* Returns a substring of the supplied string a la .subString(s,e)
\r
1288 * in java runtimes.
\r
1290 static pANTLR3_STRING
\r
1291 subString16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
\r
1293 pANTLR3_STRING newStr;
\r
1295 if (endIndex > string->len)
\r
1297 endIndex = string->len + 1;
\r
1299 newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
\r
1304 /* Function that can convert the characters in the string to an integer
\r
1306 static ANTLR3_INT32
\r
1307 toInt32_8 (struct ANTLR3_STRING_struct * string)
\r
1309 return atoi((const char *)(string->chars));
\r
1312 /* Function that can convert the characters in the string to an integer
\r
1314 static ANTLR3_INT32
\r
1315 toInt32_16 (struct ANTLR3_STRING_struct * string)
\r
1317 pANTLR3_UINT16 input;
\r
1318 ANTLR3_INT32 value;
\r
1319 ANTLR3_BOOLEAN negate;
\r
1322 input = (pANTLR3_UINT16)(string->chars);
\r
1323 negate = ANTLR3_FALSE;
\r
1325 if (*input == (ANTLR3_UCHAR)'-')
\r
1327 negate = ANTLR3_TRUE;
\r
1330 else if (*input == (ANTLR3_UCHAR)'+')
\r
1335 while (*input != '\0' && isdigit(*input))
\r
1337 value = value * 10;
\r
1338 value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
\r
1342 return negate ? -value : value;
\r
1345 /* Function that returns a pointer to an 8 bit version of the string,
\r
1346 * which in this case is just the string as this is
\r
1347 * 8 bit encoding anyway.
\r
1349 static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
\r
1354 /* Function that returns an 8 bit version of the string,
\r
1355 * which in this case is returning all the 16 bit characters
\r
1356 * narrowed back into 8 bits, with characters that are too large
\r
1357 * replaced with '_'
\r
1359 static pANTLR3_STRING to8_16 (pANTLR3_STRING string)
\r
1361 pANTLR3_STRING newStr;
\r
1364 /* Create a new 8 bit string
\r
1366 newStr = newRaw8(string->factory);
\r
1368 if (newStr == NULL)
\r
1373 /* Always add one more byte for a terminator
\r
1375 newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
\r
1376 newStr->size = string->len + 1;
\r
1377 newStr->len = string->len;
\r
1379 /* Now copy each 16 bit character, making it an 8 bit character of
\r
1382 for (i=0; i<string->len; i++)
\r
1386 c = *(((pANTLR3_UINT16)(string->chars)) + i);
\r
1388 *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
\r
1393 *(newStr->chars + newStr->len) = '\0';
\r