2 * Implementation of the ANTLR3 string and string factory classes
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7 // http://www.temporal-wave.com
8 // http://www.linkedin.com/in/jimidle
10 // All rights reserved.
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 // 3. The name of the author may not be used to endorse or promote products
21 // derived from this software without specific prior written permission.
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <antlr3string.h>
38 static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
39 static pANTLR3_STRING newRaw16 (pANTLR3_STRING_FACTORY factory);
40 static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41 static pANTLR3_STRING newSize16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42 static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43 static pANTLR3_STRING newPtr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44 static pANTLR3_STRING newPtr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45 static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46 static pANTLR3_STRING newStr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47 static pANTLR3_STRING newStr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48 static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49 static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50 static pANTLR3_STRING printable16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51 static void closeFactory(pANTLR3_STRING_FACTORY factory);
55 static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
56 static pANTLR3_UINT8 set16_8 (pANTLR3_STRING string, const char * chars);
57 static pANTLR3_UINT8 set16_16 (pANTLR3_STRING string, const char * chars);
58 static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
59 static pANTLR3_UINT8 append16_8 (pANTLR3_STRING string, const char * newbit);
60 static pANTLR3_UINT8 append16_16 (pANTLR3_STRING string, const char * newbit);
61 static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62 static pANTLR3_UINT8 insert16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63 static pANTLR3_UINT8 insert16_16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
65 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
66 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
67 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
69 static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
70 static pANTLR3_UINT8 addc16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
71 static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
72 static pANTLR3_UINT8 addi16 (pANTLR3_STRING string, ANTLR3_INT32 i);
73 static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74 static pANTLR3_UINT8 inserti16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
76 static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
77 static ANTLR3_UINT32 compare16_8 (pANTLR3_STRING string, const char * compStr);
78 static ANTLR3_UINT32 compare16_16(pANTLR3_STRING string, const char * compStr);
79 static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
80 static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
81 static ANTLR3_UCHAR charAt16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
82 static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83 static pANTLR3_STRING subString16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84 static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
85 static ANTLR3_INT32 toInt32_16 (pANTLR3_STRING string);
86 static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
87 static pANTLR3_STRING to8_16 (pANTLR3_STRING string);
88 static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
89 static pANTLR3_STRING toUTF8_16 (pANTLR3_STRING string);
93 static void stringInit8 (pANTLR3_STRING string);
94 static void stringInit16 (pANTLR3_STRING string);
95 static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
97 ANTLR3_API pANTLR3_STRING_FACTORY
98 antlr3StringFactoryNew()
100 pANTLR3_STRING_FACTORY factory;
104 factory = (pANTLR3_STRING_FACTORY) ANTLR3_MALLOC(sizeof(ANTLR3_STRING_FACTORY));
111 /* Now we make a new list to track the strings.
113 factory->strings = antlr3VectorNew(0);
116 if (factory->strings == NULL)
118 ANTLR3_FREE(factory);
122 /* Install the API (8 bit assumed)
124 factory->newRaw = newRaw8;
125 factory->newSize = newSize8;
127 factory->newPtr = newPtr8;
128 factory->newPtr8 = newPtr8;
129 factory->newStr = newStr8;
130 factory->newStr8 = newStr8;
131 factory->destroy = destroy;
132 factory->printable = printable8;
133 factory->destroy = destroy;
134 factory->close = closeFactory;
139 /** Create a string factory that is UCS2 (16 bit) encoding based
141 ANTLR3_API pANTLR3_STRING_FACTORY
142 antlr3UCS2StringFactoryNew()
144 pANTLR3_STRING_FACTORY factory;
146 /* Allocate an 8 bit factory, then override with 16 bit UCS2 functions where we
149 factory = antlr3StringFactoryNew();
156 /* Override the 8 bit API with the UCS2 (mostly just 16 bit) API
158 factory->newRaw = newRaw16;
159 factory->newSize = newSize16;
161 factory->newPtr = newPtr16_16;
162 factory->newPtr8 = newPtr16_8;
163 factory->newStr = newStr16_16;
164 factory->newStr8 = newStr16_8;
165 factory->printable = printable16;
167 factory->destroy = destroy;
168 factory->destroy = destroy;
169 factory->close = closeFactory;
179 static pANTLR3_STRING
180 newRaw8 (pANTLR3_STRING_FACTORY factory)
182 pANTLR3_STRING string;
184 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191 /* Structure is allocated, now fill in the API etc.
194 string->factory = factory;
196 /* Add the string into the allocated list
198 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
199 string->index = factory->index++;
208 static pANTLR3_STRING
209 newRaw16 (pANTLR3_STRING_FACTORY factory)
211 pANTLR3_STRING string;
213 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220 /* Structure is allocated, now fill in the API etc.
222 stringInit16(string);
223 string->factory = factory;
225 /* Add the string into the allocated list
227 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
228 string->index = factory->index++;
233 void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
235 /* First free the string itself if there was anything in it
239 ANTLR3_FREE(string->chars);
242 /* Now free the space for this string
254 stringInit8 (pANTLR3_STRING string)
258 string->chars = NULL;
259 string->encoding = ANTLR3_ENCODING_LATIN1;
261 /* API for 8 bit strings*/
265 string->append = append8;
266 string->append8 = append8;
267 string->insert = insert8;
268 string->insert8 = insert8;
269 string->addi = addi8;
270 string->inserti = inserti8;
271 string->addc = addc8;
272 string->charAt = charAt8;
273 string->compare = compare8;
274 string->compare8 = compare8;
275 string->subString = subString8;
276 string->toInt32 = toInt32_8;
278 string->toUTF8 = toUTF8_8;
279 string->compareS = compareS;
281 string->appendS = appendS;
282 string->insertS = insertS;
291 stringInit16 (pANTLR3_STRING string)
295 string->chars = NULL;
296 string->encoding = ANTLR3_ENCODING_UCS2;
298 /* API for 16 bit strings */
300 string->set = set16_16;
301 string->set8 = set16_8;
302 string->append = append16_16;
303 string->append8 = append16_8;
304 string->insert = insert16_16;
305 string->insert8 = insert16_8;
306 string->addi = addi16;
307 string->inserti = inserti16;
308 string->addc = addc16;
309 string->charAt = charAt16;
310 string->compare = compare16_16;
311 string->compare8 = compare16_8;
312 string->subString = subString16;
313 string->toInt32 = toInt32_16;
314 string->to8 = to8_16;
315 string->toUTF8 = toUTF8_16;
317 string->compareS = compareS;
319 string->appendS = appendS;
320 string->insertS = insertS;
326 * TODO: Implement UTF-8
329 stringInitUTF8 (pANTLR3_STRING string)
333 string->chars = NULL;
339 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
340 // a memcpy as we make no assumptions about the 8 bit encoding.
342 static pANTLR3_STRING
343 toUTF8_8 (pANTLR3_STRING string)
345 return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
348 // Convert a 16 bit (UCS2) string into a UTF8 representation using the Unicode.org
349 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
350 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
351 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
353 static pANTLR3_STRING
354 toUTF8_16 (pANTLR3_STRING string)
359 pANTLR3_STRING utf8String;
361 ConversionResult cResult;
363 // Allocate the output buffer, which needs to accommodate potentially
364 // 3X (in bytes) the input size (in chars).
366 utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
368 if (utf8String != NULL)
370 // Free existing allocation
372 ANTLR3_FREE(utf8String->chars);
374 // Reallocate according to maximum expected size
376 utf8String->size = string->len *3;
377 utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
379 if (utf8String->chars != NULL)
381 inputEnd = (UTF16 *) (string->chars);
382 outputEnd = (UTF8 *) (utf8String->chars);
384 // Call the Unicode converter
386 cResult = ConvertUTF16toUTF8
388 (const UTF16**)&inputEnd,
389 ((const UTF16 *)(string->chars)) + string->len,
391 outputEnd + utf8String->size - 1,
395 // We don't really care if things failed or not here, we just converted
396 // everything that was vaguely possible and stopped when it wasn't. It is
397 // up to the grammar programmer to verify that the input is sensible.
399 utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
401 *(outputEnd+1) = '\0'; // Always null terminate
408 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
410 * \param[in] factory - Pointer to the string factory that owns strings
411 * \param[in] size - In characters
412 * \return pointer to the new string.
414 static pANTLR3_STRING
415 newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
417 pANTLR3_STRING string;
419 string = factory->newRaw(factory);
426 /* Always add one more byte for a terminator ;-)
428 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
429 *(string->chars) = '\0';
430 string->size = size + 1;
436 * Creates a new string with enough capacity for size 16 bit characters plus a terminator.
438 * \param[in] factory - POitner to the string factory that owns strings
439 * \param[in] size - In characters
440 * \return pointer to the new string.
442 static pANTLR3_STRING
443 newSize16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
445 pANTLR3_STRING string;
447 string = factory->newRaw(factory);
454 /* Always add one more byte for a terminator ;-)
456 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
457 *(string->chars) = '\0';
458 string->size = size+1; /* Size is always in characters, as is len */
463 /** Creates a new 8 bit string initialized with the 8 bit characters at the
464 * supplied ptr, of pre-determined size.
465 * \param[in] factory - Pointer to the string factory that owns the strings
466 * \param[in] ptr - Pointer to 8 bit encoded characters
467 * \return pointer to the new string
469 static pANTLR3_STRING
470 newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
472 pANTLR3_STRING string;
474 string = factory->newSize(factory, size);
488 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
489 *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */
496 /** Creates a new 16 bit string initialized with the 8 bit characters at the
497 * supplied 8 bit character ptr, of pre-determined size.
498 * \param[in] factory - Pointer to the string factory that owns the strings
499 * \param[in] ptr - Pointer to 8 bit encoded characters
500 * \return pointer to the new string
502 static pANTLR3_STRING
503 newPtr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
505 pANTLR3_STRING string;
507 /* newSize accepts size in characters, not bytes
509 string = factory->newSize(factory, size);
526 out = (pANTLR3_UINT16)(string->chars);
531 *out++ = (ANTLR3_UINT16)(*ptr++);
534 /* Terminate, these strings are usually used for Token streams and printing etc.
536 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
544 /** Creates a new 16 bit string initialized with the 16 bit characters at the
545 * supplied ptr, of pre-determined size.
546 * \param[in] factory - Pointer to the string factory that owns the strings
547 * \param[in] ptr - Pointer to 16 bit encoded characters
548 * \return pointer to the new string
550 static pANTLR3_STRING
551 newPtr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
553 pANTLR3_STRING string;
555 string = factory->newSize(factory, size);
569 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
571 /* Terminate, these strings are usually used for Token streams and printing etc.
573 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
580 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
581 * \param[in] factory - Pointer to the string factory that owns strings.
582 * \param[in] ptr - Pointer to the 8 bit encoded string
583 * \return Pointer to the newly initialized string
585 static pANTLR3_STRING
586 newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
588 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
591 /** Create a new 16 bit string from the supplied, null terminated, 8 bit string pointer.
592 * \param[in] factory - Pointer to the string factory that owns strings.
593 * \param[in] ptr - Pointer to the 8 bit encoded string
594 * \return POinter to the newly initialized string
596 static pANTLR3_STRING
597 newStr16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
599 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
602 /** Create a new 16 bit string from the supplied, null terminated, 16 bit string pointer.
603 * \param[in] factory - Pointer to the string factory that owns strings.
604 * \param[in] ptr - Pointer to the 16 bit encoded string
605 * \return Pointer to the newly initialized string
607 static pANTLR3_STRING
608 newStr16_16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
613 /** First, determine the length of the input string
615 in = (pANTLR3_UINT16)ptr;
618 while (*in++ != '\0')
622 return factory->newPtr(factory, ptr, count);
626 destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
628 // Record which string we are deleting
630 ANTLR3_UINT32 strIndex = string->index;
632 // Ensure that the string was not factory made, or we would try
633 // to delete memory that wasn't allocated outside the factory
635 // Remove the specific indexed string from the vector
637 factory->strings->del(factory->strings, strIndex);
639 // One less string in the vector, so decrement the factory index
640 // so that the next string allocated is indexed correctly with
641 // respect to the vector.
645 // Now we have to reindex the strings in the vector that followed
646 // the one we just deleted. We only do this if the one we just deleted
647 // was not the last one.
649 if (strIndex< factory->index)
651 // We must reindex the strings after the one we just deleted.
652 // The one that follows the one we just deleted is also out
653 // of whack, so we start there.
657 for (i = strIndex; i < factory->index; i++)
659 // Renumber the entry
661 ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
665 // The string has been destroyed and the elements of the factory are reindexed.
670 static pANTLR3_STRING
671 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
673 pANTLR3_STRING string;
675 /* We don't need to be too efficient here, this is mostly for error messages and so on.
677 pANTLR3_UINT8 scannedText;
680 /* Assume we need as much as twice as much space to parse out the control characters
682 string = factory->newSize(factory, instr->len *2 + 1);
684 /* Scan through and replace unprintable (in terms of this routine)
687 scannedText = string->chars;
689 for (i = 0; i < instr->len; i++)
691 if (*(instr->chars + i) == '\n')
693 *scannedText++ = '\\';
694 *scannedText++ = 'n';
696 else if (*(instr->chars + i) == '\r')
698 *scannedText++ = '\\';
699 *scannedText++ = 'r';
701 else if (!isprint(*(instr->chars +i)))
703 *scannedText++ = '?';
707 *scannedText++ = *(instr->chars + i);
712 string->len = (ANTLR3_UINT32)(scannedText - string->chars);
717 static pANTLR3_STRING
718 printable16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
720 pANTLR3_STRING string;
722 /* We don't need to be too efficient here, this is mostly for error messages and so on.
724 pANTLR3_UINT16 scannedText;
725 pANTLR3_UINT16 inText;
727 ANTLR3_UINT32 outLen;
729 /* Assume we need as much as twice as much space to parse out the control characters
731 string = factory->newSize(factory, instr->len *2 + 1);
733 /* Scan through and replace unprintable (in terms of this routine)
736 scannedText = (pANTLR3_UINT16)(string->chars);
737 inText = (pANTLR3_UINT16)(instr->chars);
740 for (i = 0; i < instr->len; i++)
742 if (*(inText + i) == '\n')
744 *scannedText++ = '\\';
745 *scannedText++ = 'n';
748 else if (*(inText + i) == '\r')
750 *scannedText++ = '\\';
751 *scannedText++ = 'r';
754 else if (!isprint(*(inText +i)))
756 *scannedText++ = '?';
761 *scannedText++ = *(inText + i);
767 string->len = outLen;
772 /** Fascist Capitalist Pig function created
773 * to oppress the workers comrade.
776 closeFactory (pANTLR3_STRING_FACTORY factory)
778 /* Delete the vector we were tracking the strings with, this will
779 * causes all the allocated strings to be deallocated too
781 factory->strings->free(factory->strings);
783 /* Delete the space for the factory itself
785 ANTLR3_FREE((void *)factory);
789 append8 (pANTLR3_STRING string, const char * newbit)
793 len = (ANTLR3_UINT32)strlen(newbit);
795 if (string->size < (string->len + len + 1))
797 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
798 string->size = string->len + len + 1;
801 /* Note we copy one more byte than the strlen in order to get the trailing
803 ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
806 return string->chars;
810 append16_8 (pANTLR3_STRING string, const char * newbit)
813 pANTLR3_UINT16 apPoint;
816 len = (ANTLR3_UINT32)strlen(newbit);
818 if (string->size < (string->len + len + 1))
820 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
821 string->size = string->len + len + 1;
824 apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
827 for (count = 0; count < len; count++)
829 *apPoint++ = *(newbit + count);
833 return string->chars;
837 append16_16 (pANTLR3_STRING string, const char * newbit)
842 /** First, determine the length of the input string
844 in = (pANTLR3_UINT16)newbit;
847 while (*in++ != '\0')
852 if (string->size < (string->len + len + 1))
854 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
855 string->size = string->len + len + 1;
858 /* Note we copy one more byte than the strlen in order to get the trailing delimiter
860 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
863 return string->chars;
867 set8 (pANTLR3_STRING string, const char * chars)
871 len = (ANTLR3_UINT32)strlen(chars);
872 if (string->size < len + 1)
874 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
875 string->size = len + 1;
878 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
880 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
883 return string->chars;
888 set16_8 (pANTLR3_STRING string, const char * chars)
892 pANTLR3_UINT16 apPoint;
894 len = (ANTLR3_UINT32)strlen(chars);
895 if (string->size < len + 1)
897 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
898 string->size = len + 1;
900 apPoint = ((pANTLR3_UINT16)string->chars);
903 for (count = 0; count < string->len; count++)
905 *apPoint++ = *(chars + count);
909 return string->chars;
913 set16_16 (pANTLR3_STRING string, const char * chars)
918 /** First, determine the length of the input string
920 in = (pANTLR3_UINT16)chars;
923 while (*in++ != '\0')
928 if (string->size < len + 1)
930 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
931 string->size = len + 1;
934 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
936 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
939 return string->chars;
944 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
946 if (string->size < string->len + 2)
948 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
949 string->size = string->len + 2;
951 *(string->chars + string->len) = (ANTLR3_UINT8)c;
952 *(string->chars + string->len + 1) = '\0';
955 return string->chars;
959 addc16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
963 if (string->size < string->len + 2)
965 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
966 string->size = string->len + 2;
968 ptr = (pANTLR3_UINT16)(string->chars);
970 *(ptr + string->len) = (ANTLR3_UINT16)c;
971 *(ptr + string->len + 1) = '\0';
974 return string->chars;
978 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
980 ANTLR3_UINT8 newbit[32];
982 sprintf((char *)newbit, "%d", i);
984 return string->append8(string, (const char *)newbit);
987 addi16 (pANTLR3_STRING string, ANTLR3_INT32 i)
989 ANTLR3_UINT8 newbit[32];
991 sprintf((char *)newbit, "%d", i);
993 return string->append8(string, (const char *)newbit);
997 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
999 ANTLR3_UINT8 newbit[32];
1001 sprintf((char *)newbit, "%d", i);
1002 return string->insert8(string, point, (const char *)newbit);
1004 static pANTLR3_UINT8
1005 inserti16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1007 ANTLR3_UINT8 newbit[32];
1009 sprintf((char *)newbit, "%d", i);
1010 return string->insert8(string, point, (const char *)newbit);
1013 static pANTLR3_UINT8
1014 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1018 if (point >= string->len)
1020 return string->append(string, newbit);
1023 len = (ANTLR3_UINT32)strlen(newbit);
1027 return string->chars;
1030 if (string->size < (string->len + len + 1))
1032 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1033 string->size = string->len + len + 1;
1036 /* Move the characters we are inserting before, including the delimiter
1038 ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1040 /* Note we copy the exact number of bytes
1042 ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1046 return string->chars;
1049 static pANTLR3_UINT8
1050 insert16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1053 ANTLR3_UINT32 count;
1054 pANTLR3_UINT16 inPoint;
1056 if (point >= string->len)
1058 return string->append8(string, newbit);
1061 len = (ANTLR3_UINT32)strlen(newbit);
1065 return string->chars;
1068 if (string->size < (string->len + len + 1))
1070 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1071 string->size = string->len + len + 1;
1074 /* Move the characters we are inserting before, including the delimiter
1076 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1080 inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1081 for (count = 0; count<len; count++)
1083 *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1086 return string->chars;
1089 static pANTLR3_UINT8
1090 insert16_16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1095 if (point >= string->len)
1097 return string->append(string, newbit);
1100 /** First, determine the length of the input string
1102 in = (pANTLR3_UINT16)newbit;
1105 while (*in++ != '\0')
1112 return string->chars;
1115 if (string->size < (string->len + len + 1))
1117 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1118 string->size = string->len + len + 1;
1121 /* Move the characters we are inserting before, including the delimiter
1123 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1126 /* Note we copy the exact number of characters
1128 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1132 return string->chars;
1135 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
1137 return string->set(string, (const char *)(chars->chars));
1140 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
1142 /* We may be passed an empty string, in which case we just return the current pointer
1144 if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1146 return string->chars;
1150 return string->append(string, (const char *)(newbit->chars));
1154 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1156 return string->insert(string, point, (const char *)(newbit->chars));
1159 /* Function that compares the text of a string to the supplied
1160 * 8 bit character string and returns a result a la strcmp()
1162 static ANTLR3_UINT32
1163 compare8 (pANTLR3_STRING string, const char * compStr)
1165 return strcmp((const char *)(string->chars), compStr);
1168 /* Function that compares the text of a string with the supplied character string
1169 * (which is assumed to be in the same encoding as the string itself) and returns a result
1172 static ANTLR3_UINT32
1173 compare16_8 (pANTLR3_STRING string, const char * compStr)
1175 pANTLR3_UINT16 ourString;
1176 ANTLR3_UINT32 charDiff;
1178 ourString = (pANTLR3_UINT16)(string->chars);
1180 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1182 charDiff = *ourString - *compStr;
1191 /* At this point, one of the strings was terminated
1193 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1197 /* Function that compares the text of a string with the supplied character string
1198 * (which is assumed to be in the same encoding as the string itself) and returns a result
1201 static ANTLR3_UINT32
1202 compare16_16 (pANTLR3_STRING string, const char * compStr8)
1204 pANTLR3_UINT16 ourString;
1205 pANTLR3_UINT16 compStr;
1206 ANTLR3_UINT32 charDiff;
1208 ourString = (pANTLR3_UINT16)(string->chars);
1209 compStr = (pANTLR3_UINT16)(compStr8);
1211 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1213 charDiff = *ourString - *compStr;
1222 /* At this point, one of the strings was terminated
1224 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1227 /* Function that compares the text of a string with the supplied string
1228 * (which is assumed to be in the same encoding as the string itself) and returns a result
1231 static ANTLR3_UINT32
1232 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
1234 return string->compare(string, (const char *)compStr->chars);
1238 /* Function that returns the character indexed at the supplied
1239 * offset as a 32 bit character.
1242 charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1244 if (offset > string->len)
1246 return (ANTLR3_UCHAR)'\0';
1250 return (ANTLR3_UCHAR)(*(string->chars + offset));
1254 /* Function that returns the character indexed at the supplied
1255 * offset as a 32 bit character.
1258 charAt16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1260 if (offset > string->len)
1262 return (ANTLR3_UCHAR)'\0';
1266 return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1270 /* Function that returns a substring of the supplied string a la .subString(s,e)
1273 static pANTLR3_STRING
1274 subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1276 pANTLR3_STRING newStr;
1278 if (endIndex > string->len)
1280 endIndex = string->len + 1;
1282 newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1287 /* Returns a substring of the supplied string a la .subString(s,e)
1290 static pANTLR3_STRING
1291 subString16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1293 pANTLR3_STRING newStr;
1295 if (endIndex > string->len)
1297 endIndex = string->len + 1;
1299 newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1304 /* Function that can convert the characters in the string to an integer
1307 toInt32_8 (struct ANTLR3_STRING_struct * string)
1309 return atoi((const char *)(string->chars));
1312 /* Function that can convert the characters in the string to an integer
1315 toInt32_16 (struct ANTLR3_STRING_struct * string)
1317 pANTLR3_UINT16 input;
1319 ANTLR3_BOOLEAN negate;
1322 input = (pANTLR3_UINT16)(string->chars);
1323 negate = ANTLR3_FALSE;
1325 if (*input == (ANTLR3_UCHAR)'-')
1327 negate = ANTLR3_TRUE;
1330 else if (*input == (ANTLR3_UCHAR)'+')
1335 while (*input != '\0' && isdigit(*input))
1338 value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1342 return negate ? -value : value;
1345 /* Function that returns a pointer to an 8 bit version of the string,
1346 * which in this case is just the string as this is
1347 * 8 bit encodiing anyway.
1349 static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
1354 /* Function that returns an 8 bit version of the string,
1355 * which in this case is returning all the 16 bit characters
1356 * narrowed back into 8 bits, with characters that are too large
1359 static pANTLR3_STRING to8_16 (pANTLR3_STRING string)
1361 pANTLR3_STRING newStr;
1364 /* Create a new 8 bit string
1366 newStr = newRaw8(string->factory);
1373 /* Always add one more byte for a terminator
1375 newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1376 newStr->size = string->len + 1;
1377 newStr->len = string->len;
1379 /* Now copy each 16 bit charActer , making it an 8 bit character of
1382 for (i=0; i<string->len; i++)
1386 c = *(((pANTLR3_UINT16)(string->chars)) + i);
1388 *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1393 *(newStr->chars + newStr->len) = '\0';