--- /dev/null
+/// \file\r
+/// Base functions to initialize and manipulate a UCS2 input stream\r
+///\r
+#include <antlr3input.h>\r
+\r
+// [The "BSD licence"]\r
+// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC\r
+// http://www.temporal-wave.com\r
+// http://www.linkedin.com/in/jimidle\r
+//\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+// 3. The name of the author may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\r
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\r
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\r
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\r
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+\r
+// INT Stream API\r
+//\r
+// Forward declarations of the 16 bit (UCS2) implementations that\r
+// antlr3UCS2SetupStream() installs on input->istream. They are file local\r
+// and are only reachable through those function pointers.\r
+//\r
+static void antlr3UCS2Consume (pANTLR3_INT_STREAM is);\r
+static ANTLR3_UCHAR antlr3UCS2LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la);\r
+static ANTLR3_MARKER antlr3UCS2Index (pANTLR3_INT_STREAM is);\r
+static void antlr3UCS2Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint);\r
+\r
+// ucs2 Charstream API functions\r
+//\r
+// Forward declaration of the substring function that antlr3UCS2SetupStream()\r
+// installs as input->substr.\r
+//\r
+static pANTLR3_STRING antlr3UCS2Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop);\r
+\r
+/// \brief Configure an input stream so that it is read as 16 bit "UCS2" characters.\r
+///\r
+/// \param input Input stream context pointer to configure\r
+/// \param type  Stream type, passed through unchanged to antlr3GenericSetupStream()\r
+///\r
+/// The function performs these steps, in this order:\r
+///  -# stores a new UCS2 string factory in input->strFactory;\r
+///  -# calls antlr3GenericSetupStream() to install the generic function pointers;\r
+///  -# replaces consume, _LA, index and seek on input->istream, and substr on\r
+///     input, with the 16 bit versions defined in this file;\r
+///  -# sets input->charByteSize to 2.\r
+///\r
+/// \remark\r
+/// - "UCS2" is used loosely here. It names a plain stream of 16 bit code units\r
+///   in which each unit is taken to be one Unicode code point and surrogate\r
+///   pairs are not interpreted. The name was chosen to keep such streams\r
+///   distinct from a possible future UTF-16 stream; Java, Oracle and others\r
+///   use 16 bit characters, so this kind of input is very common.\r
+/// - The value returned by antlr3UCS2StringFactoryNew() is stored without being\r
+///   checked in this function.\r
+///\r
+void\r
+antlr3UCS2SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 type)\r
+{\r
+    pANTLR3_INT_STREAM  istream;\r
+\r
+    // The 16 bit string factory built here is the one that gets handed along\r
+    // the whole lexer->parser->tree->treeparser chain.\r
+    //\r
+    input->strFactory   = antlr3UCS2StringFactoryNew();\r
+\r
+    // Start from the generic (8 bit) function set; most of it is usable as is.\r
+    // Only the entries that depend on the character width are replaced below,\r
+    // so this call must come before the overrides.\r
+    //\r
+    antlr3GenericSetupStream (input, type);\r
+\r
+    // Intstream API: width dependent entries\r
+    //\r
+    istream             = input->istream;\r
+    istream->consume    = antlr3UCS2Consume;    // Step over one 16 bit character\r
+    istream->_LA        = antlr3UCS2LA;         // Look ahead by n characters (1 based)\r
+    istream->index      = antlr3UCS2Index;      // Current position marker\r
+    istream->seek       = antlr3UCS2Seek;       // Move to a previously obtained marker\r
+\r
+    // Charstream API: width dependent entries\r
+    //\r
+    input->substr       = antlr3UCS2Substr;     // Build a string from a stretch of the input\r
+\r
+    // Each character of this stream occupies two bytes.\r
+    //\r
+    input->charByteSize = 2;\r
+}\r
+\r
+/// \brief Consume the next character of a 16 bit (UCS2) input stream.\r
+///\r
+/// \param is Int stream interface pointer; is->super is used as the owning\r
+///           input stream context\r
+///\r
+/// If the read position is still in front of the end-of-buffer bound, the\r
+/// position is moved on by one 16 bit character and charPositionInLine is\r
+/// incremented. When the character being consumed equals input->newlineChar,\r
+/// the line counter is incremented, charPositionInLine is reset to 0 and\r
+/// currentLine is pointed at the character following the newline.\r
+/// When the read position is at or beyond the bound, nothing is changed.\r
+///\r
+/// \remark\r
+/// NOTE(review): the bound is computed as (16 bit pointer to data) + sizeBuf,\r
+/// so sizeBuf is counted in 16 bit characters here - confirm that this matches\r
+/// the units in which sizeBuf is stored by the stream creation code.\r
+///\r
+static void\r
+antlr3UCS2Consume(pANTLR3_INT_STREAM is)\r
+{\r
+    pANTLR3_INPUT_STREAM    stream;\r
+    pANTLR3_UINT16          cursor;\r
+    pANTLR3_UINT16          bufferEnd;\r
+\r
+    stream      = (pANTLR3_INPUT_STREAM) (is->super);\r
+    cursor      = (pANTLR3_UINT16) (stream->nextChar);\r
+    bufferEnd   = ((pANTLR3_UINT16) stream->data) + stream->sizeBuf;\r
+\r
+    // Nothing left to consume\r
+    //\r
+    if (cursor >= bufferEnd)\r
+    {\r
+        return;\r
+    }\r
+\r
+    // Count this character as part of the current line ...\r
+    //\r
+    stream->charPositionInLine++;\r
+\r
+    // ... unless it ends the line, in which case a new line starts right after it\r
+    //\r
+    if ((ANTLR3_UCHAR)(*cursor) == stream->newlineChar)\r
+    {\r
+        stream->line++;\r
+        stream->charPositionInLine  = 0;\r
+        stream->currentLine         = (void *)(cursor + 1);\r
+    }\r
+\r
+    // Step to the next 16 bit character\r
+    //\r
+    stream->nextChar = (void *)(cursor + 1);\r
+}\r
+\r
+/// \brief Look ahead in a 16 bit (UCS2) input stream without consuming anything.\r
+///\r
+/// \param[in] is Int stream interface pointer; is->super is used as the owning\r
+///               input stream context\r
+/// \param[in] la 1 based offset of the wanted character, so 1 is the character\r
+///               at the current read position\r
+///\r
+/// \return The 16 bit character at that offset, widened to ANTLR3_UCHAR, or\r
+///         ANTLR3_CHARSTREAM_EOF when the offset is at or beyond the\r
+///         end-of-buffer bound (data + sizeBuf, in 16 bit units).\r
+///\r
+/// \remark\r
+/// NOTE(review): only the upper bound is checked. No test is made that the\r
+/// computed position is not in front of the start of the buffer (for example\r
+/// la <= 0 at the beginning of the input) - confirm callers never ask for that.\r
+///\r
+static ANTLR3_UCHAR\r
+antlr3UCS2LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)\r
+{\r
+    pANTLR3_INPUT_STREAM    stream;\r
+    pANTLR3_UINT16          target;\r
+\r
+    stream  = (pANTLR3_INPUT_STREAM) (is->super);\r
+    target  = ((pANTLR3_UINT16) stream->nextChar) + la - 1;\r
+\r
+    if (target < (((pANTLR3_UINT16) stream->data) + stream->sizeBuf))\r
+    {\r
+        return (ANTLR3_UCHAR)(*target);\r
+    }\r
+\r
+    return ANTLR3_CHARSTREAM_EOF;\r
+}\r
+\r
+\r
+/// \brief Report the current position in the input stream.\r
+///\r
+/// \param[in] is Int stream interface pointer; is->super is used as the owning\r
+///               input stream context\r
+///\r
+/// \return The stream's nextChar pointer converted to an ANTLR3_MARKER. The\r
+///         marker is therefore an address inside the buffer, not a character\r
+///         count.\r
+///\r
+static ANTLR3_MARKER\r
+antlr3UCS2Index(pANTLR3_INT_STREAM is)\r
+{\r
+    return (ANTLR3_MARKER)(((pANTLR3_INPUT_STREAM) (is->super))->nextChar);\r
+}\r
+\r
+/// \brief Move the read position of a 16 bit (UCS2) input stream to a marker.\r
+///\r
+/// \param[in] is        Int stream interface pointer; is->super is used as the\r
+///                      owning input stream context\r
+/// \param[in] seekPoint Target position, compared and assigned as a buffer\r
+///                      address (the kind of value antlr3UCS2Index() returns)\r
+///\r
+/// \remark\r
+/// - A target at or before the current position is treated as a rewind to a\r
+///   mark: nextChar is simply set to it, and the line and charPositionInLine\r
+///   fields are not modified by this function on that path.\r
+/// - A target after the current position is reached by calling is->consume()\r
+///   once per 16 bit character, so that whatever bookkeeping consume does is\r
+///   carried out on the way. The byte distance is divided by 2 to obtain the\r
+///   number of characters.\r
+///\r
+static void\r
+antlr3UCS2Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint)\r
+{\r
+    pANTLR3_INPUT_STREAM    stream;\r
+    ANTLR3_INT32            remaining;\r
+\r
+    stream = (pANTLR3_INPUT_STREAM) is->super;\r
+\r
+    // Going backwards (or nowhere): no scan needed, just reposition\r
+    //\r
+    if (seekPoint <= (ANTLR3_MARKER)(stream->nextChar))\r
+    {\r
+        stream->nextChar = (void *)seekPoint;\r
+        return;\r
+    }\r
+\r
+    // Going forwards: two bytes per UCS2 character\r
+    //\r
+    remaining = (ANTLR3_UINT32)((seekPoint - (ANTLR3_MARKER)(stream->nextChar)) / 2);\r
+\r
+    for (; remaining != 0; remaining--)\r
+    {\r
+        is->consume(is);\r
+    }\r
+}\r
+/// \brief Build a string from a stretch of the ucs2 (16 bit) input stream.\r
+///\r
+/// \param input Input stream context pointer; its strFactory creates the string\r
+/// \param start Position of the first character, used as a buffer address\r
+/// \param stop  Position of the last character, used as a buffer address; this\r
+///              character is included in the result\r
+///\r
+/// \return Whatever input->strFactory->newPtr() returns for that stretch.\r
+///\r
+/// \remark\r
+/// The length handed to the factory is ((stop - start) / 2) + 1: the byte\r
+/// distance between the two positions is halved to get 16 bit characters and\r
+/// one is added because stop is inclusive.\r
+///\r
+static pANTLR3_STRING\r
+antlr3UCS2Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop)\r
+{\r
+    return input->strFactory->newPtr(\r
+                input->strFactory,\r
+                (pANTLR3_UINT8)start,\r
+                ((ANTLR3_UINT32_CAST(stop - start))/2) + 1\r
+           );\r
+}\r