gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java
Modified UTF-8 decoding fixes for TG reading and indexing
[simantics/platform.git] / bundles / org.simantics.graph / src / org / simantics / graph / representation / ByteFileReader.java
index 4c14bded6639fbf0662e6548f486d38dd27be912..3a76a911a0d3c269c4c72cf5053b742aa8579432 100644 (file)
@@ -6,6 +6,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UTFDataFormatException;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 
@@ -49,29 +50,71 @@ public class ByteFileReader implements Closeable {
                return bytes;
 
        }
-       final protected String utf(byte[] bytes, int index, int target) {
-               int i = 0;
-               while(index < target) {
-                       int c = bytes[index++]&0xff;
-                       if(c <= 0x7F) {
-                               chars[i++] = (char)(c&0x7F);
-                       } else if (c > 0x07FF) {
-                               int c2 = bytes[index++]&0xff;
-                               int c3 = bytes[index++]&0xff;
-                               chars[i++] = (char)(((c&0xf)<<12) + ((c2&0x3f)<<6) + (c3&0x3f)); 
-                       } else {
-                               int c2 = bytes[index++]&0xff;
-                               chars[i++] = (char)(((c&0x1f)<<6) + (c2&0x3f)); 
+
+       final protected String utf(byte[] bytearr, int index, int target) throws UTFDataFormatException {
+               // Copied from DataInputStream
+               int utflen = target - index;
+               char[] chararr = utflen > chars.length ? new char[utflen] : chars;
+
+               int c, char2, char3;
+               int count = index;
+               int chararr_count=0;
+
+               while (count < target) {
+                       c = (int) bytearr[count] & 0xff;
+                       if (c > 127) break;
+                       count++;
+                       chararr[chararr_count++]=(char)c;
+               }
+
+               while (count < target) {
+                       c = (int) bytearr[count] & 0xff;
+                       switch (c >> 4) {
+                       case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
+                               /* 0xxxxxxx*/
+                               count++;
+                               chararr[chararr_count++]=(char)c;
+                               break;
+                       case 12: case 13:
+                               /* 110x xxxx   10xx xxxx*/
+                               count += 2;
+                               if (count > target)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
+                               char2 = (int) bytearr[count-1];
+                               if ((char2 & 0xC0) != 0x80)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input around byte " + count); 
+                               chararr[chararr_count++]=(char)(((c & 0x1F) << 6) | 
+                                               (char2 & 0x3F));  
+                               break;
+                       case 14:
+                               /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                               count += 3;
+                               if (count > target)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
+                               char2 = (int) bytearr[count-2];
+                               char3 = (int) bytearr[count-1];
+                               if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                                       throw new UTFDataFormatException(
+                                                       "malformed input around byte " + (count-1));
+                               chararr[chararr_count++]=(char)(((c     & 0x0F) << 12) |
+                                               ((char2 & 0x3F) << 6)  |
+                                               ((char3 & 0x3F) << 0));
+                               break;
+                       default:
+                               /* 10xx xxxx,  1111 xxxx */
+                               throw new UTFDataFormatException(
+                                               "malformed input around byte " + count);
                        }
-                       
                }
-               return new String(chars, 0, i);
+               // The number of chars produced may be less than utflen
+               return new String(chararr, 0, chararr_count);
        }
 
        final protected byte[] safeBytes(int amount) throws IOException {
-
                byte[] result = new byte[amount];
-               
                int has = size-byteIndex;
                if(amount >= has) {
                        ReadableByteChannel c = channel;
@@ -86,6 +129,9 @@ public class ByteFileReader implements Closeable {
                                int got = c.read(bb2);
                                if(got == -1) throw new IOException("Unexpected end-of-file");
                                has += got; 
+                               // For some unknown reason this is needed!
+                               // Spec indicates that read would increment position but it does not.
+                               bb2.position(has);
                        }
                        size = c.read(bb);
                        bb.position(0);
@@ -104,7 +150,7 @@ public class ByteFileReader implements Closeable {
            int result;
         if(has == 0) {
             ReadableByteChannel c = channel;
-            ByteBuffer bb = byteBuffer;            
+            ByteBuffer bb = byteBuffer;
             size = c.read(bb);
             if(size == -1) {
                                throw new EOFException("Unexpected end-of-file");
@@ -114,43 +160,40 @@ public class ByteFileReader implements Closeable {
             if(size == 0)
                 return -1;
         }
-        result = bytes[byteIndex];
-        if(result < 0)
-            result += 256;
-        ++byteIndex;        
+        result = bytes[byteIndex++] & 0xff;
         return result;
        }
 
        public int getDynamicUInt32() throws IOException {
-               int length = getByte()&0xff
+               int length = getByte(); 
                if(length >= 0x80) {
                        if(length >= 0xc0) {
                                if(length >= 0xe0) {
                                        if(length >= 0xf0) {
                                                length &= 0x0f;
-                                               length += ((getByte()&0xff)<<3);
-                                               length += ((getByte()&0xff)<<11);
-                                               length += ((getByte()&0xff)<<19);
+                                               length += (getByte()<<3);
+                                               length += (getByte()<<11);
+                                               length += (getByte()<<19);
                                                length += 0x10204080;
                                        }
                                        else {
                                                length &= 0x1f;
-                                               length += ((getByte()&0xff)<<4);
-                                               length += ((getByte()&0xff)<<12);
-                                               length += ((getByte()&0xff)<<20);
+                                               length += (getByte()<<4);
+                                               length += (getByte()<<12);
+                                               length += (getByte()<<20);
                                                length += 0x204080;
                                        }
                                }
                                else {
                                        length &= 0x3f;
-                                       length += ((getByte()&0xff)<<5);
-                                       length += ((getByte()&0xff)<<13);
+                                       length += (getByte()<<5);
+                                       length += (getByte()<<13);
                                        length += 0x4080;
                                }
                        }
                        else {
                                length &= 0x7f;
-                               length += ((getByte()&0xff)<<6);
+                               length += (getByte()<<6);
                                length += 0x80;
                        }
                }
@@ -159,10 +202,12 @@ public class ByteFileReader implements Closeable {
 
        final protected int safeInt() throws IOException {
 
+               byte[] bytes = this.bytes;
+
                if(byteIndex >= (size-5)) {
                        int result = 0;
                        ReadableByteChannel c = channel;
-               ByteBuffer bb = byteBuffer;
+                       ByteBuffer bb = byteBuffer;
                        if(byteIndex == size) {
                                size = c.read(bb);
                                if(size == -1) throw new EOFException("Unexpected end-of-file");
@@ -190,7 +235,7 @@ public class ByteFileReader implements Closeable {
                                bb.position(0);
                                byteIndex = 0;
                        }
-                       result |= ((int)(bytes[byteIndex++]&0xff)<<0);
+                       result |= ((int)(bytes[byteIndex++]&0xff));
                        if(byteIndex == size) {
                                size = c.read(bb);
                                bb.position(0);
@@ -200,9 +245,9 @@ public class ByteFileReader implements Closeable {
                } else {
                        return ((bytes[byteIndex++]&0xff)<<24) | ((bytes[byteIndex++]&0xff)<<16) | ((bytes[byteIndex++]&0xff)<<8) | ((bytes[byteIndex++]&0xff));
                }
-               
+
        }
-       
+
        final protected int getSize() {
                return size;
        }