gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java
Modified UTF-8 decoding fixes for TG reading and indexing
[simantics/platform.git] / bundles / org.simantics.graph / src / org / simantics / graph / representation / ByteFileReader.java
index d4fb31f24648e20a33997d7dbdc836ed4ba3ff19..3a76a911a0d3c269c4c72cf5053b742aa8579432 100644 (file)
-package org.simantics.graph.representation;\r
-\r
-import java.io.Closeable;\r
-import java.io.EOFException;\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.nio.ByteBuffer;\r
-import java.nio.channels.ReadableByteChannel;\r
-\r
-/**\r
- * Must be closed after using by invoking {@link #close()}.\r
- */\r
-public class ByteFileReader implements Closeable {\r
-\r
-       final char[] chars = new char[3*128];\r
-\r
-       final private File file;\r
-       \r
-       /**\r
-        * May be <code>null</code>. If specified, it will be closed in\r
-        * {@link #close()}.\r
-        */\r
-       private InputStream stream;\r
-\r
-       /**\r
-        * A readable channel must always be specified since it is used for all\r
-        * reading. Channel is never closed by this class.\r
-        */\r
-       private ReadableByteChannel channel;\r
-       \r
-       final private ByteBuffer byteBuffer;\r
-       \r
-       final protected byte[] bytes;\r
-       private int size;\r
-\r
-       protected int byteIndex = 0;\r
-\r
-       final protected ReadableByteChannel getChannel() {\r
-               return channel;\r
-       }\r
-       \r
-       final protected ByteBuffer getByteBuffer() {\r
-               return byteBuffer;\r
-       }\r
-\r
-       final protected byte[] getBytes() {\r
-               return bytes;\r
-\r
-       }\r
-       final protected String utf(byte[] bytes, int index, int target) {\r
-               int i = 0;\r
-               while(index < target) {\r
-                       int c = bytes[index++]&0xff;\r
-                       if(c <= 0x7F) {\r
-                               chars[i++] = (char)(c&0x7F);\r
-                       } else if (c > 0x07FF) {\r
-                               int c2 = bytes[index++]&0xff;\r
-                               int c3 = bytes[index++]&0xff;\r
-                               chars[i++] = (char)(((c&0xf)<<12) + ((c2&0x3f)<<6) + (c3&0x3f)); \r
-                       } else {\r
-                               int c2 = bytes[index++]&0xff;\r
-                               chars[i++] = (char)(((c&0x1f)<<6) + (c2&0x3f)); \r
-                       }\r
-                       \r
-               }\r
-               return new String(chars, 0, i);\r
-       }\r
-\r
-       final protected byte[] safeBytes(int amount) throws IOException {\r
-\r
-               byte[] result = new byte[amount];\r
-               \r
-               int has = size-byteIndex;\r
-               if(amount >= has) {\r
-                       ReadableByteChannel c = channel;\r
-               ByteBuffer bb = byteBuffer;\r
-                       System.arraycopy(bytes, byteIndex, result, 0, has);\r
-                       ByteBuffer bb2 = ByteBuffer.wrap(result);\r
-                       bb2.position(has);\r
-                       // For some peculiar reason this seems to avoid OOM with large blocks as compared to c.read(bb2\r
-                       while(has < amount) {\r
-                               int todo = Math.min(amount-has, 65536);\r
-                               bb2.limit(has+todo);\r
-                               int got = c.read(bb2);\r
-                               if(got == -1) throw new IOException("Unexpected end-of-file");\r
-                               has += got; \r
-                       }\r
-                       size = c.read(bb);\r
-                       bb.position(0);\r
-                       byteIndex = 0;\r
-               } else {\r
-                       System.arraycopy(bytes, byteIndex, result, 0, amount);\r
-                       byteIndex += amount;\r
-               }\r
-\r
-               return result;\r
-               \r
-       }\r
-\r
-       final protected int getByte() throws IOException {\r
-           int has = size-byteIndex;\r
-           int result;\r
-        if(has == 0) {\r
-            ReadableByteChannel c = channel;\r
-            ByteBuffer bb = byteBuffer;            \r
-            size = c.read(bb);\r
-            if(size == -1) {\r
-                               throw new EOFException("Unexpected end-of-file");\r
-            }\r
-            bb.position(0);\r
-            byteIndex = 0;\r
-            if(size == 0)\r
-                return -1;\r
-        }\r
-        result = bytes[byteIndex];\r
-        if(result < 0)\r
-            result += 256;\r
-        ++byteIndex;        \r
-        return result;\r
-       }\r
-\r
-       public int getDynamicUInt32() throws IOException {\r
-               int length = getByte()&0xff; \r
-               if(length >= 0x80) {\r
-                       if(length >= 0xc0) {\r
-                               if(length >= 0xe0) {\r
-                                       if(length >= 0xf0) {\r
-                                               length &= 0x0f;\r
-                                               length += ((getByte()&0xff)<<3);\r
-                                               length += ((getByte()&0xff)<<11);\r
-                                               length += ((getByte()&0xff)<<19);\r
-                                               length += 0x10204080;\r
-                                       }\r
-                                       else {\r
-                                               length &= 0x1f;\r
-                                               length += ((getByte()&0xff)<<4);\r
-                                               length += ((getByte()&0xff)<<12);\r
-                                               length += ((getByte()&0xff)<<20);\r
-                                               length += 0x204080;\r
-                                       }\r
-                               }\r
-                               else {\r
-                                       length &= 0x3f;\r
-                                       length += ((getByte()&0xff)<<5);\r
-                                       length += ((getByte()&0xff)<<13);\r
-                                       length += 0x4080;\r
-                               }\r
-                       }\r
-                       else {\r
-                               length &= 0x7f;\r
-                               length += ((getByte()&0xff)<<6);\r
-                               length += 0x80;\r
-                       }\r
-               }\r
-               return length;\r
-       }\r
-\r
-       final protected int safeInt() throws IOException {\r
-\r
-               if(byteIndex >= (size-5)) {\r
-                       int result = 0;\r
-                       ReadableByteChannel c = channel;\r
-               ByteBuffer bb = byteBuffer;\r
-                       if(byteIndex == size) {\r
-                               size = c.read(bb);\r
-                               if(size == -1) throw new EOFException("Unexpected end-of-file");\r
-                               bb.position(0);\r
-                               byteIndex = 0;\r
-                       }\r
-                       result |= ((int)(bytes[byteIndex++]&0xff)<<24);\r
-                       if(byteIndex == size) {\r
-                               size = c.read(bb);\r
-                               if(size == -1) throw new EOFException("Unexpected end-of-file");\r
-                               bb.position(0);\r
-                               byteIndex = 0;\r
-                       }\r
-                       result |= ((int)(bytes[byteIndex++]&0xff)<<16);\r
-                       if(byteIndex == size) {\r
-                               size = c.read(bb);\r
-                               if(size == -1) throw new EOFException("Unexpected end-of-file");\r
-                               bb.position(0);\r
-                               byteIndex = 0;\r
-                       }\r
-                       result |= ((int)(bytes[byteIndex++]&0xff)<<8);\r
-                       if(byteIndex == size) {\r
-                               size = c.read(bb);\r
-                               if(size == -1) throw new EOFException("Unexpected end-of-file");\r
-                               bb.position(0);\r
-                               byteIndex = 0;\r
-                       }\r
-                       result |= ((int)(bytes[byteIndex++]&0xff)<<0);\r
-                       if(byteIndex == size) {\r
-                               size = c.read(bb);\r
-                               bb.position(0);\r
-                               byteIndex = 0;\r
-                       }\r
-                       return result;\r
-               } else {\r
-                       return ((bytes[byteIndex++]&0xff)<<24) | ((bytes[byteIndex++]&0xff)<<16) | ((bytes[byteIndex++]&0xff)<<8) | ((bytes[byteIndex++]&0xff));\r
-               }\r
-               \r
-       }\r
-       \r
-       final protected int getSize() {\r
-               return size;\r
-       }\r
-\r
-       public ByteFileReader(File file, int size) throws IOException {\r
-           \r
-        bytes = new byte[size];\r
-        byteBuffer = ByteBuffer.wrap(bytes);\r
-\r
-        this.file = file;\r
-        \r
-        FileInputStream fis = new FileInputStream(file); \r
-        stream = fis; \r
-        channel = fis.getChannel();\r
-        this.size = channel.read(byteBuffer);\r
-        byteBuffer.position(0);\r
-           \r
-       }\r
-\r
-       public ByteFileReader(FileInputStream stream, int size) throws IOException {\r
-               this(stream, stream.getChannel(), size);\r
-       }\r
-    \r
-       public ByteFileReader(InputStream stream, ReadableByteChannel channel, int size) throws IOException {\r
-           \r
-               bytes = new byte[size];\r
-               byteBuffer = ByteBuffer.wrap(bytes);\r
-\r
-               this.file = null;\r
-               this.stream = stream;\r
-               this.channel = channel;\r
-               this.size = channel.read(byteBuffer);\r
-               byteBuffer.position(0);\r
-               \r
-       }\r
-\r
-       public void close() throws IOException {\r
-               if (stream != null) {\r
-                       stream.close();\r
-                       stream = null;\r
-               }\r
-       }\r
-       \r
-       public void reset() throws IOException {\r
-           \r
-           if(file == null) throw new IllegalStateException("No file - cannot reset");\r
-        \r
-        FileInputStream fis = new FileInputStream(file); \r
-        stream = fis; \r
-        channel = fis.getChannel();\r
-        this.size = channel.read(byteBuffer);\r
-        byteBuffer.position(0);\r
-        \r
-       }\r
-\r
-}\r
+package org.simantics.graph.representation;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UTFDataFormatException;
+import java.nio.ByteBuffer;
+import java.nio.channels.ReadableByteChannel;
+
+/**
+ * Buffered big-endian reader on top of a {@link ReadableByteChannel}.
+ * <p>
+ * Data is pulled from the channel into a fixed-size byte array and handed out
+ * from there. The instance keeps unsynchronized mutable state (buffer index,
+ * scratch character array), so it must not be shared between threads without
+ * external locking.
+ * <p>
+ * Must be closed after using by invoking {@link #close()}.
+ */
+public class ByteFileReader implements Closeable {
+
+       /** Scratch array reused by {@link #utf(byte[], int, int)} for inputs of at most this many bytes. */
+       final char[] chars = new char[3*128];
+
+       /** Source file, or <code>null</code> when constructed from a stream; required by {@link #reset()}. */
+       final private File file;
+
+       /**
+        * May be <code>null</code>. If specified, it will be closed in
+        * {@link #close()}.
+        */
+       private InputStream stream;
+
+       /**
+        * A readable channel must always be specified since it is used for all
+        * reading. Channel is never closed directly by this class.
+        */
+       private ReadableByteChannel channel;
+
+       /** View over {@link #bytes} used as the destination of channel reads. */
+       final private ByteBuffer byteBuffer;
+
+       /** Backing array of the read buffer. */
+       final protected byte[] bytes;
+
+       /** Result of the latest channel read into the buffer: number of valid bytes, or -1 at end of stream. */
+       private int size;
+
+       /** Index in {@link #bytes} of the next byte to hand out. */
+       protected int byteIndex = 0;
+
+       /** @return the channel all data is read from */
+       final protected ReadableByteChannel getChannel() {
+               return channel;
+       }
+
+       /** @return the buffer wrapping {@link #getBytes()} */
+       final protected ByteBuffer getByteBuffer() {
+               return byteBuffer;
+       }
+
+       /** @return the backing array of the read buffer */
+       final protected byte[] getBytes() {
+               return bytes;
+       }
+
+       /**
+        * @return the result of the latest channel read into the buffer: the
+        *         number of valid bytes, or -1 if that read hit end of stream
+        */
+       final protected int getSize() {
+               return size;
+       }
+
+       /**
+        * Decodes modified UTF-8 (the encoding of
+        * {@link java.io.DataInput#readUTF()}) from a byte array.
+        *
+        * @param bytearr array holding the encoded data
+        * @param index   index of the first encoded byte
+        * @param target  index one past the last encoded byte
+        * @return the decoded string
+        * @throws UTFDataFormatException if the range holds a truncated or
+        *         malformed sequence
+        */
+       final protected String utf(byte[] bytearr, int index, int target) throws UTFDataFormatException {
+               // Copied from DataInputStream
+               int utflen = target - index;
+               // Every char consumes at least one byte, so utflen chars are always enough.
+               char[] chararr = utflen > chars.length ? new char[utflen] : chars;
+
+               int c, char2, char3;
+               int count = index;
+               int chararr_count = 0;
+
+               // Fast path: leading run of plain ASCII.
+               while (count < target) {
+                       c = (int) bytearr[count] & 0xff;
+                       if (c > 127) break;
+                       count++;
+                       chararr[chararr_count++] = (char) c;
+               }
+
+               while (count < target) {
+                       c = (int) bytearr[count] & 0xff;
+                       switch (c >> 4) {
+                       case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
+                               /* 0xxxxxxx */
+                               count++;
+                               chararr[chararr_count++] = (char) c;
+                               break;
+                       case 12: case 13:
+                               /* 110x xxxx   10xx xxxx */
+                               count += 2;
+                               if (count > target)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
+                               char2 = (int) bytearr[count-1];
+                               if ((char2 & 0xC0) != 0x80)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input around byte " + count);
+                               chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
+                                               (char2 & 0x3F));
+                               break;
+                       case 14:
+                               /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                               count += 3;
+                               if (count > target)
+                                       throw new UTFDataFormatException(
+                                                       "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
+                               char2 = (int) bytearr[count-2];
+                               char3 = (int) bytearr[count-1];
+                               if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                                       throw new UTFDataFormatException(
+                                                       "malformed input around byte " + (count-1));
+                               chararr[chararr_count++] = (char) (((c     & 0x0F) << 12) |
+                                               ((char2 & 0x3F) << 6)  |
+                                               ((char3 & 0x3F) << 0));
+                               break;
+                       default:
+                               /* 10xx xxxx,  1111 xxxx */
+                               throw new UTFDataFormatException(
+                                               "malformed input around byte " + count);
+                       }
+               }
+               // The number of chars produced may be less than utflen
+               return new String(chararr, 0, chararr_count);
+       }
+
+       /**
+        * Reads the next chunk from the channel into the buffer and rewinds both
+        * the buffer position and {@link #byteIndex}.
+        *
+        * @return the new value of {@link #getSize()}; -1 at end of stream
+        */
+       private int refill() throws IOException {
+               size = channel.read(byteBuffer);
+               byteBuffer.position(0);
+               byteIndex = 0;
+               return size;
+       }
+
+       /**
+        * Reads exactly <code>amount</code> bytes into a new array, taking what
+        * is buffered first and the remainder directly from the channel.
+        *
+        * @param amount number of bytes to read
+        * @return a new array of length <code>amount</code>
+        * @throws EOFException if the channel ends before <code>amount</code>
+        *         bytes were read
+        * @throws IOException if reading from the channel fails
+        */
+       final protected byte[] safeBytes(int amount) throws IOException {
+               byte[] result = new byte[amount];
+               // size is -1 after end of stream; never pass a negative count to arraycopy.
+               int has = Math.max(size - byteIndex, 0);
+
+               if (amount < has) {
+                       System.arraycopy(bytes, byteIndex, result, 0, amount);
+                       byteIndex += amount;
+                       return result;
+               }
+
+               System.arraycopy(bytes, byteIndex, result, 0, has);
+               ByteBuffer out = ByteBuffer.wrap(result);
+               out.position(has);
+               // Reading in chunks of at most 64 kB has been observed to avoid OOM with
+               // large blocks, as compared to a single channel.read(out).
+               while (has < amount) {
+                       int todo = Math.min(amount - has, 65536);
+                       out.limit(has + todo);
+                       int got = channel.read(out);
+                       if (got == -1) throw new EOFException("Unexpected end-of-file");
+                       has += got;
+                       // Defensive: pin the position to the number of bytes received so far
+                       // instead of relying on the channel implementation to advance it.
+                       out.position(has);
+               }
+               // The buffer is fully consumed; fetch the next chunk right away.
+               // End of stream is tolerated here and reported by the next read.
+               refill();
+               return result;
+       }
+
+       /**
+        * Reads one byte.
+        *
+        * @return the byte as a value in 0..255, or -1 if the channel delivered
+        *         zero bytes when the buffer was refilled
+        * @throws EOFException if the channel is at end of stream
+        * @throws IOException if reading from the channel fails
+        */
+       final protected int getByte() throws IOException {
+               // "<=" rather than "==": size is -1 once end of stream has been seen.
+               if (size - byteIndex <= 0) {
+                       if (refill() == -1)
+                               throw new EOFException("Unexpected end-of-file");
+                       if (size == 0)
+                               return -1;
+               }
+               return bytes[byteIndex++] & 0xff;
+       }
+
+       /**
+        * Reads a variable-length unsigned integer of one to four bytes. The
+        * high bits of the first byte select the length; the remaining bits and
+        * the following bytes are combined as coded below.
+        *
+        * @return the decoded value
+        * @throws IOException if reading from the channel fails
+        */
+       public int getDynamicUInt32() throws IOException {
+               int length = getByte();
+               if (length < 0x80) {
+                       // Single byte, value stored as is.
+                       return length;
+               }
+               if (length < 0xc0) {
+                       length &= 0x7f;
+                       length += (getByte()<<6);
+                       length += 0x80;
+                       return length;
+               }
+               if (length < 0xe0) {
+                       length &= 0x3f;
+                       length += (getByte()<<5);
+                       length += (getByte()<<13);
+                       length += 0x4080;
+                       return length;
+               }
+               if (length < 0xf0) {
+                       length &= 0x1f;
+                       length += (getByte()<<4);
+                       length += (getByte()<<12);
+                       length += (getByte()<<20);
+                       length += 0x204080;
+                       return length;
+               }
+               length &= 0x0f;
+               length += (getByte()<<3);
+               length += (getByte()<<11);
+               length += (getByte()<<19);
+               length += 0x10204080;
+               return length;
+       }
+
+       /**
+        * Reads a big-endian 32-bit integer, refilling the buffer as needed.
+        *
+        * @return the integer
+        * @throws EOFException if the channel ends before four bytes were read
+        * @throws IOException if reading from the channel fails
+        */
+       final protected int safeInt() throws IOException {
+               if (byteIndex < (size-5)) {
+                       // Fast path: all four bytes are already buffered.
+                       return ((bytes[byteIndex++]&0xff)<<24) | ((bytes[byteIndex++]&0xff)<<16) | ((bytes[byteIndex++]&0xff)<<8) | ((bytes[byteIndex++]&0xff));
+               }
+
+               int result = 0;
+               for (int shift = 24; shift >= 0; shift -= 8) {
+                       // ">=" rather than "==": size is -1 once end of stream has been seen.
+                       if (byteIndex >= size && refill() == -1)
+                               throw new EOFException("Unexpected end-of-file");
+                       result |= (bytes[byteIndex++]&0xff) << shift;
+               }
+               // Fetch the next chunk right away when the buffer was drained.
+               // End of stream is tolerated here and reported by the next read.
+               if (byteIndex >= size)
+                       refill();
+               return result;
+       }
+
+       /**
+        * Opens {@link #file}, makes it the current stream and channel, and
+        * loads the first chunk into the buffer. The new stream is closed again
+        * if that initial read fails.
+        */
+       private void openFile() throws IOException {
+               FileInputStream fis = new FileInputStream(file);
+               boolean opened = false;
+               try {
+                       stream = fis;
+                       channel = fis.getChannel();
+                       refill();
+                       opened = true;
+               } finally {
+                       if (!opened) {
+                               stream = null;
+                               try {
+                                       fis.close();
+                               } catch (IOException ignored) {
+                                       // The failure that brought us here is the one worth reporting.
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Creates a reader for a file. The file is opened by this constructor
+        * and closed by {@link #close()}.
+        *
+        * @param file the file to read
+        * @param size capacity of the read buffer in bytes
+        * @throws IOException if the file cannot be opened or read
+        */
+       public ByteFileReader(File file, int size) throws IOException {
+               bytes = new byte[size];
+               byteBuffer = ByteBuffer.wrap(bytes);
+               this.file = file;
+               openFile();
+       }
+
+       /**
+        * Creates a reader for an already open file stream, reading through the
+        * stream's channel.
+        *
+        * @param stream the stream to read; closed by {@link #close()}
+        * @param size   capacity of the read buffer in bytes
+        * @throws IOException if the initial read fails
+        */
+       public ByteFileReader(FileInputStream stream, int size) throws IOException {
+               this(stream, stream.getChannel(), size);
+       }
+
+       /**
+        * Creates a reader for a channel.
+        *
+        * @param stream  stream to close in {@link #close()}, may be
+        *                <code>null</code>
+        * @param channel the channel all data is read from
+        * @param size    capacity of the read buffer in bytes
+        * @throws IOException if the initial read fails
+        */
+       public ByteFileReader(InputStream stream, ReadableByteChannel channel, int size) throws IOException {
+               bytes = new byte[size];
+               byteBuffer = ByteBuffer.wrap(bytes);
+
+               this.file = null;
+               this.stream = stream;
+               this.channel = channel;
+               this.size = channel.read(byteBuffer);
+               byteBuffer.position(0);
+       }
+
+       /**
+        * Closes the underlying stream, if there is one. Calling this more than
+        * once has no further effect.
+        */
+       @Override
+       public void close() throws IOException {
+               if (stream != null) {
+                       stream.close();
+                       stream = null;
+               }
+       }
+
+       /**
+        * Restarts reading from the beginning of the file. The stream that was
+        * open until now is closed first so that its file handle is not leaked.
+        *
+        * @throws IllegalStateException if this reader was not created from a
+        *         {@link File}
+        * @throws IOException if the file cannot be reopened or read
+        */
+       public void reset() throws IOException {
+               if (file == null) throw new IllegalStateException("No file - cannot reset");
+
+               if (stream != null) {
+                       stream.close();
+                       stream = null;
+               }
+               openFile();
+       }
+
+}