X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Forg.simantics.graph%2Fsrc%2Forg%2Fsimantics%2Fgraph%2Frepresentation%2FByteFileReader.java;h=3a76a911a0d3c269c4c72cf5053b742aa8579432;hp=e930a00fcad35b9c36bc323bd119c165f8f495dc;hb=2bbecd30ee49d821a0abeaf6d417b5a7fdbe015f;hpb=5b3cb26acc141f8ec129ccc8bd087ced2f5b9dbb diff --git a/bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java b/bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java index e930a00fc..3a76a911a 100644 --- a/bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java +++ b/bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java @@ -6,6 +6,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UTFDataFormatException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; @@ -49,23 +50,67 @@ public class ByteFileReader implements Closeable { return bytes; } - final protected String utf(byte[] bytes, int index, int target) { - int i = 0; - while(index < target) { - int c = bytes[index++]&0xff; - if(c <= 0x7F) { - chars[i++] = (char)(c&0x7F); - } else if (c > 0x07FF) { - int c2 = bytes[index++]&0xff; - int c3 = bytes[index++]&0xff; - chars[i++] = (char)(((c&0xf)<<12) + ((c2&0x3f)<<6) + (c3&0x3f)); - } else { - int c2 = bytes[index++]&0xff; - chars[i++] = (char)(((c&0x1f)<<6) + (c2&0x3f)); + + final protected String utf(byte[] bytearr, int index, int target) throws UTFDataFormatException { + // Copied from DataInputStream + int utflen = target - index; + char[] chararr = utflen > chars.length ? new char[utflen] : chars; + + int c, char2, char3; + int count = index; + int chararr_count=0; + + while (count < target) { + c = (int) bytearr[count] & 0xff; + if (c > 127) break; + count++; + chararr[chararr_count++]=(char)c; + } + + while (count < target) { + c = (int) bytearr[count] & 0xff; + switch (c >> 4) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + /* 0xxxxxxx*/ + count++; + chararr[chararr_count++]=(char)c; + break; + case 12: case 13: + /* 110x xxxx 10xx xxxx*/ + count += 2; + if (count > target) + throw new UTFDataFormatException( + "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")"); + char2 = (int) bytearr[count-1]; + if ((char2 & 0xC0) != 0x80) + throw new UTFDataFormatException( + "malformed input around byte " + count); + chararr[chararr_count++]=(char)(((c & 0x1F) << 6) | + (char2 & 0x3F)); + break; + case 14: + /* 1110 xxxx 10xx xxxx 10xx xxxx */ + count += 3; + if (count > target) + throw new UTFDataFormatException( + "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")"); + char2 = (int) bytearr[count-2]; + char3 = (int) bytearr[count-1]; + if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) + throw new UTFDataFormatException( + "malformed input around byte " + (count-1)); + chararr[chararr_count++]=(char)(((c & 0x0F) << 12) | + ((char2 & 0x3F) << 6) | + ((char3 & 0x3F) << 0)); + break; + default: + /* 10xx xxxx, 1111 xxxx */ + throw new UTFDataFormatException( + "malformed input around byte " + count); } - } - return new String(chars, 0, i); + // The number of chars produced may be less than utflen + return new String(chararr, 0, chararr_count); } final protected byte[] safeBytes(int amount) throws IOException {