]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java
Modified UTF-8 decoding fixes for TG reading and indexing
[simantics/platform.git] / bundles / org.simantics.graph / src / org / simantics / graph / representation / ByteFileReader.java
1 package org.simantics.graph.representation;
2
3 import java.io.Closeable;
4 import java.io.EOFException;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.UTFDataFormatException;
10 import java.nio.ByteBuffer;
11 import java.nio.channels.ReadableByteChannel;
12
/**
 * Buffered reader of binary data on top of a {@link ReadableByteChannel}.
 * <p>
 * Data is pulled from the channel into a fixed-size window ({@link #bytes},
 * wrapped by a {@link ByteBuffer}); {@link #byteIndex} is the read cursor
 * inside that window and <code>size</code> is the number of valid bytes in it,
 * or <code>-1</code> once the channel has reported end-of-stream.
 * <p>
 * Must be closed after using by invoking {@link #close()}.
 */
public class ByteFileReader implements Closeable {

    /** Scratch character buffer reused by {@link #utf(byte[], int, int)} for short strings. */
    final char[] chars = new char[3*128];

    /** Source file, or <code>null</code> if the reader was created from a stream/channel. */
    final private File file;

    /**
     * May be <code>null</code>. If specified, it will be closed in
     * {@link #close()}.
     */
    private InputStream stream;

    /**
     * A readable channel must always be specified since it is used for all
     * reading. Channel is never closed by this class.
     */
    private ReadableByteChannel channel;

    /** Buffer view over {@link #bytes}; its position is kept at 0 between reads. */
    final private ByteBuffer byteBuffer;

    /** Backing array of the read window. */
    final protected byte[] bytes;

    /** Result of the latest window fill: number of valid bytes, or -1 at end-of-stream. */
    private int size;

    /** Index of the next unread byte in {@link #bytes}. */
    protected int byteIndex = 0;

    /**
     * @return the channel all data is read from
     */
    final protected ReadableByteChannel getChannel() {
        return channel;
    }

    /**
     * @return the buffer that wraps {@link #getBytes()}
     */
    final protected ByteBuffer getByteBuffer() {
        return byteBuffer;
    }

    /**
     * @return the backing array of the read window
     */
    final protected byte[] getBytes() {
        return bytes;
    }

    /**
     * Decodes the modified UTF-8 data in <code>bytearr[index, target)</code>
     * into a string. The decoding loop is copied from
     * {@link java.io.DataInputStream}.
     *
     * @param bytearr array containing the encoded characters
     * @param index index of the first encoded byte (inclusive)
     * @param target index just past the last encoded byte (exclusive)
     * @return the decoded string
     * @throws UTFDataFormatException if the data is not valid modified UTF-8
     *         or a multi-byte character is cut off by <code>target</code>
     */
    final protected String utf(byte[] bytearr, int index, int target) throws UTFDataFormatException {
        // Copied from DataInputStream
        int utflen = target - index;
        // One byte produces at most one char, so utflen chars always suffice.
        char[] chararr = utflen > chars.length ? new char[utflen] : chars;

        int c, char2, char3;
        int count = index;
        int chararr_count = 0;

        // Fast path: leading run of plain 7-bit characters.
        while (count < target) {
            c = (int) bytearr[count] & 0xff;
            if (c > 127) break;
            count++;
            chararr[chararr_count++] = (char) c;
        }

        while (count < target) {
            c = (int) bytearr[count] & 0xff;
            switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                /* 0xxxxxxx*/
                count++;
                chararr[chararr_count++] = (char) c;
                break;
            case 12: case 13:
                /* 110x xxxx   10xx xxxx*/
                count += 2;
                if (count > target)
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
                char2 = (int) bytearr[count-1];
                if ((char2 & 0xC0) != 0x80)
                    throw new UTFDataFormatException(
                            "malformed input around byte " + count);
                chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
                        (char2 & 0x3F));
                break;
            case 14:
                /* 1110 xxxx  10xx xxxx  10xx xxxx */
                count += 3;
                if (count > target)
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
                char2 = (int) bytearr[count-2];
                char3 = (int) bytearr[count-1];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                    throw new UTFDataFormatException(
                            "malformed input around byte " + (count-1));
                chararr[chararr_count++] = (char) (((c     & 0x0F) << 12) |
                        ((char2 & 0x3F) << 6)  |
                        ((char3 & 0x3F) << 0));
                break;
            default:
                /* 10xx xxxx,  1111 xxxx */
                throw new UTFDataFormatException(
                        "malformed input around byte " + count);
            }
        }
        // The number of chars produced may be less than utflen
        return new String(chararr, 0, chararr_count);
    }

    /**
     * Replaces the contents of the read window with the next data from the
     * channel and moves the read cursor to the start of the window.
     *
     * @return the value returned by the channel read: the number of bytes now
     *         available in the window, or -1 at end-of-stream
     * @throws IOException if reading from the channel fails
     */
    private int refill() throws IOException {
        size = channel.read(byteBuffer);
        byteBuffer.position(0);
        byteIndex = 0;
        return size;
    }

    /**
     * Reads exactly <code>amount</code> bytes into a newly allocated array.
     * If the request cannot be served from the window alone, the remainder is
     * read straight from the channel into the result and the window is then
     * refilled.
     *
     * @param amount number of bytes to read
     * @return a new array of length <code>amount</code>
     * @throws EOFException if the channel ends before <code>amount</code>
     *         bytes have been read
     * @throws IOException if reading from the channel fails
     */
    final protected byte[] safeBytes(int amount) throws IOException {
        byte[] result = new byte[amount];
        // size is -1 after end-of-stream; never treat that as a negative byte count.
        int has = Math.max(0, size - byteIndex);
        if (amount >= has) {
            if (has > 0)
                System.arraycopy(bytes, byteIndex, result, 0, has);
            ByteBuffer target = ByteBuffer.wrap(result);
            target.position(has);
            // Reading in chunks of at most 64k was observed by the original
            // author to avoid OOM with large blocks compared to a single read.
            while (has < amount) {
                int todo = Math.min(amount - has, 65536);
                target.limit(has + todo);
                int got = channel.read(target);
                if (got == -1) throw new EOFException("Unexpected end-of-file");
                has += got;
                // Kept from the original code, which sets the position explicitly
                // after every read.
                target.position(has);
            }
            // The window is exhausted; load the data following the block.
            // End-of-stream here is not an error, it is reported by the next read.
            refill();
        } else {
            System.arraycopy(bytes, byteIndex, result, 0, amount);
            byteIndex += amount;
        }
        return result;
    }

    /**
     * Reads a single unsigned byte.
     *
     * @return the byte as a value in the range 0..255, or -1 if the channel
     *         delivered zero bytes when the window was refilled
     * @throws EOFException if the channel is at end-of-stream
     * @throws IOException if reading from the channel fails
     */
    final protected int getByte() throws IOException {
        // "<= 0" also covers size == -1 left behind by an earlier fill that hit
        // end-of-stream; stale window contents must never be returned.
        if (size - byteIndex <= 0) {
            if (refill() == -1)
                throw new EOFException("Unexpected end-of-file");
            if (size == 0)
                return -1;
        }
        return bytes[byteIndex++] & 0xff;
    }

    /**
     * Reads a variable-length encoded unsigned integer. The high bits of the
     * first byte select how many further bytes are read: none below 0x80, one
     * below 0xc0, two below 0xe0 and three otherwise.
     *
     * @return the decoded value
     * @throws IOException if reading fails or the data ends prematurely
     */
    public int getDynamicUInt32() throws IOException {
        int length = getByte();
        if (length >= 0x80) {
            if (length >= 0xc0) {
                if (length >= 0xe0) {
                    if (length >= 0xf0) {
                        // NOTE(review): the mask keeps 4 bits while the next byte is
                        // shifted by only 3, so bit 3 overlaps; verify against the encoder.
                        length &= 0x0f;
                        length += (getByte()<<3);
                        length += (getByte()<<11);
                        length += (getByte()<<19);
                        length += 0x10204080;
                    }
                    else {
                        length &= 0x1f;
                        length += (getByte()<<4);
                        length += (getByte()<<12);
                        length += (getByte()<<20);
                        length += 0x204080;
                    }
                }
                else {
                    length &= 0x3f;
                    length += (getByte()<<5);
                    length += (getByte()<<13);
                    length += 0x4080;
                }
            }
            else {
                length &= 0x7f;
                length += (getByte()<<6);
                length += 0x80;
            }
        }
        return length;
    }

    /**
     * Reads a big-endian 32-bit integer, refilling the window as needed when
     * the value straddles a window boundary.
     *
     * @return the integer read
     * @throws EOFException if the channel ends before four bytes were read
     * @throws IOException if reading from the channel fails
     */
    final protected int safeInt() throws IOException {

        byte[] bytes = this.bytes;

        if (byteIndex >= (size-5)) {
            // Near the end of the window: fetch byte by byte, refilling on demand.
            int result = 0;
            for (int shift = 24; shift >= 0; shift -= 8) {
                // ">=" also catches size == -1 from an earlier end-of-stream fill.
                if (byteIndex >= size && refill() == -1)
                    throw new EOFException("Unexpected end-of-file");
                result |= (bytes[byteIndex++] & 0xff) << shift;
            }
            // Eager refill so the window is never left empty; hitting
            // end-of-stream here is legal and is reported by the next read.
            if (byteIndex >= size)
                refill();
            return result;
        } else {
            return ((bytes[byteIndex++]&0xff)<<24) | ((bytes[byteIndex++]&0xff)<<16) | ((bytes[byteIndex++]&0xff)<<8) | ((bytes[byteIndex++]&0xff));
        }

    }

    /**
     * @return the result of the latest window fill: the number of valid bytes
     *         in the window, or -1 if the channel reported end-of-stream
     */
    final protected int getSize() {
        return size;
    }

    /**
     * Opens the given file for reading and loads the first window of data.
     * The opened stream is owned by this reader and is closed in
     * {@link #close()}.
     *
     * @param file the file to read
     * @param size window size in bytes
     * @throws IOException if the file cannot be opened or read
     */
    public ByteFileReader(File file, int size) throws IOException {

        bytes = new byte[size];
        byteBuffer = ByteBuffer.wrap(bytes);

        this.file = file;

        FileInputStream fis = new FileInputStream(file);
        boolean initialized = false;
        try {
            stream = fis;
            channel = fis.getChannel();
            this.size = channel.read(byteBuffer);
            byteBuffer.position(0);
            initialized = true;
        } finally {
            // Nobody can call close() on a half-constructed reader, so release
            // the file handle here if the initial read failed.
            if (!initialized)
                fis.close();
        }

    }

    /**
     * Creates a reader on top of the given file stream and its channel.
     *
     * @param stream the stream to read; closed in {@link #close()}
     * @param size window size in bytes
     * @throws IOException if the initial read fails
     */
    public ByteFileReader(FileInputStream stream, int size) throws IOException {
        this(stream, stream.getChannel(), size);
    }

    /**
     * Creates a reader on top of the given channel and loads the first window
     * of data. {@link #reset()} is not supported for readers created this way.
     *
     * @param stream stream to close in {@link #close()}, may be <code>null</code>
     * @param channel the channel all data is read from; never closed by this class
     * @param size window size in bytes
     * @throws IOException if the initial read fails
     */
    public ByteFileReader(InputStream stream, ReadableByteChannel channel, int size) throws IOException {

        bytes = new byte[size];
        byteBuffer = ByteBuffer.wrap(bytes);

        this.file = null;
        this.stream = stream;
        this.channel = channel;
        this.size = channel.read(byteBuffer);
        byteBuffer.position(0);

    }

    /**
     * Closes the underlying stream, if there is one. Calling this more than
     * once has no further effect.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (stream != null) {
            stream.close();
            stream = null;
        }
    }

    /**
     * Reopens the file given at construction and restarts reading from its
     * beginning. The previously open stream, if any, is closed.
     *
     * @throws IllegalStateException if this reader was not created from a file
     * @throws IOException if the file cannot be reopened or read
     */
    public void reset() throws IOException {

        if (file == null) throw new IllegalStateException("No file - cannot reset");

        // Open the new stream first so a failure leaves the current state intact.
        FileInputStream fis = new FileInputStream(file);
        InputStream previous = stream;
        stream = fis;
        channel = fis.getChannel();
        try {
            // Loads the first window and moves the read cursor back to index 0.
            refill();
        } finally {
            if (previous != null)
                previous.close();
        }

    }

}