gerrit.simantics Code Review - simantics/platform.git/blob - bundles/org.simantics.graph/src/org/simantics/graph/representation/ByteFileReader.java
Modified UTF-8 decoding fixes for TG reading and indexing
[simantics/platform.git] / bundles / org.simantics.graph / src / org / simantics / graph / representation / ByteFileReader.java
1 package org.simantics.graph.representation;
2
3 import java.io.Closeable;
4 import java.io.EOFException;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.UTFDataFormatException;
10 import java.nio.ByteBuffer;
11 import java.nio.channels.ReadableByteChannel;
12
/**
 * Buffered binary reader on top of a {@link ReadableByteChannel}.
 * <p>
 * Data is pulled from the channel into a fixed-size byte array
 * ({@link #bytes}); {@link #byteIndex} is the read position inside that array
 * and {@link #getSize()} the number of valid bytes currently held. Subclasses
 * may access the buffer directly through the protected members.
 * <p>
 * Must be closed after using by invoking {@link #close()}.
 */
public class ByteFileReader implements Closeable {

    /** Scratch character buffer reused by {@link #utf(byte[], int, int)} for short strings. */
    final char[] chars = new char[3*128];

    /** The file being read, or <code>null</code> when constructed from a stream/channel. */
    final private File file;

    /**
     * May be <code>null</code>. If specified, it will be closed in
     * {@link #close()}.
     */
    private InputStream stream;

    /**
     * A readable channel must always be specified since it is used for all
     * reading. Channel is never closed by this class.
     */
    private ReadableByteChannel channel;

    /** View over {@link #bytes}; its position is kept at 0 between refills. */
    final private ByteBuffer byteBuffer;

    /** Backing array of the read buffer. */
    final protected byte[] bytes;

    /**
     * Result of the latest buffer refill: the number of valid bytes in
     * {@link #bytes}, or -1 once the channel has reported end-of-stream.
     */
    private int size;

    /** Index of the next unread byte in {@link #bytes}. */
    protected int byteIndex = 0;

    /**
     * @return the channel all data is read from
     */
    final protected ReadableByteChannel getChannel() {
        return channel;
    }

    /**
     * @return the buffer wrapping {@link #getBytes()}
     */
    final protected ByteBuffer getByteBuffer() {
        return byteBuffer;
    }

    /**
     * @return the backing array of the read buffer
     */
    final protected byte[] getBytes() {
        return bytes;
    }

    /**
     * Decodes modified UTF-8 data (the encoding used by
     * {@link java.io.DataInput#readUTF()}) from a byte array.
     * <p>
     * Strings of at most {@code chars.length} bytes are decoded through the
     * shared {@link #chars} scratch array, so concurrent calls on the same
     * instance would interfere with each other.
     *
     * @param bytearr array holding the encoded data
     * @param index index of the first encoded byte
     * @param target index one past the last encoded byte
     * @return the decoded string
     * @throws UTFDataFormatException if the data is not valid modified UTF-8
     *         or a multi-byte character is cut off at <code>target</code>
     */
    final protected String utf(byte[] bytearr, int index, int target) throws UTFDataFormatException {
        // Copied from DataInputStream
        int utflen = target - index;
        char[] chararr = utflen > chars.length ? new char[utflen] : chars;

        int c, char2, char3;
        int count = index;
        int chararr_count = 0;

        // Fast loop for the leading run of single-byte (ASCII) characters.
        while (count < target) {
            c = (int) bytearr[count] & 0xff;
            if (c > 127) break;
            count++;
            chararr[chararr_count++] = (char) c;
        }

        while (count < target) {
            c = (int) bytearr[count] & 0xff;
            switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                /* 0xxxxxxx*/
                count++;
                chararr[chararr_count++] = (char) c;
                break;
            case 12: case 13:
                /* 110x xxxx   10xx xxxx*/
                count += 2;
                if (count > target)
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
                char2 = (int) bytearr[count-1];
                if ((char2 & 0xC0) != 0x80)
                    throw new UTFDataFormatException(
                            "malformed input around byte " + count);
                chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
                        (char2 & 0x3F));
                break;
            case 14:
                /* 1110 xxxx  10xx xxxx  10xx xxxx */
                count += 3;
                if (count > target)
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end (" + (count-index) + " > " + utflen + ")");
                char2 = (int) bytearr[count-2];
                char3 = (int) bytearr[count-1];
                if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                    throw new UTFDataFormatException(
                            "malformed input around byte " + (count-1));
                chararr[chararr_count++] = (char) (((c     & 0x0F) << 12) |
                        ((char2 & 0x3F) << 6)  |
                        ((char3 & 0x3F) << 0));
                break;
            default:
                /* 10xx xxxx,  1111 xxxx */
                throw new UTFDataFormatException(
                        "malformed input around byte " + count);
            }
        }
        // The number of chars produced may be less than utflen
        return new String(chararr, 0, chararr_count);
    }

    /**
     * Reads the next <code>amount</code> bytes into a newly allocated array.
     * Bytes already buffered are copied first; if they do not suffice, the
     * remainder is read from the channel directly into the result and the
     * internal buffer is refilled afterwards.
     *
     * @param amount number of bytes to read
     * @return a new array of exactly <code>amount</code> bytes
     * @throws EOFException if the channel ends before <code>amount</code>
     *         bytes were available
     * @throws IOException if reading from the channel fails
     */
    final protected byte[] safeBytes(int amount) throws IOException {

        byte[] result = new byte[amount];

        // size is -1 after end-of-stream; treat that as an empty buffer
        // instead of handing a negative length to arraycopy.
        int has = Math.max(size - byteIndex, 0);
        if (amount >= has) {
            if (has > 0)
                System.arraycopy(bytes, byteIndex, result, 0, has);
            ByteBuffer bb2 = ByteBuffer.wrap(result);
            bb2.position(has);
            // For some peculiar reason reading in limited chunks seems to avoid
            // OOM with large blocks as compared to a single c.read(bb2)
            while (has < amount) {
                int todo = Math.min(amount - has, 65536);
                bb2.limit(has + todo);
                int got = channel.read(bb2);
                if (got == -1) throw new EOFException("Unexpected end-of-file");
                has += got;
            }
            // The buffered data is used up: load the next chunk. Hitting
            // end-of-stream here is fine, the request itself was satisfied.
            fillBuffer();
        } else {
            System.arraycopy(bytes, byteIndex, result, 0, amount);
            byteIndex += amount;
        }

        return result;

    }

    /**
     * Reads one byte, refilling the buffer from the channel when it is empty.
     *
     * @return the byte as an unsigned value (0-255), or -1 if the channel
     *         delivered zero bytes on refill
     * @throws EOFException if the channel has reached end-of-stream
     * @throws IOException if reading from the channel fails
     */
    final protected int getByte() throws IOException {
        // "<= 0" rather than "== 0": size is -1 after end-of-stream, and the
        // stale buffer contents must not be returned in that case.
        if (size - byteIndex <= 0) {
            if (fillBuffer() == -1) {
                throw new EOFException("Unexpected end-of-file");
            }
            if (size == 0)
                return -1;
        }
        return bytes[byteIndex++] & 0xff;
    }

    /**
     * Reads a variable-length encoded unsigned integer. The first byte selects
     * the encoding length: values below 0x80 are the result as such, 0x80-0xbf
     * is followed by one more byte, 0xc0-0xdf by two and 0xe0 and above by
     * three more bytes. Each longer form adds a fixed offset to its payload.
     *
     * @return the decoded value
     * @throws IOException if reading fails or the data ends prematurely
     */
    public int getDynamicUInt32() throws IOException {
        int length = getByte()&0xff;
        if(length >= 0x80) {
            if(length >= 0xc0) {
                if(length >= 0xe0) {
                    if(length >= 0xf0) {
                        // NOTE(review): four low bits are kept while the next byte
                        // is shifted by only 3; left as-is since it must match the
                        // writer - confirm against the encoder.
                        length &= 0x0f;
                        length += ((getByte()&0xff)<<3);
                        length += ((getByte()&0xff)<<11);
                        length += ((getByte()&0xff)<<19);
                        length += 0x10204080;
                    }
                    else {
                        length &= 0x1f;
                        length += ((getByte()&0xff)<<4);
                        length += ((getByte()&0xff)<<12);
                        length += ((getByte()&0xff)<<20);
                        length += 0x204080;
                    }
                }
                else {
                    length &= 0x3f;
                    length += ((getByte()&0xff)<<5);
                    length += ((getByte()&0xff)<<13);
                    length += 0x4080;
                }
            }
            else {
                length &= 0x7f;
                length += ((getByte()&0xff)<<6);
                length += 0x80;
            }
        }
        return length;
    }

    /**
     * Reads a big-endian 32-bit integer, refilling the buffer as needed when
     * the four bytes straddle a buffer boundary.
     *
     * @return the integer read
     * @throws EOFException if the channel ends before four bytes were read
     * @throws IOException if reading from the channel fails
     */
    final protected int safeInt() throws IOException {

        if (byteIndex >= (size - 5)) {
            // Slow path: fewer than six bytes buffered, so a refill may be
            // needed before any of the four bytes.
            int result = 0;
            for (int shift = 24; shift >= 0; shift -= 8) {
                // ">=" also covers size == -1 left behind by an earlier
                // refill that hit end-of-stream.
                if (byteIndex >= size && fillBuffer() == -1)
                    throw new EOFException("Unexpected end-of-file");
                result |= (bytes[byteIndex++] & 0xff) << shift;
            }
            // Refill eagerly once the buffer is drained. End-of-stream is not
            // an error here because the integer was read completely.
            if (byteIndex >= size)
                fillBuffer();
            return result;
        } else {
            // Fast path: at least six bytes are buffered, no refill possible.
            return ((bytes[byteIndex++]&0xff)<<24) | ((bytes[byteIndex++]&0xff)<<16) | ((bytes[byteIndex++]&0xff)<<8) | ((bytes[byteIndex++]&0xff));
        }

    }

    /**
     * @return the number of valid bytes in the buffer as reported by the
     *         latest refill, or -1 if that refill hit end-of-stream
     */
    final protected int getSize() {
        return size;
    }

    /**
     * Opens the given file and loads the first chunk of it into the buffer.
     * The opened stream is closed by {@link #close()}.
     *
     * @param file the file to read
     * @param size capacity of the internal read buffer in bytes
     * @throws IOException if the file cannot be opened or read
     */
    public ByteFileReader(File file, int size) throws IOException {

        bytes = new byte[size];
        byteBuffer = ByteBuffer.wrap(bytes);

        this.file = file;

        FileInputStream fis = new FileInputStream(file);
        boolean ok = false;
        try {
            stream = fis;
            channel = fis.getChannel();
            fillBuffer();
            ok = true;
        } finally {
            // Nobody can call close() on a half-constructed reader, so the
            // stream has to be released here if the initial read fails.
            if (!ok)
                closeQuietly(fis);
        }

    }

    /**
     * Reads from the channel of the given stream. The stream is closed by
     * {@link #close()}.
     *
     * @param stream the stream to read
     * @param size capacity of the internal read buffer in bytes
     * @throws IOException if the initial read fails
     */
    public ByteFileReader(FileInputStream stream, int size) throws IOException {
        this(stream, stream.getChannel(), size);
    }

    /**
     * Reads from the given channel and loads the first chunk into the buffer.
     * {@link #reset()} is not supported for readers created this way.
     *
     * @param stream stream to close in {@link #close()}, may be <code>null</code>
     * @param channel the channel all data is read from
     * @param size capacity of the internal read buffer in bytes
     * @throws IOException if the initial read fails
     */
    public ByteFileReader(InputStream stream, ReadableByteChannel channel, int size) throws IOException {

        bytes = new byte[size];
        byteBuffer = ByteBuffer.wrap(bytes);

        this.file = null;
        this.stream = stream;
        this.channel = channel;
        fillBuffer();

    }

    /**
     * Closes the underlying stream, if there is one. The channel itself is
     * not closed by this method.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (stream != null) {
            stream.close();
            stream = null;
        }
    }

    /**
     * Starts reading the file again from its beginning: the file is reopened,
     * the previously opened stream is closed and the buffer is reloaded.
     *
     * @throws IllegalStateException if this reader was not created from a file
     * @throws IOException if the file cannot be reopened or read
     */
    public void reset() throws IOException {

        if(file == null) throw new IllegalStateException("No file - cannot reset");

        FileInputStream fis = new FileInputStream(file);
        boolean ok = false;
        try {
            // Release the stream opened earlier; it would otherwise leak.
            close();
            stream = fis;
            channel = fis.getChannel();
            byteBuffer.clear();
            // Also rewinds byteIndex so that reading restarts at the first byte.
            fillBuffer();
            ok = true;
        } finally {
            if (!ok)
                closeQuietly(fis);
        }

    }

    /**
     * Loads the next chunk from the channel into the start of the buffer and
     * rewinds the read position.
     *
     * @return the new value of {@link #size}: the number of bytes read, or -1
     *         at end-of-stream
     */
    private int fillBuffer() throws IOException {
        size = channel.read(byteBuffer);
        byteBuffer.position(0);
        byteIndex = 0;
        return size;
    }

    /** Closes a stream on a failure path where the original exception must win. */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // The failure that brought us here is the one reported to the caller.
        }
    }

}