1 /*******************************************************************************
\r
2 * Copyright (c) 2007, 2011 Association for Decentralized Information Management
\r
3 * in Industry THTH ry.
\r
4 * All rights reserved. This program and the accompanying materials
\r
5 * are made available under the terms of the Eclipse Public License v1.0
\r
6 * which accompanies this distribution, and is available at
\r
7 * http://www.eclipse.org/legal/epl-v10.html
\r
10 * VTT Technical Research Centre of Finland - initial API and implementation
\r
11 *******************************************************************************/
\r
12 package org.simantics.fastlz.java;
\r
16 * @author Tuukka Lehtonen
\r
18 public class FastLZImpl {
\r
21 private static final int MAX_LEN = 264;
\r
22 private static final int MAX_COPY = 32;
\r
23 // For level 1 compression
\r
24 private static final int MAX_DISTANCE = 8192;
\r
25 // For level 2 compression
\r
26 private static final int MAX_DISTANCE2 = 8191;
\r
27 private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1);
\r
29 private static final boolean FASTLZ_SAFE = false;
\r
31 public static final int FASTLZ_VERSION = 0x000100;
\r
33 public static final int FASTLZ_VERSION_MAJOR = 0;
\r
34 public static final int FASTLZ_VERSION_MINOR = 1;
\r
35 public static final int FASTLZ_VERSION_REVISION = 0;
\r
37 public static final String FASTLZ_VERSION_STRING = "0.1.0";
\r
39 private static final int HASH_LOG = 13;
\r
40 private static final int HASH_SIZE = (1 << HASH_LOG);
\r
41 private static final int HASH_MASK = (HASH_SIZE - 1);
\r
43 private static final int FASTLZ_READU16(byte[] array, int offset) {
\r
44 final int b0 = array[offset] & 0xff;
\r
45 final int b1 = array[offset + 1] & 0xff;
\r
46 return b0 | (b1 << 8);
\r
54 private static final int HASH_FUNCTION(byte[] array, int offset) {
\r
55 final int b0 = array[offset] & 0xff;
\r
56 final int b1 = array[offset+1] & 0xff;
\r
57 final int b2 = array[offset+2] & 0xff;
\r
58 int v = b0 | (b1 << 8);
\r
59 final int v2 = b1 | (b2 << 8);
\r
60 v ^= v2 ^ (v >>> (16 - HASH_LOG));
\r
66 Compress a block of data in the input buffer and returns the size of
\r
67 compressed block. The size of input buffer is specified by length. The
\r
68 minimum input buffer size is 16.
\r
70 The output buffer must be at least 5% larger than the input buffer
\r
71 and can not be smaller than 66 bytes.
\r
73 If the input is not compressible, the return value might be larger than
\r
74 length (input buffer size).
\r
76 The input buffer and the output buffer can not overlap.
\r
78 public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
\r
79 /* for short block, choose fastlz1 */
\r
81 return fastlz1_compress(input, inputOffset, length, output, outputOffset);
\r
84 return fastlz2_compress(input, inputOffset, length, output, outputOffset);
\r
88 Decompress a block of compressed data and returns the size of the
\r
89 decompressed block. If error occurs, e.g. the compressed data is
\r
90 corrupted or the output buffer is not large enough, then 0 (zero)
\r
91 will be returned instead.
\r
93 The input buffer and the output buffer can not overlap.
\r
95 Decompression is memory safe and guaranteed not to write the output buffer
\r
96 more than what is specified in maxout.
\r
98 public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
\r
100 // magic identifier for compression level
\r
101 int level = (((int) input[0] & 0xff) >>> 5) + 1;
\r
104 return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout);
\r
106 return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout);
\r
108 // unknown level, trigger error
\r
113 Compress a block of data in the input buffer and returns the size of
\r
114 compressed block. The size of input buffer is specified by length. The
\r
115 minimum input buffer size is 16.
\r
117 The output buffer must be at least 5% larger than the input buffer
\r
118 and can not be smaller than 66 bytes.
\r
120 If the input is not compressible, the return value might be larger than
\r
121 length (input buffer size).
\r
123 The input buffer and the output buffer can not overlap.
\r
125 Compression level can be specified in parameter level. At the moment,
\r
126 only level 1 and level 2 are supported.
\r
127 Level 1 is the fastest compression and generally useful for short data.
\r
128 Level 2 is slightly slower but it gives better compression ratio.
\r
130 Note that the compressed data, regardless of the level, can always be
\r
131 decompressed using the function fastlz_decompress above.
\r
133 public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output,
\r
134 int outputOffset) {
\r
136 return fastlz1_compress(input, inputOffset, length, output, outputOffset);
\r
138 return fastlz2_compress(input, inputOffset, length, output, outputOffset);
\r
144 private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
\r
145 int ip = inputOffset;
\r
146 int ip_bound = inputOffset + length - 2;
\r
147 int ip_limit = inputOffset + length - 12;
\r
148 int op = outputOffset;
\r
150 int[] htab = new int[HASH_SIZE];
\r
161 // create literal copy only
\r
162 output[op++] = (byte) (length-1);
\r
164 while(ip <= ip_bound)
\r
165 output[op++] = input[ip++];
\r
172 // initializes hash table
\r
173 for (hslot = 0; hslot < HASH_SIZE; ++hslot)
\r
176 // we start with literal copy
\r
178 output[op++] = MAX_COPY-1;
\r
179 output[op++] = input[ip++];
\r
180 output[op++] = input[ip++];
\r
183 while (ip < ip_limit) {
\r
187 // minimum match length
\r
190 // comparison starting-point
\r
193 // find potential match
\r
194 hval = HASH_FUNCTION(input, ip);
\r
198 // calculate distance to the match
\r
199 distance = anchor - ref;
\r
201 // update hash table
\r
202 htab[hslot] = anchor;
\r
204 // is this a match? check the first 3 bytes
\r
206 (distance >= MAX_DISTANCE) ||
\r
207 input[ref++] != input[ip++] ||
\r
208 input[ref++] != input[ip++] ||
\r
209 input[ref++] != input[ip++])
\r
211 output[op++] = input[anchor++];
\r
214 if (copy == MAX_COPY) {
\r
216 output[op++] = MAX_COPY-1;
\r
219 // last matched byte
\r
222 // distance is biased
\r
225 if(distance == 0) {
\r
226 // zero distance means a run
\r
227 byte x = input[ip - 1];
\r
228 while (ip < ip_bound)
\r
229 if (input[ref++] != x) break;
\r
233 // safe because the outer check against ip limit
\r
234 if(input[ref++] != input[ip++]) break;
\r
235 if(input[ref++] != input[ip++]) break;
\r
236 if(input[ref++] != input[ip++]) break;
\r
237 if(input[ref++] != input[ip++]) break;
\r
238 if(input[ref++] != input[ip++]) break;
\r
239 if(input[ref++] != input[ip++]) break;
\r
240 if(input[ref++] != input[ip++]) break;
\r
241 if(input[ref++] != input[ip++]) break;
\r
242 while (ip < ip_bound)
\r
243 if (input[ref++] != input[ip++]) break;
\r
248 // if we have copied something, adjust the copy count
\r
250 // copy is biased, '0' means 1 byte copy
\r
251 output[op-copy-1] = (byte)(copy-1);
\r
253 // back, to overwrite the copy count
\r
256 // reset literal counter
\r
259 // length is biased, '1' means a match of 3 bytes
\r
263 // encode the match
\r
264 if (len > MAX_LEN - 2)
\r
265 while (len > MAX_LEN - 2) {
\r
266 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
\r
267 output[op++] = (byte) (MAX_LEN - 2 - 7 -2);
\r
268 output[op++] = (byte) (distance & 0xff);
\r
269 len -= MAX_LEN - 2;
\r
273 output[op++] = (byte) ((len << 5) + (distance >>> 8));
\r
274 output[op++] = (byte) (distance & 255);
\r
276 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
\r
277 output[op++] = (byte) (len - 7);
\r
278 output[op++] = (byte) (distance & 255);
\r
281 // update the hash at match boundary
\r
282 hval = HASH_FUNCTION(input, ip);
\r
284 hval = HASH_FUNCTION(input, ip);
\r
287 // assuming literal copy
\r
288 output[op++] = MAX_COPY-1;
\r
292 // left-over as literal copy
\r
294 while (ip <= ip_bound) {
\r
295 output[op++] = input[ip++];
\r
297 if (copy == MAX_COPY) {
\r
299 output[op++] = MAX_COPY-1;
\r
303 // if we have copied something, adjust the copy length
\r
305 output[op - copy - 1] = (byte) (copy - 1);
\r
309 return op - outputOffset;
\r
312 private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset)
\r
314 int ip = inputOffset;
\r
315 int ip_bound = ip + length - 2;
\r
316 int ip_limit = ip + length - 12;
\r
317 int op = outputOffset;
\r
319 int[] htab = new int[HASH_SIZE];
\r
328 // create literal copy only
\r
329 output[op++] = (byte) (length - 1);
\r
331 while(ip <= ip_bound)
\r
332 output[op++] = input[ip++];
\r
338 // initializes hash table
\r
339 for (hslot = 0; hslot < HASH_SIZE; ++hslot)
\r
342 // we start with literal copy
\r
344 output[op++] = MAX_COPY-1;
\r
345 output[op++] = input[ip++];
\r
346 output[op++] = input[ip++];
\r
349 while (ip < ip_limit) {
\r
353 // minimum match length
\r
356 // comparison starting-point
\r
360 if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) {
\r
363 ref = anchor - 1 + 3;
\r
365 // NOTE: cannot do this goto, so copied the code here from line 487
\r
368 // last matched byte
\r
371 // distance is biased
\r
374 if (distance == 0) {
\r
375 // zero distance means a run
\r
376 byte x = input[ip - 1];
\r
377 while (ip < ip_bound)
\r
378 if (input[ref++] != x) break;
\r
382 // safe because the outer check against ip limit
\r
383 if(input[ref++] != input[ip++]) break;
\r
384 if(input[ref++] != input[ip++]) break;
\r
385 if(input[ref++] != input[ip++]) break;
\r
386 if(input[ref++] != input[ip++]) break;
\r
387 if(input[ref++] != input[ip++]) break;
\r
388 if(input[ref++] != input[ip++]) break;
\r
389 if(input[ref++] != input[ip++]) break;
\r
390 if(input[ref++] != input[ip++]) break;
\r
391 while (ip < ip_bound)
\r
392 if (input[ref++] != input[ip++]) break;
\r
397 // if we have copied something, adjust the copy count
\r
399 // copy is biased, '0' means 1 byte copy
\r
400 output[op - copy - 1] = (byte) (copy - 1);
\r
402 // back, to overwrite the copy count
\r
405 // reset literal counter
\r
408 // length is biased, '1' means a match of 3 bytes
\r
412 // encode the match
\r
413 if (distance < MAX_DISTANCE2) {
\r
415 output[op++] = (byte) ((len << 5) + (distance >>> 8));
\r
416 output[op++] = (byte) (distance & 255);
\r
418 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
\r
419 for (len -= 7; len >= 255; len -= 255)
\r
420 output[op++] = (byte) 255;
\r
421 output[op++] = (byte) len;
\r
422 output[op++] = (byte) (distance & 255);
\r
425 // far away, but not yet in the another galaxy...
\r
427 distance -= MAX_DISTANCE2;
\r
428 output[op++] = (byte) ((len << 5) + 31);
\r
429 output[op++] = (byte) 255;
\r
430 output[op++] = (byte) (distance >>> 8);
\r
431 output[op++] = (byte) (distance & 255);
\r
433 distance -= MAX_DISTANCE2;
\r
434 output[op++] = (byte) ((7 << 5) + 31);
\r
435 for (len -=7; len >= 255; len -= 255)
\r
436 output[op++] = (byte) 255;
\r
437 output[op++] = (byte) len;
\r
438 output[op++] = (byte) 255;
\r
439 output[op++] = (byte) (distance >>> 8);
\r
440 output[op++] = (byte) (distance & 255);
\r
444 // update the hash at match boundary
\r
445 hval = HASH_FUNCTION(input, ip);
\r
447 hval = HASH_FUNCTION(input, ip);
\r
450 // assuming literal copy
\r
451 output[op++] = MAX_COPY-1;
\r
456 // find potential match
\r
457 hval = HASH_FUNCTION(input, ip);
\r
461 // calculate distance to the match
\r
462 distance = anchor - ref;
\r
464 // update hash table
\r
465 htab[hslot] = anchor;
\r
467 // is this a match? check the first 3 bytes
\r
468 if (distance==0 ||
\r
469 (distance >= MAX_FARDISTANCE) ||
\r
470 input[ref++] != input[ip++] ||
\r
471 input[ref++] != input[ip++] ||
\r
472 input[ref++] != input[ip++])
\r
474 output[op++] = input[anchor++];
\r
477 if (copy == MAX_COPY) {
\r
479 output[op++] = MAX_COPY-1;
\r
482 // far, needs at least 5-byte match
\r
483 if (distance >= MAX_DISTANCE2) {
\r
484 if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) {
\r
485 // LITERAL COPY, code copied from above
\r
486 output[op++] = input[anchor++];
\r
489 if (copy == MAX_COPY) {
\r
491 output[op++] = MAX_COPY-1;
\r
498 // NOTE: all of the following code within this block is copied above
\r
502 // last matched byte
\r
505 // distance is biased
\r
508 if (distance == 0) {
\r
509 // zero distance means a run
\r
510 byte x = input[ip - 1];
\r
511 while (ip < ip_bound)
\r
512 if (input[ref++] != x) break;
\r
516 // safe because the outer check against ip limit
\r
517 if(input[ref++] != input[ip++]) break;
\r
518 if(input[ref++] != input[ip++]) break;
\r
519 if(input[ref++] != input[ip++]) break;
\r
520 if(input[ref++] != input[ip++]) break;
\r
521 if(input[ref++] != input[ip++]) break;
\r
522 if(input[ref++] != input[ip++]) break;
\r
523 if(input[ref++] != input[ip++]) break;
\r
524 if(input[ref++] != input[ip++]) break;
\r
525 while (ip < ip_bound)
\r
526 if (input[ref++] != input[ip++]) break;
\r
531 // if we have copied something, adjust the copy count
\r
533 // copy is biased, '0' means 1 byte copy
\r
534 output[op - copy - 1] = (byte) (copy - 1);
\r
536 // back, to overwrite the copy count
\r
539 // reset literal counter
\r
542 // length is biased, '1' means a match of 3 bytes
\r
546 // encode the match
\r
547 if (distance < MAX_DISTANCE2) {
\r
549 output[op++] = (byte) ((len << 5) + (distance >>> 8));
\r
550 output[op++] = (byte) (distance & 255);
\r
552 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
\r
553 for (len -= 7; len >= 255; len -= 255)
\r
554 output[op++] = (byte) 255;
\r
555 output[op++] = (byte) len;
\r
556 output[op++] = (byte) (distance & 255);
\r
559 // far away, but not yet in the another galaxy...
\r
561 distance -= MAX_DISTANCE2;
\r
562 output[op++] = (byte) ((len << 5) + 31);
\r
563 output[op++] = (byte) 255;
\r
564 output[op++] = (byte) (distance >>> 8);
\r
565 output[op++] = (byte) (distance & 255);
\r
567 distance -= MAX_DISTANCE2;
\r
568 output[op++] = (byte) ((7 << 5) + 31);
\r
569 for (len -=7; len >= 255; len -= 255)
\r
570 output[op++] = (byte) 255;
\r
571 output[op++] = (byte) len;
\r
572 output[op++] = (byte) 255;
\r
573 output[op++] = (byte) (distance >>> 8);
\r
574 output[op++] = (byte) (distance & 255);
\r
578 // update the hash at match boundary
\r
579 hval = HASH_FUNCTION(input, ip);
\r
581 hval = HASH_FUNCTION(input, ip);
\r
584 // assuming literal copy
\r
585 output[op++] = MAX_COPY-1;
\r
589 // left-over as literal copy
\r
591 while (ip <= ip_bound) {
\r
592 output[op++] = input[ip++];
\r
594 if (copy == MAX_COPY) {
\r
596 output[op++] = MAX_COPY-1;
\r
600 // if we have copied something, adjust the copy length
\r
602 output[op-copy-1] = (byte) (copy - 1);
\r
606 // marker for fastlz2
\r
607 output[outputOffset] |= (1 << 5);
\r
609 return op - outputOffset;
\r
612 // ------------------------------------------------------------------------
\r
614 private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
\r
616 int ip = inputOffset;
\r
617 int ip_limit = ip + length;
\r
618 int op = outputOffset;
\r
619 int op_limit = op + maxout;
\r
620 int ctrl = (int) input[ip++] & 31;
\r
621 boolean loop = true;
\r
625 int len = ctrl >>> 5;
\r
626 int ofs = (ctrl & 31) << 8;
\r
632 len += (int) input[ip++] & 0xff;
\r
633 ref -= (int) input[ip++] & 0xff;
\r
636 if (op + len + 3 > op_limit)
\r
639 if (ref-1 < outputOffset)
\r
644 ctrl = (int) input[ip++] & 0xff;
\r
649 // optimize copy for a run
\r
650 byte b = output[ref-1];
\r
654 for(; len > 0; --len)
\r
657 /* copy from reference */
\r
659 output[op++] = output[ref++];
\r
660 output[op++] = output[ref++];
\r
661 output[op++] = output[ref++];
\r
662 for (; len > 0; --len)
\r
663 output[op++] = output[ref++];
\r
668 if (op + ctrl > op_limit)
\r
670 if (ip + ctrl > ip_limit)
\r
674 output[op++] = input[ip++];
\r
675 for (--ctrl; ctrl > 0; --ctrl)
\r
676 output[op++] = input[ip++];
\r
678 loop = ip < ip_limit;
\r
680 ctrl = (int) input[ip++] & 0xff;
\r
684 return op - outputOffset;
\r
687 private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
\r
689 int ip = inputOffset;
\r
690 int ip_limit = ip + length;
\r
691 int op = outputOffset;
\r
692 int op_limit = op + maxout;
\r
693 int ctrl = (int) input[ip++] & 31;
\r
694 boolean loop = true;
\r
698 int len = ctrl >>> 5;
\r
699 int ofs = (ctrl & 31) << 8;
\r
707 code = (int) input[ip++] & 0xff;
\r
709 } while (code==255);
\r
711 code = (int) input[ip++] & 0xff;
\r
714 // match from 16-bit distance
\r
716 if (ofs == (31 << 8)) {
\r
717 ofs = ((int) input[ip++] & 0xff) << 8;
\r
718 ofs += (int) input[ip++] & 0xff;
\r
719 ref = op - ofs - MAX_DISTANCE2;
\r
724 if (op + len + 3 > op_limit)
\r
727 if (ref-1 < outputOffset)
\r
732 ctrl = (int) input[ip++] & 0xff;
\r
737 /* optimize copy for a run */
\r
738 byte b = output[ref-1];
\r
742 for (; len > 0; --len)
\r
745 /* copy from reference */
\r
747 output[op++] = output[ref++];
\r
748 output[op++] = output[ref++];
\r
749 output[op++] = output[ref++];
\r
750 for (; len > 0; --len)
\r
751 output[op++] = output[ref++];
\r
756 if (op + ctrl > op_limit)
\r
758 if (ip + ctrl > ip_limit)
\r
762 output[op++] = input[ip++];
\r
763 for (--ctrl; ctrl > 0; --ctrl)
\r
764 output[op++] = input[ip++];
\r
766 loop = ip < ip_limit;
\r
768 ctrl = (int) input[ip++] & 0xff;
\r
772 return op - outputOffset;
\r