1 /*******************************************************************************
2 * Copyright (c) 2007, 2011 Association for Decentralized Information Management
4 * All rights reserved. This program and the accompanying materials
5 * are made available under the terms of the Eclipse Public License v1.0
6 * which accompanies this distribution, and is available at
7 * http://www.eclipse.org/legal/epl-v10.html
10 * VTT Technical Research Centre of Finland - initial API and implementation
11 *******************************************************************************/
12 package org.simantics.fastlz.java;
16 * @author Tuukka Lehtonen
18 public class FastLZImpl {
21 private static final int MAX_LEN = 264;
22 private static final int MAX_COPY = 32;
23 // For level 1 compression
24 private static final int MAX_DISTANCE = 8192;
25 // For level 2 compression
26 private static final int MAX_DISTANCE2 = 8191;
27 private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1);
29 private static final boolean FASTLZ_SAFE = false;
31 public static final int FASTLZ_VERSION = 0x000100;
33 public static final int FASTLZ_VERSION_MAJOR = 0;
34 public static final int FASTLZ_VERSION_MINOR = 1;
35 public static final int FASTLZ_VERSION_REVISION = 0;
37 public static final String FASTLZ_VERSION_STRING = "0.1.0";
39 private static final int HASH_LOG = 13;
40 private static final int HASH_SIZE = (1 << HASH_LOG);
41 private static final int HASH_MASK = (HASH_SIZE - 1);
43 private static final int FASTLZ_READU16(byte[] array, int offset) {
44 final int b0 = array[offset] & 0xff;
45 final int b1 = array[offset + 1] & 0xff;
46 return b0 | (b1 << 8);
54 private static final int HASH_FUNCTION(byte[] array, int offset) {
55 final int b0 = array[offset] & 0xff;
56 final int b1 = array[offset+1] & 0xff;
57 final int b2 = array[offset+2] & 0xff;
58 int v = b0 | (b1 << 8);
59 final int v2 = b1 | (b2 << 8);
60 v ^= v2 ^ (v >>> (16 - HASH_LOG));
66 Compress a block of data in the input buffer and returns the size of
67 compressed block. The size of input buffer is specified by length. The
68 minimum input buffer size is 16.
70 The output buffer must be at least 5% larger than the input buffer
71 and can not be smaller than 66 bytes.
73 If the input is not compressible, the return value might be larger than
74 length (input buffer size).
76 The input buffer and the output buffer can not overlap.
78 public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
79 /* for short block, choose fastlz1 */
81 return fastlz1_compress(input, inputOffset, length, output, outputOffset);
84 return fastlz2_compress(input, inputOffset, length, output, outputOffset);
88 Decompress a block of compressed data and returns the size of the
89 decompressed block. If error occurs, e.g. the compressed data is
90 corrupted or the output buffer is not large enough, then 0 (zero)
91 will be returned instead.
93 The input buffer and the output buffer can not overlap.
95 Decompression is memory safe and guaranteed not to write the output buffer
96 more than what is specified in maxout.
98 public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
100 // magic identifier for compression level
101 int level = (((int) input[0] & 0xff) >>> 5) + 1;
104 return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout);
106 return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout);
108 // unknown level, trigger error
113 Compress a block of data in the input buffer and returns the size of
114 compressed block. The size of input buffer is specified by length. The
115 minimum input buffer size is 16.
117 The output buffer must be at least 5% larger than the input buffer
118 and can not be smaller than 66 bytes.
120 If the input is not compressible, the return value might be larger than
121 length (input buffer size).
123 The input buffer and the output buffer can not overlap.
125 Compression level can be specified in parameter level. At the moment,
126 only level 1 and level 2 are supported.
127 Level 1 is the fastest compression and generally useful for short data.
128 Level 2 is slightly slower but it gives better compression ratio.
130 Note that the compressed data, regardless of the level, can always be
131 decompressed using the function fastlz_decompress above.
133 public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output,
136 return fastlz1_compress(input, inputOffset, length, output, outputOffset);
138 return fastlz2_compress(input, inputOffset, length, output, outputOffset);
144 private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
145 int ip = inputOffset;
146 int ip_bound = inputOffset + length - 2;
147 int ip_limit = inputOffset + length - 12;
148 int op = outputOffset;
150 int[] htab = new int[HASH_SIZE];
161 // create literal copy only
162 output[op++] = (byte) (length-1);
164 while(ip <= ip_bound)
165 output[op++] = input[ip++];
172 // initializes hash table
173 for (hslot = 0; hslot < HASH_SIZE; ++hslot)
176 // we start with literal copy
178 output[op++] = MAX_COPY-1;
179 output[op++] = input[ip++];
180 output[op++] = input[ip++];
183 while (ip < ip_limit) {
187 // minimum match length
190 // comparison starting-point
193 // find potential match
194 hval = HASH_FUNCTION(input, ip);
198 // calculate distance to the match
199 distance = anchor - ref;
202 htab[hslot] = anchor;
204 // is this a match? check the first 3 bytes
206 (distance >= MAX_DISTANCE) ||
207 input[ref++] != input[ip++] ||
208 input[ref++] != input[ip++] ||
209 input[ref++] != input[ip++])
211 output[op++] = input[anchor++];
214 if (copy == MAX_COPY) {
216 output[op++] = MAX_COPY-1;
222 // distance is biased
226 // zero distance means a run
227 byte x = input[ip - 1];
228 while (ip < ip_bound)
229 if (input[ref++] != x) break;
233 // safe because the outer check against ip limit
234 if(input[ref++] != input[ip++]) break;
235 if(input[ref++] != input[ip++]) break;
236 if(input[ref++] != input[ip++]) break;
237 if(input[ref++] != input[ip++]) break;
238 if(input[ref++] != input[ip++]) break;
239 if(input[ref++] != input[ip++]) break;
240 if(input[ref++] != input[ip++]) break;
241 if(input[ref++] != input[ip++]) break;
242 while (ip < ip_bound)
243 if (input[ref++] != input[ip++]) break;
248 // if we have copied something, adjust the copy count
250 // copy is biased, '0' means 1 byte copy
251 output[op-copy-1] = (byte)(copy-1);
253 // back, to overwrite the copy count
256 // reset literal counter
259 // length is biased, '1' means a match of 3 bytes
264 if (len > MAX_LEN - 2)
265 while (len > MAX_LEN - 2) {
266 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
267 output[op++] = (byte) (MAX_LEN - 2 - 7 -2);
268 output[op++] = (byte) (distance & 0xff);
273 output[op++] = (byte) ((len << 5) + (distance >>> 8));
274 output[op++] = (byte) (distance & 255);
276 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
277 output[op++] = (byte) (len - 7);
278 output[op++] = (byte) (distance & 255);
281 // update the hash at match boundary
282 hval = HASH_FUNCTION(input, ip);
284 hval = HASH_FUNCTION(input, ip);
287 // assuming literal copy
288 output[op++] = MAX_COPY-1;
292 // left-over as literal copy
294 while (ip <= ip_bound) {
295 output[op++] = input[ip++];
297 if (copy == MAX_COPY) {
299 output[op++] = MAX_COPY-1;
303 // if we have copied something, adjust the copy length
305 output[op - copy - 1] = (byte) (copy - 1);
309 return op - outputOffset;
312 private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset)
314 int ip = inputOffset;
315 int ip_bound = ip + length - 2;
316 int ip_limit = ip + length - 12;
317 int op = outputOffset;
319 int[] htab = new int[HASH_SIZE];
328 // create literal copy only
329 output[op++] = (byte) (length - 1);
331 while(ip <= ip_bound)
332 output[op++] = input[ip++];
338 // initializes hash table
339 for (hslot = 0; hslot < HASH_SIZE; ++hslot)
342 // we start with literal copy
344 output[op++] = MAX_COPY-1;
345 output[op++] = input[ip++];
346 output[op++] = input[ip++];
349 while (ip < ip_limit) {
353 // minimum match length
356 // comparison starting-point
360 if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) {
363 ref = anchor - 1 + 3;
365 // NOTE: cannot do this goto, so copied the code here from line 487
371 // distance is biased
375 // zero distance means a run
376 byte x = input[ip - 1];
377 while (ip < ip_bound)
378 if (input[ref++] != x) break;
382 // safe because the outer check against ip limit
383 if(input[ref++] != input[ip++]) break;
384 if(input[ref++] != input[ip++]) break;
385 if(input[ref++] != input[ip++]) break;
386 if(input[ref++] != input[ip++]) break;
387 if(input[ref++] != input[ip++]) break;
388 if(input[ref++] != input[ip++]) break;
389 if(input[ref++] != input[ip++]) break;
390 if(input[ref++] != input[ip++]) break;
391 while (ip < ip_bound)
392 if (input[ref++] != input[ip++]) break;
397 // if we have copied something, adjust the copy count
399 // copy is biased, '0' means 1 byte copy
400 output[op - copy - 1] = (byte) (copy - 1);
402 // back, to overwrite the copy count
405 // reset literal counter
408 // length is biased, '1' means a match of 3 bytes
413 if (distance < MAX_DISTANCE2) {
415 output[op++] = (byte) ((len << 5) + (distance >>> 8));
416 output[op++] = (byte) (distance & 255);
418 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
419 for (len -= 7; len >= 255; len -= 255)
420 output[op++] = (byte) 255;
421 output[op++] = (byte) len;
422 output[op++] = (byte) (distance & 255);
425 // far away, but not yet in the another galaxy...
427 distance -= MAX_DISTANCE2;
428 output[op++] = (byte) ((len << 5) + 31);
429 output[op++] = (byte) 255;
430 output[op++] = (byte) (distance >>> 8);
431 output[op++] = (byte) (distance & 255);
433 distance -= MAX_DISTANCE2;
434 output[op++] = (byte) ((7 << 5) + 31);
435 for (len -=7; len >= 255; len -= 255)
436 output[op++] = (byte) 255;
437 output[op++] = (byte) len;
438 output[op++] = (byte) 255;
439 output[op++] = (byte) (distance >>> 8);
440 output[op++] = (byte) (distance & 255);
444 // update the hash at match boundary
445 hval = HASH_FUNCTION(input, ip);
447 hval = HASH_FUNCTION(input, ip);
450 // assuming literal copy
451 output[op++] = MAX_COPY-1;
456 // find potential match
457 hval = HASH_FUNCTION(input, ip);
461 // calculate distance to the match
462 distance = anchor - ref;
465 htab[hslot] = anchor;
467 // is this a match? check the first 3 bytes
469 (distance >= MAX_FARDISTANCE) ||
470 input[ref++] != input[ip++] ||
471 input[ref++] != input[ip++] ||
472 input[ref++] != input[ip++])
474 output[op++] = input[anchor++];
477 if (copy == MAX_COPY) {
479 output[op++] = MAX_COPY-1;
482 // far, needs at least 5-byte match
483 if (distance >= MAX_DISTANCE2) {
484 if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) {
485 // LITERAL COPY, code copied from above
486 output[op++] = input[anchor++];
489 if (copy == MAX_COPY) {
491 output[op++] = MAX_COPY-1;
498 // NOTE: all of the following code within this block is copied above
505 // distance is biased
509 // zero distance means a run
510 byte x = input[ip - 1];
511 while (ip < ip_bound)
512 if (input[ref++] != x) break;
516 // safe because the outer check against ip limit
517 if(input[ref++] != input[ip++]) break;
518 if(input[ref++] != input[ip++]) break;
519 if(input[ref++] != input[ip++]) break;
520 if(input[ref++] != input[ip++]) break;
521 if(input[ref++] != input[ip++]) break;
522 if(input[ref++] != input[ip++]) break;
523 if(input[ref++] != input[ip++]) break;
524 if(input[ref++] != input[ip++]) break;
525 while (ip < ip_bound)
526 if (input[ref++] != input[ip++]) break;
531 // if we have copied something, adjust the copy count
533 // copy is biased, '0' means 1 byte copy
534 output[op - copy - 1] = (byte) (copy - 1);
536 // back, to overwrite the copy count
539 // reset literal counter
542 // length is biased, '1' means a match of 3 bytes
547 if (distance < MAX_DISTANCE2) {
549 output[op++] = (byte) ((len << 5) + (distance >>> 8));
550 output[op++] = (byte) (distance & 255);
552 output[op++] = (byte) ((7 << 5) + (distance >>> 8));
553 for (len -= 7; len >= 255; len -= 255)
554 output[op++] = (byte) 255;
555 output[op++] = (byte) len;
556 output[op++] = (byte) (distance & 255);
559 // far away, but not yet in the another galaxy...
561 distance -= MAX_DISTANCE2;
562 output[op++] = (byte) ((len << 5) + 31);
563 output[op++] = (byte) 255;
564 output[op++] = (byte) (distance >>> 8);
565 output[op++] = (byte) (distance & 255);
567 distance -= MAX_DISTANCE2;
568 output[op++] = (byte) ((7 << 5) + 31);
569 for (len -=7; len >= 255; len -= 255)
570 output[op++] = (byte) 255;
571 output[op++] = (byte) len;
572 output[op++] = (byte) 255;
573 output[op++] = (byte) (distance >>> 8);
574 output[op++] = (byte) (distance & 255);
578 // update the hash at match boundary
579 hval = HASH_FUNCTION(input, ip);
581 hval = HASH_FUNCTION(input, ip);
584 // assuming literal copy
585 output[op++] = MAX_COPY-1;
589 // left-over as literal copy
591 while (ip <= ip_bound) {
592 output[op++] = input[ip++];
594 if (copy == MAX_COPY) {
596 output[op++] = MAX_COPY-1;
600 // if we have copied something, adjust the copy length
602 output[op-copy-1] = (byte) (copy - 1);
606 // marker for fastlz2
607 output[outputOffset] |= (1 << 5);
609 return op - outputOffset;
612 // ------------------------------------------------------------------------
614 private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
616 int ip = inputOffset;
617 int ip_limit = ip + length;
618 int op = outputOffset;
619 int op_limit = op + maxout;
620 int ctrl = (int) input[ip++] & 31;
625 int len = ctrl >>> 5;
626 int ofs = (ctrl & 31) << 8;
632 len += (int) input[ip++] & 0xff;
633 ref -= (int) input[ip++] & 0xff;
636 if (op + len + 3 > op_limit)
639 if (ref-1 < outputOffset)
644 ctrl = (int) input[ip++] & 0xff;
649 // optimize copy for a run
650 byte b = output[ref-1];
654 for(; len > 0; --len)
657 /* copy from reference */
659 output[op++] = output[ref++];
660 output[op++] = output[ref++];
661 output[op++] = output[ref++];
662 for (; len > 0; --len)
663 output[op++] = output[ref++];
668 if (op + ctrl > op_limit)
670 if (ip + ctrl > ip_limit)
674 output[op++] = input[ip++];
675 for (--ctrl; ctrl > 0; --ctrl)
676 output[op++] = input[ip++];
678 loop = ip < ip_limit;
680 ctrl = (int) input[ip++] & 0xff;
684 return op - outputOffset;
687 private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
689 int ip = inputOffset;
690 int ip_limit = ip + length;
691 int op = outputOffset;
692 int op_limit = op + maxout;
693 int ctrl = (int) input[ip++] & 31;
698 int len = ctrl >>> 5;
699 int ofs = (ctrl & 31) << 8;
707 code = (int) input[ip++] & 0xff;
711 code = (int) input[ip++] & 0xff;
714 // match from 16-bit distance
716 if (ofs == (31 << 8)) {
717 ofs = ((int) input[ip++] & 0xff) << 8;
718 ofs += (int) input[ip++] & 0xff;
719 ref = op - ofs - MAX_DISTANCE2;
724 if (op + len + 3 > op_limit)
727 if (ref-1 < outputOffset)
732 ctrl = (int) input[ip++] & 0xff;
737 /* optimize copy for a run */
738 byte b = output[ref-1];
742 for (; len > 0; --len)
745 /* copy from reference */
747 output[op++] = output[ref++];
748 output[op++] = output[ref++];
749 output[op++] = output[ref++];
750 for (; len > 0; --len)
751 output[op++] = output[ref++];
756 if (op + ctrl > op_limit)
758 if (ip + ctrl > ip_limit)
762 output[op++] = input[ip++];
763 for (--ctrl; ctrl > 0; --ctrl)
764 output[op++] = input[ip++];
766 loop = ip < ip_limit;
768 ctrl = (int) input[ip++] & 0xff;
772 return op - outputOffset;