/******************************************************************************* * Copyright (c) 2007, 2011 Association for Decentralized Information Management * in Industry THTH ry. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * VTT Technical Research Centre of Finland - initial API and implementation *******************************************************************************/ package org.simantics.fastlz.java; /** * @author Tuukka Lehtonen */ public class FastLZImpl { /** 256 + 8 */ private static final int MAX_LEN = 264; private static final int MAX_COPY = 32; // For level 1 compression private static final int MAX_DISTANCE = 8192; // For level 2 compression private static final int MAX_DISTANCE2 = 8191; private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1); private static final boolean FASTLZ_SAFE = false; public static final int FASTLZ_VERSION = 0x000100; public static final int FASTLZ_VERSION_MAJOR = 0; public static final int FASTLZ_VERSION_MINOR = 1; public static final int FASTLZ_VERSION_REVISION = 0; public static final String FASTLZ_VERSION_STRING = "0.1.0"; private static final int HASH_LOG = 13; private static final int HASH_SIZE = (1 << HASH_LOG); private static final int HASH_MASK = (HASH_SIZE - 1); private static final int FASTLZ_READU16(byte[] array, int offset) { final int b0 = array[offset] & 0xff; final int b1 = array[offset + 1] & 0xff; return b0 | (b1 << 8); } /** * @param array * @param offset * @return */ private static final int HASH_FUNCTION(byte[] array, int offset) { final int b0 = array[offset] & 0xff; final int b1 = array[offset+1] & 0xff; final int b2 = array[offset+2] & 0xff; int v = b0 | (b1 << 8); final int v2 = b1 | (b2 << 8); v ^= v2 ^ (v >>> (16 - HASH_LOG)); v &= HASH_MASK; return v; } /** Compress a block of data in the input buffer and returns the size of compressed block. The size of input buffer is specified by length. The minimum input buffer size is 16. The output buffer must be at least 5% larger than the input buffer and can not be smaller than 66 bytes. If the input is not compressible, the return value might be larger than length (input buffer size). The input buffer and the output buffer can not overlap. */ public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { /* for short block, choose fastlz1 */ if (length < 65536) return fastlz1_compress(input, inputOffset, length, output, outputOffset); /* else... */ return fastlz2_compress(input, inputOffset, length, output, outputOffset); } /** Decompress a block of compressed data and returns the size of the decompressed block. If error occurs, e.g. the compressed data is corrupted or the output buffer is not large enough, then 0 (zero) will be returned instead. The input buffer and the output buffer can not overlap. Decompression is memory safe and guaranteed not to write the output buffer more than what is specified in maxout. */ public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, int maxout) { // magic identifier for compression level int level = (((int) input[0] & 0xff) >>> 5) + 1; if (level == 1) return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout); if (level == 2) return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout); // unknown level, trigger error return 0; } /** Compress a block of data in the input buffer and returns the size of compressed block. The size of input buffer is specified by length. The minimum input buffer size is 16. The output buffer must be at least 5% larger than the input buffer and can not be smaller than 66 bytes. If the input is not compressible, the return value might be larger than length (input buffer size). The input buffer and the output buffer can not overlap. Compression level can be specified in parameter level. At the moment, only level 1 and level 2 are supported. Level 1 is the fastest compression and generally useful for short data. Level 2 is slightly slower but it gives better compression ratio. Note that the compressed data, regardless of the level, can always be decompressed using the function fastlz_decompress above. */ public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { if (level == 1) return fastlz1_compress(input, inputOffset, length, output, outputOffset); if (level == 2) return fastlz2_compress(input, inputOffset, length, output, outputOffset); return 0; } private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { int ip = inputOffset; int ip_bound = inputOffset + length - 2; int ip_limit = inputOffset + length - 12; int op = outputOffset; int[] htab = new int[HASH_SIZE]; int hslot; int hval; int copy; // sanity check if(length < 4) { if(length != 0) { // create literal copy only output[op++] = (byte) (length-1); ip_bound++; while(ip <= ip_bound) output[op++] = input[ip++]; return length+1; } else return 0; } // initializes hash table for (hslot = 0; hslot < HASH_SIZE; ++hslot) htab[hslot] = ip; // we start with literal copy copy = 2; output[op++] = MAX_COPY-1; output[op++] = input[ip++]; output[op++] = input[ip++]; // main loop while (ip < ip_limit) { int ref; int distance; // minimum match length int len = 3; // comparison starting-point int anchor = ip; // find potential match hval = HASH_FUNCTION(input, ip); hslot = hval; ref = htab[hval]; // calculate distance to the match distance = anchor - ref; // update hash table htab[hslot] = anchor; // is this a match? check the first 3 bytes if (distance==0 || (distance >= MAX_DISTANCE) || input[ref++] != input[ip++] || input[ref++] != input[ip++] || input[ref++] != input[ip++]) { output[op++] = input[anchor++]; ip = anchor; copy++; if (copy == MAX_COPY) { copy = 0; output[op++] = MAX_COPY-1; } } else { // last matched byte ip = anchor + len; // distance is biased --distance; if(distance == 0) { // zero distance means a run byte x = input[ip - 1]; while (ip < ip_bound) if (input[ref++] != x) break; else ip++; } else { for (;;) { // safe because the outer check against ip limit if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; while (ip < ip_bound) if (input[ref++] != input[ip++]) break; break; } } // if we have copied something, adjust the copy count if (copy != 0) // copy is biased, '0' means 1 byte copy output[op-copy-1] = (byte)(copy-1); else // back, to overwrite the copy count op--; // reset literal counter copy = 0; // length is biased, '1' means a match of 3 bytes ip -= 3; len = ip - anchor; // encode the match if (len > MAX_LEN - 2) while (len > MAX_LEN - 2) { output[op++] = (byte) ((7 << 5) + (distance >>> 8)); output[op++] = (byte) (MAX_LEN - 2 - 7 -2); output[op++] = (byte) (distance & 0xff); len -= MAX_LEN - 2; } if (len < 7) { output[op++] = (byte) ((len << 5) + (distance >>> 8)); output[op++] = (byte) (distance & 255); } else { output[op++] = (byte) ((7 << 5) + (distance >>> 8)); output[op++] = (byte) (len - 7); output[op++] = (byte) (distance & 255); } // update the hash at match boundary hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; // assuming literal copy output[op++] = MAX_COPY-1; } } // left-over as literal copy ip_bound++; while (ip <= ip_bound) { output[op++] = input[ip++]; copy++; if (copy == MAX_COPY) { copy = 0; output[op++] = MAX_COPY-1; } } // if we have copied something, adjust the copy length if (copy != 0) output[op - copy - 1] = (byte) (copy - 1); else --op; return op - outputOffset; } private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { int ip = inputOffset; int ip_bound = ip + length - 2; int ip_limit = ip + length - 12; int op = outputOffset; int[] htab = new int[HASH_SIZE]; int hslot; int hval; int copy; // sanity check if (length < 4) { if (length != 0) { // create literal copy only output[op++] = (byte) (length - 1); ip_bound++; while(ip <= ip_bound) output[op++] = input[ip++]; return length + 1; } else return 0; } // initializes hash table for (hslot = 0; hslot < HASH_SIZE; ++hslot) htab[hslot] = ip; // we start with literal copy copy = 2; output[op++] = MAX_COPY-1; output[op++] = input[ip++]; output[op++] = input[ip++]; // main loop while (ip < ip_limit) { int ref; int distance; // minimum match length int len = 3; // comparison starting-point int anchor = ip; // check for a run if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) { distance = 1; ip += 3; ref = anchor - 1 + 3; // NOTE: cannot do this goto, so copied the code here from line 487 //goto match; // last matched byte ip = anchor + len; // distance is biased --distance; if (distance == 0) { // zero distance means a run byte x = input[ip - 1]; while (ip < ip_bound) if (input[ref++] != x) break; else ++ip; } else { for (;;) { // safe because the outer check against ip limit if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; while (ip < ip_bound) if (input[ref++] != input[ip++]) break; break; } } // if we have copied something, adjust the copy count if (copy != 0) // copy is biased, '0' means 1 byte copy output[op - copy - 1] = (byte) (copy - 1); else // back, to overwrite the copy count --op; // reset literal counter copy = 0; // length is biased, '1' means a match of 3 bytes ip -= 3; len = ip - anchor; // encode the match if (distance < MAX_DISTANCE2) { if (len < 7) { output[op++] = (byte) ((len << 5) + (distance >>> 8)); output[op++] = (byte) (distance & 255); } else { output[op++] = (byte) ((7 << 5) + (distance >>> 8)); for (len -= 7; len >= 255; len -= 255) output[op++] = (byte) 255; output[op++] = (byte) len; output[op++] = (byte) (distance & 255); } } else { // far away, but not yet in the another galaxy... if(len < 7) { distance -= MAX_DISTANCE2; output[op++] = (byte) ((len << 5) + 31); output[op++] = (byte) 255; output[op++] = (byte) (distance >>> 8); output[op++] = (byte) (distance & 255); } else { distance -= MAX_DISTANCE2; output[op++] = (byte) ((7 << 5) + 31); for (len -=7; len >= 255; len -= 255) output[op++] = (byte) 255; output[op++] = (byte) len; output[op++] = (byte) 255; output[op++] = (byte) (distance >>> 8); output[op++] = (byte) (distance & 255); } } // update the hash at match boundary hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; // assuming literal copy output[op++] = MAX_COPY-1; continue; } // find potential match hval = HASH_FUNCTION(input, ip); hslot = hval; ref = htab[hval]; // calculate distance to the match distance = anchor - ref; // update hash table htab[hslot] = anchor; // is this a match? check the first 3 bytes if (distance==0 || (distance >= MAX_FARDISTANCE) || input[ref++] != input[ip++] || input[ref++] != input[ip++] || input[ref++] != input[ip++]) { output[op++] = input[anchor++]; ip = anchor; copy++; if (copy == MAX_COPY) { copy = 0; output[op++] = MAX_COPY-1; } } else { // far, needs at least 5-byte match if (distance >= MAX_DISTANCE2) { if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) { // LITERAL COPY, code copied from above output[op++] = input[anchor++]; ip = anchor; copy++; if (copy == MAX_COPY) { copy = 0; output[op++] = MAX_COPY-1; } continue; } len += 2; } // NOTE: all of the following code within this block is copied above // to line 357 //match: // last matched byte ip = anchor + len; // distance is biased --distance; if (distance == 0) { // zero distance means a run byte x = input[ip - 1]; while (ip < ip_bound) if (input[ref++] != x) break; else ++ip; } else { for (;;) { // safe because the outer check against ip limit if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; if(input[ref++] != input[ip++]) break; while (ip < ip_bound) if (input[ref++] != input[ip++]) break; break; } } // if we have copied something, adjust the copy count if (copy != 0) // copy is biased, '0' means 1 byte copy output[op - copy - 1] = (byte) (copy - 1); else // back, to overwrite the copy count --op; // reset literal counter copy = 0; // length is biased, '1' means a match of 3 bytes ip -= 3; len = ip - anchor; // encode the match if (distance < MAX_DISTANCE2) { if (len < 7) { output[op++] = (byte) ((len << 5) + (distance >>> 8)); output[op++] = (byte) (distance & 255); } else { output[op++] = (byte) ((7 << 5) + (distance >>> 8)); for (len -= 7; len >= 255; len -= 255) output[op++] = (byte) 255; output[op++] = (byte) len; output[op++] = (byte) (distance & 255); } } else { // far away, but not yet in the another galaxy... if(len < 7) { distance -= MAX_DISTANCE2; output[op++] = (byte) ((len << 5) + 31); output[op++] = (byte) 255; output[op++] = (byte) (distance >>> 8); output[op++] = (byte) (distance & 255); } else { distance -= MAX_DISTANCE2; output[op++] = (byte) ((7 << 5) + 31); for (len -=7; len >= 255; len -= 255) output[op++] = (byte) 255; output[op++] = (byte) len; output[op++] = (byte) 255; output[op++] = (byte) (distance >>> 8); output[op++] = (byte) (distance & 255); } } // update the hash at match boundary hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; hval = HASH_FUNCTION(input, ip); htab[hval] = ip++; // assuming literal copy output[op++] = MAX_COPY-1; } } // left-over as literal copy ip_bound++; while (ip <= ip_bound) { output[op++] = input[ip++]; copy++; if (copy == MAX_COPY) { copy = 0; output[op++] = MAX_COPY-1; } } // if we have copied something, adjust the copy length if (copy != 0) output[op-copy-1] = (byte) (copy - 1); else op--; // marker for fastlz2 output[outputOffset] |= (1 << 5); return op - outputOffset; } // ------------------------------------------------------------------------ private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, int maxout) { int ip = inputOffset; int ip_limit = ip + length; int op = outputOffset; int op_limit = op + maxout; int ctrl = (int) input[ip++] & 31; boolean loop = true; do { int ref = op; int len = ctrl >>> 5; int ofs = (ctrl & 31) << 8; if (ctrl >= 32) { len--; ref -= ofs; if (len == 7-1) len += (int) input[ip++] & 0xff; ref -= (int) input[ip++] & 0xff; if (FASTLZ_SAFE) { if (op + len + 3 > op_limit) return 0; if (ref-1 < outputOffset) return 0; } if (ip < ip_limit) ctrl = (int) input[ip++] & 0xff; else loop = false; if (ref == op) { // optimize copy for a run byte b = output[ref-1]; output[op++] = b; output[op++] = b; output[op++] = b; for(; len > 0; --len) output[op++] = b; } else { /* copy from reference */ ref--; output[op++] = output[ref++]; output[op++] = output[ref++]; output[op++] = output[ref++]; for (; len > 0; --len) output[op++] = output[ref++]; } } else { ctrl++; if (FASTLZ_SAFE) { if (op + ctrl > op_limit) return 0; if (ip + ctrl > ip_limit) return 0; } output[op++] = input[ip++]; for (--ctrl; ctrl > 0; --ctrl) output[op++] = input[ip++]; loop = ip < ip_limit; if (loop) ctrl = (int) input[ip++] & 0xff; } } while(loop); return op - outputOffset; } private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, int maxout) { int ip = inputOffset; int ip_limit = ip + length; int op = outputOffset; int op_limit = op + maxout; int ctrl = (int) input[ip++] & 31; boolean loop = true; do { int ref = op; int len = ctrl >>> 5; int ofs = (ctrl & 31) << 8; if (ctrl >= 32) { int code; len--; ref -= ofs; if (len == 7-1) { do { code = (int) input[ip++] & 0xff; len += code; } while (code==255); } code = (int) input[ip++] & 0xff; ref -= code; // match from 16-bit distance if (code == 255) { if (ofs == (31 << 8)) { ofs = ((int) input[ip++] & 0xff) << 8; ofs += (int) input[ip++] & 0xff; ref = op - ofs - MAX_DISTANCE2; } } if (FASTLZ_SAFE) { if (op + len + 3 > op_limit) return 0; if (ref-1 < outputOffset) return 0; } if (ip < ip_limit) ctrl = (int) input[ip++] & 0xff; else loop = false; if (ref == op) { /* optimize copy for a run */ byte b = output[ref-1]; output[op++] = b; output[op++] = b; output[op++] = b; for (; len > 0; --len) output[op++] = b; } else { /* copy from reference */ --ref; output[op++] = output[ref++]; output[op++] = output[ref++]; output[op++] = output[ref++]; for (; len > 0; --len) output[op++] = output[ref++]; } } else { ++ctrl; if (FASTLZ_SAFE) { if (op + ctrl > op_limit) return 0; if (ip + ctrl > ip_limit) return 0; } output[op++] = input[ip++]; for (--ctrl; ctrl > 0; --ctrl) output[op++] = input[ip++]; loop = ip < ip_limit; if (loop) ctrl = (int) input[ip++] & 0xff; } } while (loop); return op - outputOffset; } }