X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;f=bundles%2Forg.simantics.fastlz%2Fsrc%2Forg%2Fsimantics%2Ffastlz%2Fjava%2FFastLZImpl.java;h=d801020472ecb44badd580ea96b989bd0fd88ae6;hb=0807209928f01e95669af6aeb671110209774bc6;hp=ef2ade2acde1def6f51a258c159409267cad8838;hpb=969bd23cab98a79ca9101af33334000879fb60c5;p=simantics%2Fplatform.git diff --git a/bundles/org.simantics.fastlz/src/org/simantics/fastlz/java/FastLZImpl.java b/bundles/org.simantics.fastlz/src/org/simantics/fastlz/java/FastLZImpl.java index ef2ade2ac..d80102047 100644 --- a/bundles/org.simantics.fastlz/src/org/simantics/fastlz/java/FastLZImpl.java +++ b/bundles/org.simantics.fastlz/src/org/simantics/fastlz/java/FastLZImpl.java @@ -1,775 +1,775 @@ -/******************************************************************************* - * Copyright (c) 2007, 2011 Association for Decentralized Information Management - * in Industry THTH ry. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * VTT Technical Research Centre of Finland - initial API and implementation - *******************************************************************************/ -package org.simantics.fastlz.java; - - -/** - * @author Tuukka Lehtonen - */ -public class FastLZImpl { - - /** 256 + 8 */ - private static final int MAX_LEN = 264; - private static final int MAX_COPY = 32; - // For level 1 compression - private static final int MAX_DISTANCE = 8192; - // For level 2 compression - private static final int MAX_DISTANCE2 = 8191; - private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1); - - private static final boolean FASTLZ_SAFE = false; - - public static final int FASTLZ_VERSION = 0x000100; - - public static final int FASTLZ_VERSION_MAJOR = 0; - public static final int FASTLZ_VERSION_MINOR = 1; - public static final int FASTLZ_VERSION_REVISION = 0; - - public static final String FASTLZ_VERSION_STRING = "0.1.0"; - - private static final int HASH_LOG = 13; - private static final int HASH_SIZE = (1 << HASH_LOG); - private static final int HASH_MASK = (HASH_SIZE - 1); - - private static final int FASTLZ_READU16(byte[] array, int offset) { - final int b0 = array[offset] & 0xff; - final int b1 = array[offset + 1] & 0xff; - return b0 | (b1 << 8); - } - - /** - * @param array - * @param offset - * @return - */ - private static final int HASH_FUNCTION(byte[] array, int offset) { - final int b0 = array[offset] & 0xff; - final int b1 = array[offset+1] & 0xff; - final int b2 = array[offset+2] & 0xff; - int v = b0 | (b1 << 8); - final int v2 = b1 | (b2 << 8); - v ^= v2 ^ (v >>> (16 - HASH_LOG)); - v &= HASH_MASK; - return v; - } - - /** - Compress a block of data in the input buffer and returns the size of - compressed block. The size of input buffer is specified by length. The - minimum input buffer size is 16. - - The output buffer must be at least 5% larger than the input buffer - and can not be smaller than 66 bytes. - - If the input is not compressible, the return value might be larger than - length (input buffer size). - - The input buffer and the output buffer can not overlap. - */ - public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { - /* for short block, choose fastlz1 */ - if (length < 65536) - return fastlz1_compress(input, inputOffset, length, output, outputOffset); - - /* else... */ - return fastlz2_compress(input, inputOffset, length, output, outputOffset); - } - - /** - Decompress a block of compressed data and returns the size of the - decompressed block. If error occurs, e.g. the compressed data is - corrupted or the output buffer is not large enough, then 0 (zero) - will be returned instead. - - The input buffer and the output buffer can not overlap. - - Decompression is memory safe and guaranteed not to write the output buffer - more than what is specified in maxout. - */ - public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, - int maxout) { - // magic identifier for compression level - int level = (((int) input[0] & 0xff) >>> 5) + 1; - - if (level == 1) - return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout); - if (level == 2) - return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout); - - // unknown level, trigger error - return 0; - } - - /** - Compress a block of data in the input buffer and returns the size of - compressed block. The size of input buffer is specified by length. The - minimum input buffer size is 16. - - The output buffer must be at least 5% larger than the input buffer - and can not be smaller than 66 bytes. - - If the input is not compressible, the return value might be larger than - length (input buffer size). - - The input buffer and the output buffer can not overlap. - - Compression level can be specified in parameter level. At the moment, - only level 1 and level 2 are supported. - Level 1 is the fastest compression and generally useful for short data. - Level 2 is slightly slower but it gives better compression ratio. - - Note that the compressed data, regardless of the level, can always be - decompressed using the function fastlz_decompress above. - */ - public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output, - int outputOffset) { - if (level == 1) - return fastlz1_compress(input, inputOffset, length, output, outputOffset); - if (level == 2) - return fastlz2_compress(input, inputOffset, length, output, outputOffset); - - return 0; - } - - - private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { - int ip = inputOffset; - int ip_bound = inputOffset + length - 2; - int ip_limit = inputOffset + length - 12; - int op = outputOffset; - - int[] htab = new int[HASH_SIZE]; - int hslot; - int hval; - - int copy; - - // sanity check - if(length < 4) - { - if(length != 0) - { - // create literal copy only - output[op++] = (byte) (length-1); - ip_bound++; - while(ip <= ip_bound) - output[op++] = input[ip++]; - return length+1; - } - else - return 0; - } - - // initializes hash table - for (hslot = 0; hslot < HASH_SIZE; ++hslot) - htab[hslot] = ip; - - // we start with literal copy - copy = 2; - output[op++] = MAX_COPY-1; - output[op++] = input[ip++]; - output[op++] = input[ip++]; - - // main loop - while (ip < ip_limit) { - int ref; - int distance; - - // minimum match length - int len = 3; - - // comparison starting-point - int anchor = ip; - - // find potential match - hval = HASH_FUNCTION(input, ip); - hslot = hval; - ref = htab[hval]; - - // calculate distance to the match - distance = anchor - ref; - - // update hash table - htab[hslot] = anchor; - - // is this a match? check the first 3 bytes - if (distance==0 || - (distance >= MAX_DISTANCE) || - input[ref++] != input[ip++] || - input[ref++] != input[ip++] || - input[ref++] != input[ip++]) - { - output[op++] = input[anchor++]; - ip = anchor; - copy++; - if (copy == MAX_COPY) { - copy = 0; - output[op++] = MAX_COPY-1; - } - } else { - // last matched byte - ip = anchor + len; - - // distance is biased - --distance; - - if(distance == 0) { - // zero distance means a run - byte x = input[ip - 1]; - while (ip < ip_bound) - if (input[ref++] != x) break; - else ip++; - } else { - for (;;) { - // safe because the outer check against ip limit - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - while (ip < ip_bound) - if (input[ref++] != input[ip++]) break; - break; - } - } - - // if we have copied something, adjust the copy count - if (copy != 0) - // copy is biased, '0' means 1 byte copy - output[op-copy-1] = (byte)(copy-1); - else - // back, to overwrite the copy count - op--; - - // reset literal counter - copy = 0; - - // length is biased, '1' means a match of 3 bytes - ip -= 3; - len = ip - anchor; - - // encode the match - if (len > MAX_LEN - 2) - while (len > MAX_LEN - 2) { - output[op++] = (byte) ((7 << 5) + (distance >>> 8)); - output[op++] = (byte) (MAX_LEN - 2 - 7 -2); - output[op++] = (byte) (distance & 0xff); - len -= MAX_LEN - 2; - } - - if (len < 7) { - output[op++] = (byte) ((len << 5) + (distance >>> 8)); - output[op++] = (byte) (distance & 255); - } else { - output[op++] = (byte) ((7 << 5) + (distance >>> 8)); - output[op++] = (byte) (len - 7); - output[op++] = (byte) (distance & 255); - } - - // update the hash at match boundary - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - - // assuming literal copy - output[op++] = MAX_COPY-1; - } - } - - // left-over as literal copy - ip_bound++; - while (ip <= ip_bound) { - output[op++] = input[ip++]; - copy++; - if (copy == MAX_COPY) { - copy = 0; - output[op++] = MAX_COPY-1; - } - } - - // if we have copied something, adjust the copy length - if (copy != 0) - output[op - copy - 1] = (byte) (copy - 1); - else - --op; - - return op - outputOffset; - } - - private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) - { - int ip = inputOffset; - int ip_bound = ip + length - 2; - int ip_limit = ip + length - 12; - int op = outputOffset; - - int[] htab = new int[HASH_SIZE]; - int hslot; - int hval; - - int copy; - - // sanity check - if (length < 4) { - if (length != 0) { - // create literal copy only - output[op++] = (byte) (length - 1); - ip_bound++; - while(ip <= ip_bound) - output[op++] = input[ip++]; - return length + 1; - } else - return 0; - } - - // initializes hash table - for (hslot = 0; hslot < HASH_SIZE; ++hslot) - htab[hslot] = ip; - - // we start with literal copy - copy = 2; - output[op++] = MAX_COPY-1; - output[op++] = input[ip++]; - output[op++] = input[ip++]; - - // main loop - while (ip < ip_limit) { - int ref; - int distance; - - // minimum match length - int len = 3; - - // comparison starting-point - int anchor = ip; - - // check for a run - if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) { - distance = 1; - ip += 3; - ref = anchor - 1 + 3; - - // NOTE: cannot do this goto, so copied the code here from line 487 - //goto match; - - // last matched byte - ip = anchor + len; - - // distance is biased - --distance; - - if (distance == 0) { - // zero distance means a run - byte x = input[ip - 1]; - while (ip < ip_bound) - if (input[ref++] != x) break; - else ++ip; - } else { - for (;;) { - // safe because the outer check against ip limit - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - while (ip < ip_bound) - if (input[ref++] != input[ip++]) break; - break; - } - } - - // if we have copied something, adjust the copy count - if (copy != 0) - // copy is biased, '0' means 1 byte copy - output[op - copy - 1] = (byte) (copy - 1); - else - // back, to overwrite the copy count - --op; - - // reset literal counter - copy = 0; - - // length is biased, '1' means a match of 3 bytes - ip -= 3; - len = ip - anchor; - - // encode the match - if (distance < MAX_DISTANCE2) { - if (len < 7) { - output[op++] = (byte) ((len << 5) + (distance >>> 8)); - output[op++] = (byte) (distance & 255); - } else { - output[op++] = (byte) ((7 << 5) + (distance >>> 8)); - for (len -= 7; len >= 255; len -= 255) - output[op++] = (byte) 255; - output[op++] = (byte) len; - output[op++] = (byte) (distance & 255); - } - } else { - // far away, but not yet in the another galaxy... - if(len < 7) { - distance -= MAX_DISTANCE2; - output[op++] = (byte) ((len << 5) + 31); - output[op++] = (byte) 255; - output[op++] = (byte) (distance >>> 8); - output[op++] = (byte) (distance & 255); - } else { - distance -= MAX_DISTANCE2; - output[op++] = (byte) ((7 << 5) + 31); - for (len -=7; len >= 255; len -= 255) - output[op++] = (byte) 255; - output[op++] = (byte) len; - output[op++] = (byte) 255; - output[op++] = (byte) (distance >>> 8); - output[op++] = (byte) (distance & 255); - } - } - - // update the hash at match boundary - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - - // assuming literal copy - output[op++] = MAX_COPY-1; - - continue; - } - - // find potential match - hval = HASH_FUNCTION(input, ip); - hslot = hval; - ref = htab[hval]; - - // calculate distance to the match - distance = anchor - ref; - - // update hash table - htab[hslot] = anchor; - - // is this a match? check the first 3 bytes - if (distance==0 || - (distance >= MAX_FARDISTANCE) || - input[ref++] != input[ip++] || - input[ref++] != input[ip++] || - input[ref++] != input[ip++]) - { - output[op++] = input[anchor++]; - ip = anchor; - copy++; - if (copy == MAX_COPY) { - copy = 0; - output[op++] = MAX_COPY-1; - } - } else { - // far, needs at least 5-byte match - if (distance >= MAX_DISTANCE2) { - if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) { - // LITERAL COPY, code copied from above - output[op++] = input[anchor++]; - ip = anchor; - copy++; - if (copy == MAX_COPY) { - copy = 0; - output[op++] = MAX_COPY-1; - } - continue; - } - len += 2; - } - - // NOTE: all of the following code within this block is copied above - // to line 357 - //match: - - // last matched byte - ip = anchor + len; - - // distance is biased - --distance; - - if (distance == 0) { - // zero distance means a run - byte x = input[ip - 1]; - while (ip < ip_bound) - if (input[ref++] != x) break; - else ++ip; - } else { - for (;;) { - // safe because the outer check against ip limit - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - if(input[ref++] != input[ip++]) break; - while (ip < ip_bound) - if (input[ref++] != input[ip++]) break; - break; - } - } - - // if we have copied something, adjust the copy count - if (copy != 0) - // copy is biased, '0' means 1 byte copy - output[op - copy - 1] = (byte) (copy - 1); - else - // back, to overwrite the copy count - --op; - - // reset literal counter - copy = 0; - - // length is biased, '1' means a match of 3 bytes - ip -= 3; - len = ip - anchor; - - // encode the match - if (distance < MAX_DISTANCE2) { - if (len < 7) { - output[op++] = (byte) ((len << 5) + (distance >>> 8)); - output[op++] = (byte) (distance & 255); - } else { - output[op++] = (byte) ((7 << 5) + (distance >>> 8)); - for (len -= 7; len >= 255; len -= 255) - output[op++] = (byte) 255; - output[op++] = (byte) len; - output[op++] = (byte) (distance & 255); - } - } else { - // far away, but not yet in the another galaxy... - if(len < 7) { - distance -= MAX_DISTANCE2; - output[op++] = (byte) ((len << 5) + 31); - output[op++] = (byte) 255; - output[op++] = (byte) (distance >>> 8); - output[op++] = (byte) (distance & 255); - } else { - distance -= MAX_DISTANCE2; - output[op++] = (byte) ((7 << 5) + 31); - for (len -=7; len >= 255; len -= 255) - output[op++] = (byte) 255; - output[op++] = (byte) len; - output[op++] = (byte) 255; - output[op++] = (byte) (distance >>> 8); - output[op++] = (byte) (distance & 255); - } - } - - // update the hash at match boundary - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - hval = HASH_FUNCTION(input, ip); - htab[hval] = ip++; - - // assuming literal copy - output[op++] = MAX_COPY-1; - } - } - - // left-over as literal copy - ip_bound++; - while (ip <= ip_bound) { - output[op++] = input[ip++]; - copy++; - if (copy == MAX_COPY) { - copy = 0; - output[op++] = MAX_COPY-1; - } - } - - // if we have copied something, adjust the copy length - if (copy != 0) - output[op-copy-1] = (byte) (copy - 1); - else - op--; - - // marker for fastlz2 - output[outputOffset] |= (1 << 5); - - return op - outputOffset; - } - - // ------------------------------------------------------------------------ - - private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, - int maxout) { - int ip = inputOffset; - int ip_limit = ip + length; - int op = outputOffset; - int op_limit = op + maxout; - int ctrl = (int) input[ip++] & 31; - boolean loop = true; - - do { - int ref = op; - int len = ctrl >>> 5; - int ofs = (ctrl & 31) << 8; - - if (ctrl >= 32) { - len--; - ref -= ofs; - if (len == 7-1) - len += (int) input[ip++] & 0xff; - ref -= (int) input[ip++] & 0xff; - - if (FASTLZ_SAFE) { - if (op + len + 3 > op_limit) - return 0; - - if (ref-1 < outputOffset) - return 0; - } - - if (ip < ip_limit) - ctrl = (int) input[ip++] & 0xff; - else - loop = false; - - if (ref == op) { - // optimize copy for a run - byte b = output[ref-1]; - output[op++] = b; - output[op++] = b; - output[op++] = b; - for(; len > 0; --len) - output[op++] = b; - } else { - /* copy from reference */ - ref--; - output[op++] = output[ref++]; - output[op++] = output[ref++]; - output[op++] = output[ref++]; - for (; len > 0; --len) - output[op++] = output[ref++]; - } - } else { - ctrl++; - if (FASTLZ_SAFE) { - if (op + ctrl > op_limit) - return 0; - if (ip + ctrl > ip_limit) - return 0; - } - - output[op++] = input[ip++]; - for (--ctrl; ctrl > 0; --ctrl) - output[op++] = input[ip++]; - - loop = ip < ip_limit; - if (loop) - ctrl = (int) input[ip++] & 0xff; - } - } while(loop); - - return op - outputOffset; - } - - private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, - int maxout) { - int ip = inputOffset; - int ip_limit = ip + length; - int op = outputOffset; - int op_limit = op + maxout; - int ctrl = (int) input[ip++] & 31; - boolean loop = true; - - do { - int ref = op; - int len = ctrl >>> 5; - int ofs = (ctrl & 31) << 8; - - if (ctrl >= 32) { - int code; - len--; - ref -= ofs; - if (len == 7-1) { - do { - code = (int) input[ip++] & 0xff; - len += code; - } while (code==255); - } - code = (int) input[ip++] & 0xff; - ref -= code; - - // match from 16-bit distance - if (code == 255) { - if (ofs == (31 << 8)) { - ofs = ((int) input[ip++] & 0xff) << 8; - ofs += (int) input[ip++] & 0xff; - ref = op - ofs - MAX_DISTANCE2; - } - } - - if (FASTLZ_SAFE) { - if (op + len + 3 > op_limit) - return 0; - - if (ref-1 < outputOffset) - return 0; - } - - if (ip < ip_limit) - ctrl = (int) input[ip++] & 0xff; - else - loop = false; - - if (ref == op) { - /* optimize copy for a run */ - byte b = output[ref-1]; - output[op++] = b; - output[op++] = b; - output[op++] = b; - for (; len > 0; --len) - output[op++] = b; - } else { - /* copy from reference */ - --ref; - output[op++] = output[ref++]; - output[op++] = output[ref++]; - output[op++] = output[ref++]; - for (; len > 0; --len) - output[op++] = output[ref++]; - } - } else { - ++ctrl; - if (FASTLZ_SAFE) { - if (op + ctrl > op_limit) - return 0; - if (ip + ctrl > ip_limit) - return 0; - } - - output[op++] = input[ip++]; - for (--ctrl; ctrl > 0; --ctrl) - output[op++] = input[ip++]; - - loop = ip < ip_limit; - if (loop) - ctrl = (int) input[ip++] & 0xff; - } - } while (loop); - - return op - outputOffset; - } - -} +/******************************************************************************* + * Copyright (c) 2007, 2011 Association for Decentralized Information Management + * in Industry THTH ry. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * VTT Technical Research Centre of Finland - initial API and implementation + *******************************************************************************/ +package org.simantics.fastlz.java; + + +/** + * @author Tuukka Lehtonen + */ +public class FastLZImpl { + + /** 256 + 8 */ + private static final int MAX_LEN = 264; + private static final int MAX_COPY = 32; + // For level 1 compression + private static final int MAX_DISTANCE = 8192; + // For level 2 compression + private static final int MAX_DISTANCE2 = 8191; + private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1); + + private static final boolean FASTLZ_SAFE = false; + + public static final int FASTLZ_VERSION = 0x000100; + + public static final int FASTLZ_VERSION_MAJOR = 0; + public static final int FASTLZ_VERSION_MINOR = 1; + public static final int FASTLZ_VERSION_REVISION = 0; + + public static final String FASTLZ_VERSION_STRING = "0.1.0"; + + private static final int HASH_LOG = 13; + private static final int HASH_SIZE = (1 << HASH_LOG); + private static final int HASH_MASK = (HASH_SIZE - 1); + + private static final int FASTLZ_READU16(byte[] array, int offset) { + final int b0 = array[offset] & 0xff; + final int b1 = array[offset + 1] & 0xff; + return b0 | (b1 << 8); + } + + /** + * @param array + * @param offset + * @return + */ + private static final int HASH_FUNCTION(byte[] array, int offset) { + final int b0 = array[offset] & 0xff; + final int b1 = array[offset+1] & 0xff; + final int b2 = array[offset+2] & 0xff; + int v = b0 | (b1 << 8); + final int v2 = b1 | (b2 << 8); + v ^= v2 ^ (v >>> (16 - HASH_LOG)); + v &= HASH_MASK; + return v; + } + + /** + Compress a block of data in the input buffer and returns the size of + compressed block. The size of input buffer is specified by length. The + minimum input buffer size is 16. + + The output buffer must be at least 5% larger than the input buffer + and can not be smaller than 66 bytes. + + If the input is not compressible, the return value might be larger than + length (input buffer size). + + The input buffer and the output buffer can not overlap. + */ + public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { + /* for short block, choose fastlz1 */ + if (length < 65536) + return fastlz1_compress(input, inputOffset, length, output, outputOffset); + + /* else... */ + return fastlz2_compress(input, inputOffset, length, output, outputOffset); + } + + /** + Decompress a block of compressed data and returns the size of the + decompressed block. If error occurs, e.g. the compressed data is + corrupted or the output buffer is not large enough, then 0 (zero) + will be returned instead. + + The input buffer and the output buffer can not overlap. + + Decompression is memory safe and guaranteed not to write the output buffer + more than what is specified in maxout. + */ + public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, + int maxout) { + // magic identifier for compression level + int level = (((int) input[0] & 0xff) >>> 5) + 1; + + if (level == 1) + return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout); + if (level == 2) + return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout); + + // unknown level, trigger error + return 0; + } + + /** + Compress a block of data in the input buffer and returns the size of + compressed block. The size of input buffer is specified by length. The + minimum input buffer size is 16. + + The output buffer must be at least 5% larger than the input buffer + and can not be smaller than 66 bytes. + + If the input is not compressible, the return value might be larger than + length (input buffer size). + + The input buffer and the output buffer can not overlap. + + Compression level can be specified in parameter level. At the moment, + only level 1 and level 2 are supported. + Level 1 is the fastest compression and generally useful for short data. + Level 2 is slightly slower but it gives better compression ratio. + + Note that the compressed data, regardless of the level, can always be + decompressed using the function fastlz_decompress above. + */ + public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output, + int outputOffset) { + if (level == 1) + return fastlz1_compress(input, inputOffset, length, output, outputOffset); + if (level == 2) + return fastlz2_compress(input, inputOffset, length, output, outputOffset); + + return 0; + } + + + private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) { + int ip = inputOffset; + int ip_bound = inputOffset + length - 2; + int ip_limit = inputOffset + length - 12; + int op = outputOffset; + + int[] htab = new int[HASH_SIZE]; + int hslot; + int hval; + + int copy; + + // sanity check + if(length < 4) + { + if(length != 0) + { + // create literal copy only + output[op++] = (byte) (length-1); + ip_bound++; + while(ip <= ip_bound) + output[op++] = input[ip++]; + return length+1; + } + else + return 0; + } + + // initializes hash table + for (hslot = 0; hslot < HASH_SIZE; ++hslot) + htab[hslot] = ip; + + // we start with literal copy + copy = 2; + output[op++] = MAX_COPY-1; + output[op++] = input[ip++]; + output[op++] = input[ip++]; + + // main loop + while (ip < ip_limit) { + int ref; + int distance; + + // minimum match length + int len = 3; + + // comparison starting-point + int anchor = ip; + + // find potential match + hval = HASH_FUNCTION(input, ip); + hslot = hval; + ref = htab[hval]; + + // calculate distance to the match + distance = anchor - ref; + + // update hash table + htab[hslot] = anchor; + + // is this a match? check the first 3 bytes + if (distance==0 || + (distance >= MAX_DISTANCE) || + input[ref++] != input[ip++] || + input[ref++] != input[ip++] || + input[ref++] != input[ip++]) + { + output[op++] = input[anchor++]; + ip = anchor; + copy++; + if (copy == MAX_COPY) { + copy = 0; + output[op++] = MAX_COPY-1; + } + } else { + // last matched byte + ip = anchor + len; + + // distance is biased + --distance; + + if(distance == 0) { + // zero distance means a run + byte x = input[ip - 1]; + while (ip < ip_bound) + if (input[ref++] != x) break; + else ip++; + } else { + for (;;) { + // safe because the outer check against ip limit + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + while (ip < ip_bound) + if (input[ref++] != input[ip++]) break; + break; + } + } + + // if we have copied something, adjust the copy count + if (copy != 0) + // copy is biased, '0' means 1 byte copy + output[op-copy-1] = (byte)(copy-1); + else + // back, to overwrite the copy count + op--; + + // reset literal counter + copy = 0; + + // length is biased, '1' means a match of 3 bytes + ip -= 3; + len = ip - anchor; + + // encode the match + if (len > MAX_LEN - 2) + while (len > MAX_LEN - 2) { + output[op++] = (byte) ((7 << 5) + (distance >>> 8)); + output[op++] = (byte) (MAX_LEN - 2 - 7 -2); + output[op++] = (byte) (distance & 0xff); + len -= MAX_LEN - 2; + } + + if (len < 7) { + output[op++] = (byte) ((len << 5) + (distance >>> 8)); + output[op++] = (byte) (distance & 255); + } else { + output[op++] = (byte) ((7 << 5) + (distance >>> 8)); + output[op++] = (byte) (len - 7); + output[op++] = (byte) (distance & 255); + } + + // update the hash at match boundary + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + + // assuming literal copy + output[op++] = MAX_COPY-1; + } + } + + // left-over as literal copy + ip_bound++; + while (ip <= ip_bound) { + output[op++] = input[ip++]; + copy++; + if (copy == MAX_COPY) { + copy = 0; + output[op++] = MAX_COPY-1; + } + } + + // if we have copied something, adjust the copy length + if (copy != 0) + output[op - copy - 1] = (byte) (copy - 1); + else + --op; + + return op - outputOffset; + } + + private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) + { + int ip = inputOffset; + int ip_bound = ip + length - 2; + int ip_limit = ip + length - 12; + int op = outputOffset; + + int[] htab = new int[HASH_SIZE]; + int hslot; + int hval; + + int copy; + + // sanity check + if (length < 4) { + if (length != 0) { + // create literal copy only + output[op++] = (byte) (length - 1); + ip_bound++; + while(ip <= ip_bound) + output[op++] = input[ip++]; + return length + 1; + } else + return 0; + } + + // initializes hash table + for (hslot = 0; hslot < HASH_SIZE; ++hslot) + htab[hslot] = ip; + + // we start with literal copy + copy = 2; + output[op++] = MAX_COPY-1; + output[op++] = input[ip++]; + output[op++] = input[ip++]; + + // main loop + while (ip < ip_limit) { + int ref; + int distance; + + // minimum match length + int len = 3; + + // comparison starting-point + int anchor = ip; + + // check for a run + if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) { + distance = 1; + ip += 3; + ref = anchor - 1 + 3; + + // NOTE: cannot do this goto, so copied the code here from line 487 + //goto match; + + // last matched byte + ip = anchor + len; + + // distance is biased + --distance; + + if (distance == 0) { + // zero distance means a run + byte x = input[ip - 1]; + while (ip < ip_bound) + if (input[ref++] != x) break; + else ++ip; + } else { + for (;;) { + // safe because the outer check against ip limit + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + while (ip < ip_bound) + if (input[ref++] != input[ip++]) break; + break; + } + } + + // if we have copied something, adjust the copy count + if (copy != 0) + // copy is biased, '0' means 1 byte copy + output[op - copy - 1] = (byte) (copy - 1); + else + // back, to overwrite the copy count + --op; + + // reset literal counter + copy = 0; + + // length is biased, '1' means a match of 3 bytes + ip -= 3; + len = ip - anchor; + + // encode the match + if (distance < MAX_DISTANCE2) { + if (len < 7) { + output[op++] = (byte) ((len << 5) + (distance >>> 8)); + output[op++] = (byte) (distance & 255); + } else { + output[op++] = (byte) ((7 << 5) + (distance >>> 8)); + for (len -= 7; len >= 255; len -= 255) + output[op++] = (byte) 255; + output[op++] = (byte) len; + output[op++] = (byte) (distance & 255); + } + } else { + // far away, but not yet in the another galaxy... + if(len < 7) { + distance -= MAX_DISTANCE2; + output[op++] = (byte) ((len << 5) + 31); + output[op++] = (byte) 255; + output[op++] = (byte) (distance >>> 8); + output[op++] = (byte) (distance & 255); + } else { + distance -= MAX_DISTANCE2; + output[op++] = (byte) ((7 << 5) + 31); + for (len -=7; len >= 255; len -= 255) + output[op++] = (byte) 255; + output[op++] = (byte) len; + output[op++] = (byte) 255; + output[op++] = (byte) (distance >>> 8); + output[op++] = (byte) (distance & 255); + } + } + + // update the hash at match boundary + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + + // assuming literal copy + output[op++] = MAX_COPY-1; + + continue; + } + + // find potential match + hval = HASH_FUNCTION(input, ip); + hslot = hval; + ref = htab[hval]; + + // calculate distance to the match + distance = anchor - ref; + + // update hash table + htab[hslot] = anchor; + + // is this a match? check the first 3 bytes + if (distance==0 || + (distance >= MAX_FARDISTANCE) || + input[ref++] != input[ip++] || + input[ref++] != input[ip++] || + input[ref++] != input[ip++]) + { + output[op++] = input[anchor++]; + ip = anchor; + copy++; + if (copy == MAX_COPY) { + copy = 0; + output[op++] = MAX_COPY-1; + } + } else { + // far, needs at least 5-byte match + if (distance >= MAX_DISTANCE2) { + if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) { + // LITERAL COPY, code copied from above + output[op++] = input[anchor++]; + ip = anchor; + copy++; + if (copy == MAX_COPY) { + copy = 0; + output[op++] = MAX_COPY-1; + } + continue; + } + len += 2; + } + + // NOTE: all of the following code within this block is copied above + // to line 357 + //match: + + // last matched byte + ip = anchor + len; + + // distance is biased + --distance; + + if (distance == 0) { + // zero distance means a run + byte x = input[ip - 1]; + while (ip < ip_bound) + if (input[ref++] != x) break; + else ++ip; + } else { + for (;;) { + // safe because the outer check against ip limit + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + if(input[ref++] != input[ip++]) break; + while (ip < ip_bound) + if (input[ref++] != input[ip++]) break; + break; + } + } + + // if we have copied something, adjust the copy count + if (copy != 0) + // copy is biased, '0' means 1 byte copy + output[op - copy - 1] = (byte) (copy - 1); + else + // back, to overwrite the copy count + --op; + + // reset literal counter + copy = 0; + + // length is biased, '1' means a match of 3 bytes + ip -= 3; + len = ip - anchor; + + // encode the match + if (distance < MAX_DISTANCE2) { + if (len < 7) { + output[op++] = (byte) ((len << 5) + (distance >>> 8)); + output[op++] = (byte) (distance & 255); + } else { + output[op++] = (byte) ((7 << 5) + (distance >>> 8)); + for (len -= 7; len >= 255; len -= 255) + output[op++] = (byte) 255; + output[op++] = (byte) len; + output[op++] = (byte) (distance & 255); + } + } else { + // far away, but not yet in the another galaxy... + if(len < 7) { + distance -= MAX_DISTANCE2; + output[op++] = (byte) ((len << 5) + 31); + output[op++] = (byte) 255; + output[op++] = (byte) (distance >>> 8); + output[op++] = (byte) (distance & 255); + } else { + distance -= MAX_DISTANCE2; + output[op++] = (byte) ((7 << 5) + 31); + for (len -=7; len >= 255; len -= 255) + output[op++] = (byte) 255; + output[op++] = (byte) len; + output[op++] = (byte) 255; + output[op++] = (byte) (distance >>> 8); + output[op++] = (byte) (distance & 255); + } + } + + // update the hash at match boundary + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + hval = HASH_FUNCTION(input, ip); + htab[hval] = ip++; + + // assuming literal copy + output[op++] = MAX_COPY-1; + } + } + + // left-over as literal copy + ip_bound++; + while (ip <= ip_bound) { + output[op++] = input[ip++]; + copy++; + if (copy == MAX_COPY) { + copy = 0; + output[op++] = MAX_COPY-1; + } + } + + // if we have copied something, adjust the copy length + if (copy != 0) + output[op-copy-1] = (byte) (copy - 1); + else + op--; + + // marker for fastlz2 + output[outputOffset] |= (1 << 5); + + return op - outputOffset; + } + + // ------------------------------------------------------------------------ + + private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, + int maxout) { + int ip = inputOffset; + int ip_limit = ip + length; + int op = outputOffset; + int op_limit = op + maxout; + int ctrl = (int) input[ip++] & 31; + boolean loop = true; + + do { + int ref = op; + int len = ctrl >>> 5; + int ofs = (ctrl & 31) << 8; + + if (ctrl >= 32) { + len--; + ref -= ofs; + if (len == 7-1) + len += (int) input[ip++] & 0xff; + ref -= (int) input[ip++] & 0xff; + + if (FASTLZ_SAFE) { + if (op + len + 3 > op_limit) + return 0; + + if (ref-1 < outputOffset) + return 0; + } + + if (ip < ip_limit) + ctrl = (int) input[ip++] & 0xff; + else + loop = false; + + if (ref == op) { + // optimize copy for a run + byte b = output[ref-1]; + output[op++] = b; + output[op++] = b; + output[op++] = b; + for(; len > 0; --len) + output[op++] = b; + } else { + /* copy from reference */ + ref--; + output[op++] = output[ref++]; + output[op++] = output[ref++]; + output[op++] = output[ref++]; + for (; len > 0; --len) + output[op++] = output[ref++]; + } + } else { + ctrl++; + if (FASTLZ_SAFE) { + if (op + ctrl > op_limit) + return 0; + if (ip + ctrl > ip_limit) + return 0; + } + + output[op++] = input[ip++]; + for (--ctrl; ctrl > 0; --ctrl) + output[op++] = input[ip++]; + + loop = ip < ip_limit; + if (loop) + ctrl = (int) input[ip++] & 0xff; + } + } while(loop); + + return op - outputOffset; + } + + private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset, + int maxout) { + int ip = inputOffset; + int ip_limit = ip + length; + int op = outputOffset; + int op_limit = op + maxout; + int ctrl = (int) input[ip++] & 31; + boolean loop = true; + + do { + int ref = op; + int len = ctrl >>> 5; + int ofs = (ctrl & 31) << 8; + + if (ctrl >= 32) { + int code; + len--; + ref -= ofs; + if (len == 7-1) { + do { + code = (int) input[ip++] & 0xff; + len += code; + } while (code==255); + } + code = (int) input[ip++] & 0xff; + ref -= code; + + // match from 16-bit distance + if (code == 255) { + if (ofs == (31 << 8)) { + ofs = ((int) input[ip++] & 0xff) << 8; + ofs += (int) input[ip++] & 0xff; + ref = op - ofs - MAX_DISTANCE2; + } + } + + if (FASTLZ_SAFE) { + if (op + len + 3 > op_limit) + return 0; + + if (ref-1 < outputOffset) + return 0; + } + + if (ip < ip_limit) + ctrl = (int) input[ip++] & 0xff; + else + loop = false; + + if (ref == op) { + /* optimize copy for a run */ + byte b = output[ref-1]; + output[op++] = b; + output[op++] = b; + output[op++] = b; + for (; len > 0; --len) + output[op++] = b; + } else { + /* copy from reference */ + --ref; + output[op++] = output[ref++]; + output[op++] = output[ref++]; + output[op++] = output[ref++]; + for (; len > 0; --len) + output[op++] = output[ref++]; + } + } else { + ++ctrl; + if (FASTLZ_SAFE) { + if (op + ctrl > op_limit) + return 0; + if (ip + ctrl > ip_limit) + return 0; + } + + output[op++] = input[ip++]; + for (--ctrl; ctrl > 0; --ctrl) + output[op++] = input[ip++]; + + loop = ip < ip_limit; + if (loop) + ctrl = (int) input[ip++] & 0xff; + } + } while (loop); + + return op - outputOffset; + } + +}