-/*******************************************************************************\r
- * Copyright (c) 2007, 2011 Association for Decentralized Information Management\r
- * in Industry THTH ry.\r
- * All rights reserved. This program and the accompanying materials\r
- * are made available under the terms of the Eclipse Public License v1.0\r
- * which accompanies this distribution, and is available at\r
- * http://www.eclipse.org/legal/epl-v10.html\r
- *\r
- * Contributors:\r
- * VTT Technical Research Centre of Finland - initial API and implementation\r
- *******************************************************************************/\r
-package org.simantics.fastlz.java;\r
-\r
-\r
-/**\r
- * @author Tuukka Lehtonen\r
- */\r
-public class FastLZImpl {\r
-\r
- /** 256 + 8 */\r
- private static final int MAX_LEN = 264;\r
- private static final int MAX_COPY = 32;\r
- // For level 1 compression\r
- private static final int MAX_DISTANCE = 8192;\r
- // For level 2 compression\r
- private static final int MAX_DISTANCE2 = 8191;\r
- private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1);\r
-\r
- private static final boolean FASTLZ_SAFE = false;\r
-\r
- public static final int FASTLZ_VERSION = 0x000100;\r
-\r
- public static final int FASTLZ_VERSION_MAJOR = 0;\r
- public static final int FASTLZ_VERSION_MINOR = 1;\r
- public static final int FASTLZ_VERSION_REVISION = 0;\r
-\r
- public static final String FASTLZ_VERSION_STRING = "0.1.0";\r
-\r
- private static final int HASH_LOG = 13;\r
- private static final int HASH_SIZE = (1 << HASH_LOG);\r
- private static final int HASH_MASK = (HASH_SIZE - 1);\r
-\r
- private static final int FASTLZ_READU16(byte[] array, int offset) {\r
- final int b0 = array[offset] & 0xff; \r
- final int b1 = array[offset + 1] & 0xff; \r
- return b0 | (b1 << 8);\r
- }\r
-\r
- /**\r
- * @param array\r
- * @param offset\r
- * @return\r
- */\r
- private static final int HASH_FUNCTION(byte[] array, int offset) {\r
- final int b0 = array[offset] & 0xff;\r
- final int b1 = array[offset+1] & 0xff;\r
- final int b2 = array[offset+2] & 0xff;\r
- int v = b0 | (b1 << 8);\r
- final int v2 = b1 | (b2 << 8);\r
- v ^= v2 ^ (v >>> (16 - HASH_LOG));\r
- v &= HASH_MASK;\r
- return v;\r
- }\r
-\r
- /**\r
- Compress a block of data in the input buffer and returns the size of \r
- compressed block. The size of input buffer is specified by length. The \r
- minimum input buffer size is 16.\r
-\r
- The output buffer must be at least 5% larger than the input buffer \r
- and can not be smaller than 66 bytes.\r
-\r
- If the input is not compressible, the return value might be larger than\r
- length (input buffer size).\r
-\r
- The input buffer and the output buffer can not overlap.\r
- */\r
- public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {\r
- /* for short block, choose fastlz1 */\r
- if (length < 65536)\r
- return fastlz1_compress(input, inputOffset, length, output, outputOffset);\r
-\r
- /* else... */\r
- return fastlz2_compress(input, inputOffset, length, output, outputOffset);\r
- }\r
-\r
- /**\r
- Decompress a block of compressed data and returns the size of the \r
- decompressed block. If error occurs, e.g. the compressed data is \r
- corrupted or the output buffer is not large enough, then 0 (zero) \r
- will be returned instead.\r
-\r
- The input buffer and the output buffer can not overlap.\r
-\r
- Decompression is memory safe and guaranteed not to write the output buffer\r
- more than what is specified in maxout.\r
- */\r
- public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,\r
- int maxout) {\r
- // magic identifier for compression level\r
- int level = (((int) input[0] & 0xff) >>> 5) + 1;\r
-\r
- if (level == 1)\r
- return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout);\r
- if (level == 2)\r
- return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout);\r
-\r
- // unknown level, trigger error\r
- return 0;\r
- }\r
-\r
- /**\r
- Compress a block of data in the input buffer and returns the size of \r
- compressed block. The size of input buffer is specified by length. The \r
- minimum input buffer size is 16.\r
-\r
- The output buffer must be at least 5% larger than the input buffer \r
- and can not be smaller than 66 bytes.\r
-\r
- If the input is not compressible, the return value might be larger than\r
- length (input buffer size).\r
-\r
- The input buffer and the output buffer can not overlap.\r
-\r
- Compression level can be specified in parameter level. At the moment, \r
- only level 1 and level 2 are supported.\r
- Level 1 is the fastest compression and generally useful for short data.\r
- Level 2 is slightly slower but it gives better compression ratio.\r
-\r
- Note that the compressed data, regardless of the level, can always be\r
- decompressed using the function fastlz_decompress above.\r
- */\r
- public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output,\r
- int outputOffset) {\r
- if (level == 1)\r
- return fastlz1_compress(input, inputOffset, length, output, outputOffset);\r
- if (level == 2)\r
- return fastlz2_compress(input, inputOffset, length, output, outputOffset);\r
-\r
- return 0;\r
- }\r
-\r
-\r
- private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {\r
- int ip = inputOffset;\r
- int ip_bound = inputOffset + length - 2;\r
- int ip_limit = inputOffset + length - 12;\r
- int op = outputOffset;\r
-\r
- int[] htab = new int[HASH_SIZE];\r
- int hslot;\r
- int hval;\r
-\r
- int copy;\r
-\r
- // sanity check\r
- if(length < 4)\r
- {\r
- if(length != 0)\r
- {\r
- // create literal copy only\r
- output[op++] = (byte) (length-1);\r
- ip_bound++;\r
- while(ip <= ip_bound)\r
- output[op++] = input[ip++];\r
- return length+1;\r
- }\r
- else\r
- return 0;\r
- }\r
-\r
- // initializes hash table\r
- for (hslot = 0; hslot < HASH_SIZE; ++hslot)\r
- htab[hslot] = ip;\r
-\r
- // we start with literal copy\r
- copy = 2;\r
- output[op++] = MAX_COPY-1;\r
- output[op++] = input[ip++];\r
- output[op++] = input[ip++];\r
-\r
- // main loop\r
- while (ip < ip_limit) {\r
- int ref;\r
- int distance;\r
-\r
- // minimum match length\r
- int len = 3;\r
-\r
- // comparison starting-point\r
- int anchor = ip;\r
-\r
- // find potential match\r
- hval = HASH_FUNCTION(input, ip);\r
- hslot = hval;\r
- ref = htab[hval];\r
-\r
- // calculate distance to the match\r
- distance = anchor - ref;\r
-\r
- // update hash table\r
- htab[hslot] = anchor;\r
-\r
- // is this a match? check the first 3 bytes\r
- if (distance==0 ||\r
- (distance >= MAX_DISTANCE) ||\r
- input[ref++] != input[ip++] ||\r
- input[ref++] != input[ip++] ||\r
- input[ref++] != input[ip++])\r
- {\r
- output[op++] = input[anchor++];\r
- ip = anchor;\r
- copy++;\r
- if (copy == MAX_COPY) {\r
- copy = 0;\r
- output[op++] = MAX_COPY-1;\r
- }\r
- } else {\r
- // last matched byte\r
- ip = anchor + len;\r
-\r
- // distance is biased\r
- --distance;\r
-\r
- if(distance == 0) {\r
- // zero distance means a run\r
- byte x = input[ip - 1];\r
- while (ip < ip_bound)\r
- if (input[ref++] != x) break;\r
- else ip++;\r
- } else {\r
- for (;;) {\r
- // safe because the outer check against ip limit\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- while (ip < ip_bound)\r
- if (input[ref++] != input[ip++]) break;\r
- break;\r
- }\r
- }\r
-\r
- // if we have copied something, adjust the copy count\r
- if (copy != 0)\r
- // copy is biased, '0' means 1 byte copy\r
- output[op-copy-1] = (byte)(copy-1);\r
- else\r
- // back, to overwrite the copy count\r
- op--;\r
-\r
- // reset literal counter\r
- copy = 0;\r
-\r
- // length is biased, '1' means a match of 3 bytes\r
- ip -= 3;\r
- len = ip - anchor;\r
-\r
- // encode the match\r
- if (len > MAX_LEN - 2)\r
- while (len > MAX_LEN - 2) {\r
- output[op++] = (byte) ((7 << 5) + (distance >>> 8));\r
- output[op++] = (byte) (MAX_LEN - 2 - 7 -2); \r
- output[op++] = (byte) (distance & 0xff);\r
- len -= MAX_LEN - 2;\r
- }\r
-\r
- if (len < 7) {\r
- output[op++] = (byte) ((len << 5) + (distance >>> 8));\r
- output[op++] = (byte) (distance & 255);\r
- } else {\r
- output[op++] = (byte) ((7 << 5) + (distance >>> 8));\r
- output[op++] = (byte) (len - 7);\r
- output[op++] = (byte) (distance & 255);\r
- }\r
-\r
- // update the hash at match boundary\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
-\r
- // assuming literal copy\r
- output[op++] = MAX_COPY-1;\r
- }\r
- }\r
-\r
- // left-over as literal copy\r
- ip_bound++;\r
- while (ip <= ip_bound) {\r
- output[op++] = input[ip++];\r
- copy++;\r
- if (copy == MAX_COPY) {\r
- copy = 0;\r
- output[op++] = MAX_COPY-1;\r
- }\r
- }\r
-\r
- // if we have copied something, adjust the copy length\r
- if (copy != 0)\r
- output[op - copy - 1] = (byte) (copy - 1);\r
- else\r
- --op;\r
-\r
- return op - outputOffset;\r
- }\r
-\r
- private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset)\r
- {\r
- int ip = inputOffset;\r
- int ip_bound = ip + length - 2;\r
- int ip_limit = ip + length - 12;\r
- int op = outputOffset;\r
-\r
- int[] htab = new int[HASH_SIZE];\r
- int hslot;\r
- int hval;\r
-\r
- int copy;\r
-\r
- // sanity check\r
- if (length < 4) {\r
- if (length != 0) {\r
- // create literal copy only\r
- output[op++] = (byte) (length - 1);\r
- ip_bound++;\r
- while(ip <= ip_bound)\r
- output[op++] = input[ip++];\r
- return length + 1;\r
- } else\r
- return 0;\r
- }\r
-\r
- // initializes hash table\r
- for (hslot = 0; hslot < HASH_SIZE; ++hslot)\r
- htab[hslot] = ip;\r
-\r
- // we start with literal copy\r
- copy = 2;\r
- output[op++] = MAX_COPY-1;\r
- output[op++] = input[ip++];\r
- output[op++] = input[ip++];\r
-\r
- // main loop\r
- while (ip < ip_limit) {\r
- int ref;\r
- int distance;\r
-\r
- // minimum match length\r
- int len = 3;\r
-\r
- // comparison starting-point\r
- int anchor = ip;\r
-\r
- // check for a run\r
- if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) {\r
- distance = 1;\r
- ip += 3;\r
- ref = anchor - 1 + 3;\r
-\r
- // NOTE: cannot do this goto, so copied the code here from line 487\r
- //goto match;\r
-\r
- // last matched byte\r
- ip = anchor + len;\r
-\r
- // distance is biased\r
- --distance;\r
-\r
- if (distance == 0) {\r
- // zero distance means a run\r
- byte x = input[ip - 1];\r
- while (ip < ip_bound)\r
- if (input[ref++] != x) break;\r
- else ++ip;\r
- } else {\r
- for (;;) {\r
- // safe because the outer check against ip limit\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- while (ip < ip_bound)\r
- if (input[ref++] != input[ip++]) break;\r
- break;\r
- }\r
- }\r
-\r
- // if we have copied something, adjust the copy count\r
- if (copy != 0)\r
- // copy is biased, '0' means 1 byte copy\r
- output[op - copy - 1] = (byte) (copy - 1);\r
- else\r
- // back, to overwrite the copy count\r
- --op;\r
-\r
- // reset literal counter\r
- copy = 0;\r
-\r
- // length is biased, '1' means a match of 3 bytes\r
- ip -= 3;\r
- len = ip - anchor;\r
-\r
- // encode the match\r
- if (distance < MAX_DISTANCE2) {\r
- if (len < 7) {\r
- output[op++] = (byte) ((len << 5) + (distance >>> 8));\r
- output[op++] = (byte) (distance & 255);\r
- } else {\r
- output[op++] = (byte) ((7 << 5) + (distance >>> 8));\r
- for (len -= 7; len >= 255; len -= 255)\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) len;\r
- output[op++] = (byte) (distance & 255);\r
- }\r
- } else {\r
- // far away, but not yet in the another galaxy...\r
- if(len < 7) {\r
- distance -= MAX_DISTANCE2;\r
- output[op++] = (byte) ((len << 5) + 31);\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) (distance >>> 8);\r
- output[op++] = (byte) (distance & 255);\r
- } else {\r
- distance -= MAX_DISTANCE2;\r
- output[op++] = (byte) ((7 << 5) + 31);\r
- for (len -=7; len >= 255; len -= 255)\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) len;\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) (distance >>> 8);\r
- output[op++] = (byte) (distance & 255);\r
- }\r
- }\r
-\r
- // update the hash at match boundary\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
-\r
- // assuming literal copy\r
- output[op++] = MAX_COPY-1;\r
-\r
- continue;\r
- }\r
-\r
- // find potential match\r
- hval = HASH_FUNCTION(input, ip);\r
- hslot = hval;\r
- ref = htab[hval];\r
-\r
- // calculate distance to the match\r
- distance = anchor - ref;\r
-\r
- // update hash table\r
- htab[hslot] = anchor;\r
-\r
- // is this a match? check the first 3 bytes\r
- if (distance==0 || \r
- (distance >= MAX_FARDISTANCE) ||\r
- input[ref++] != input[ip++] ||\r
- input[ref++] != input[ip++] ||\r
- input[ref++] != input[ip++])\r
- {\r
- output[op++] = input[anchor++];\r
- ip = anchor;\r
- copy++;\r
- if (copy == MAX_COPY) {\r
- copy = 0;\r
- output[op++] = MAX_COPY-1;\r
- }\r
- } else {\r
- // far, needs at least 5-byte match\r
- if (distance >= MAX_DISTANCE2) {\r
- if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) {\r
- // LITERAL COPY, code copied from above\r
- output[op++] = input[anchor++];\r
- ip = anchor;\r
- copy++;\r
- if (copy == MAX_COPY) {\r
- copy = 0;\r
- output[op++] = MAX_COPY-1;\r
- }\r
- continue;\r
- }\r
- len += 2;\r
- }\r
-\r
- // NOTE: all of the following code within this block is copied above\r
- // to line 357\r
- //match:\r
-\r
- // last matched byte\r
- ip = anchor + len;\r
-\r
- // distance is biased\r
- --distance;\r
-\r
- if (distance == 0) {\r
- // zero distance means a run\r
- byte x = input[ip - 1];\r
- while (ip < ip_bound)\r
- if (input[ref++] != x) break;\r
- else ++ip;\r
- } else {\r
- for (;;) {\r
- // safe because the outer check against ip limit\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- if(input[ref++] != input[ip++]) break;\r
- while (ip < ip_bound)\r
- if (input[ref++] != input[ip++]) break;\r
- break;\r
- }\r
- }\r
-\r
- // if we have copied something, adjust the copy count\r
- if (copy != 0)\r
- // copy is biased, '0' means 1 byte copy\r
- output[op - copy - 1] = (byte) (copy - 1);\r
- else\r
- // back, to overwrite the copy count\r
- --op;\r
-\r
- // reset literal counter\r
- copy = 0;\r
-\r
- // length is biased, '1' means a match of 3 bytes\r
- ip -= 3;\r
- len = ip - anchor;\r
-\r
- // encode the match\r
- if (distance < MAX_DISTANCE2) {\r
- if (len < 7) {\r
- output[op++] = (byte) ((len << 5) + (distance >>> 8));\r
- output[op++] = (byte) (distance & 255);\r
- } else {\r
- output[op++] = (byte) ((7 << 5) + (distance >>> 8));\r
- for (len -= 7; len >= 255; len -= 255)\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) len;\r
- output[op++] = (byte) (distance & 255);\r
- }\r
- } else {\r
- // far away, but not yet in the another galaxy...\r
- if(len < 7) {\r
- distance -= MAX_DISTANCE2;\r
- output[op++] = (byte) ((len << 5) + 31);\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) (distance >>> 8);\r
- output[op++] = (byte) (distance & 255);\r
- } else {\r
- distance -= MAX_DISTANCE2;\r
- output[op++] = (byte) ((7 << 5) + 31);\r
- for (len -=7; len >= 255; len -= 255)\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) len;\r
- output[op++] = (byte) 255;\r
- output[op++] = (byte) (distance >>> 8);\r
- output[op++] = (byte) (distance & 255);\r
- }\r
- }\r
-\r
- // update the hash at match boundary\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
- hval = HASH_FUNCTION(input, ip);\r
- htab[hval] = ip++;\r
-\r
- // assuming literal copy\r
- output[op++] = MAX_COPY-1;\r
- }\r
- }\r
-\r
- // left-over as literal copy\r
- ip_bound++;\r
- while (ip <= ip_bound) {\r
- output[op++] = input[ip++];\r
- copy++;\r
- if (copy == MAX_COPY) {\r
- copy = 0;\r
- output[op++] = MAX_COPY-1;\r
- }\r
- }\r
-\r
- // if we have copied something, adjust the copy length\r
- if (copy != 0)\r
- output[op-copy-1] = (byte) (copy - 1);\r
- else\r
- op--;\r
-\r
- // marker for fastlz2\r
- output[outputOffset] |= (1 << 5);\r
-\r
- return op - outputOffset;\r
- }\r
-\r
- // ------------------------------------------------------------------------\r
-\r
- private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,\r
- int maxout) {\r
- int ip = inputOffset;\r
- int ip_limit = ip + length;\r
- int op = outputOffset;\r
- int op_limit = op + maxout;\r
- int ctrl = (int) input[ip++] & 31;\r
- boolean loop = true;\r
-\r
- do {\r
- int ref = op;\r
- int len = ctrl >>> 5;\r
- int ofs = (ctrl & 31) << 8;\r
-\r
- if (ctrl >= 32) {\r
- len--;\r
- ref -= ofs;\r
- if (len == 7-1)\r
- len += (int) input[ip++] & 0xff;\r
- ref -= (int) input[ip++] & 0xff;\r
-\r
- if (FASTLZ_SAFE) {\r
- if (op + len + 3 > op_limit)\r
- return 0;\r
-\r
- if (ref-1 < outputOffset)\r
- return 0;\r
- }\r
-\r
- if (ip < ip_limit)\r
- ctrl = (int) input[ip++] & 0xff;\r
- else\r
- loop = false;\r
-\r
- if (ref == op) {\r
- // optimize copy for a run\r
- byte b = output[ref-1];\r
- output[op++] = b;\r
- output[op++] = b;\r
- output[op++] = b;\r
- for(; len > 0; --len)\r
- output[op++] = b;\r
- } else {\r
- /* copy from reference */\r
- ref--;\r
- output[op++] = output[ref++];\r
- output[op++] = output[ref++];\r
- output[op++] = output[ref++];\r
- for (; len > 0; --len)\r
- output[op++] = output[ref++];\r
- }\r
- } else {\r
- ctrl++;\r
- if (FASTLZ_SAFE) {\r
- if (op + ctrl > op_limit)\r
- return 0;\r
- if (ip + ctrl > ip_limit)\r
- return 0;\r
- }\r
-\r
- output[op++] = input[ip++];\r
- for (--ctrl; ctrl > 0; --ctrl)\r
- output[op++] = input[ip++];\r
-\r
- loop = ip < ip_limit;\r
- if (loop)\r
- ctrl = (int) input[ip++] & 0xff;\r
- }\r
- } while(loop);\r
-\r
- return op - outputOffset;\r
- }\r
-\r
- private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,\r
- int maxout) {\r
- int ip = inputOffset;\r
- int ip_limit = ip + length;\r
- int op = outputOffset;\r
- int op_limit = op + maxout;\r
- int ctrl = (int) input[ip++] & 31;\r
- boolean loop = true;\r
-\r
- do {\r
- int ref = op;\r
- int len = ctrl >>> 5;\r
- int ofs = (ctrl & 31) << 8;\r
-\r
- if (ctrl >= 32) {\r
- int code;\r
- len--;\r
- ref -= ofs;\r
- if (len == 7-1) {\r
- do {\r
- code = (int) input[ip++] & 0xff;\r
- len += code;\r
- } while (code==255);\r
- }\r
- code = (int) input[ip++] & 0xff;\r
- ref -= code;\r
-\r
- // match from 16-bit distance\r
- if (code == 255) {\r
- if (ofs == (31 << 8)) {\r
- ofs = ((int) input[ip++] & 0xff) << 8;\r
- ofs += (int) input[ip++] & 0xff;\r
- ref = op - ofs - MAX_DISTANCE2;\r
- }\r
- }\r
-\r
- if (FASTLZ_SAFE) {\r
- if (op + len + 3 > op_limit)\r
- return 0;\r
-\r
- if (ref-1 < outputOffset)\r
- return 0;\r
- }\r
-\r
- if (ip < ip_limit)\r
- ctrl = (int) input[ip++] & 0xff;\r
- else\r
- loop = false;\r
-\r
- if (ref == op) {\r
- /* optimize copy for a run */\r
- byte b = output[ref-1];\r
- output[op++] = b;\r
- output[op++] = b;\r
- output[op++] = b;\r
- for (; len > 0; --len)\r
- output[op++] = b;\r
- } else {\r
- /* copy from reference */\r
- --ref;\r
- output[op++] = output[ref++];\r
- output[op++] = output[ref++];\r
- output[op++] = output[ref++];\r
- for (; len > 0; --len)\r
- output[op++] = output[ref++];\r
- }\r
- } else {\r
- ++ctrl;\r
- if (FASTLZ_SAFE) {\r
- if (op + ctrl > op_limit)\r
- return 0;\r
- if (ip + ctrl > ip_limit)\r
- return 0;\r
- }\r
-\r
- output[op++] = input[ip++]; \r
- for (--ctrl; ctrl > 0; --ctrl)\r
- output[op++] = input[ip++];\r
-\r
- loop = ip < ip_limit;\r
- if (loop)\r
- ctrl = (int) input[ip++] & 0xff;\r
- }\r
- } while (loop);\r
-\r
- return op - outputOffset;\r
- }\r
-\r
-}\r
+/*******************************************************************************
+ * Copyright (c) 2007, 2011 Association for Decentralized Information Management
+ * in Industry THTH ry.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * VTT Technical Research Centre of Finland - initial API and implementation
+ *******************************************************************************/
+package org.simantics.fastlz.java;
+
+
+/**
+ * @author Tuukka Lehtonen
+ */
+public class FastLZImpl {
+
+ /** 256 + 8 */
+ private static final int MAX_LEN = 264;
+ private static final int MAX_COPY = 32;
+ // For level 1 compression
+ private static final int MAX_DISTANCE = 8192;
+ // For level 2 compression
+ private static final int MAX_DISTANCE2 = 8191;
+ private static final int MAX_FARDISTANCE = (65535 + MAX_DISTANCE2 - 1);
+
+ private static final boolean FASTLZ_SAFE = false;
+
+ public static final int FASTLZ_VERSION = 0x000100;
+
+ public static final int FASTLZ_VERSION_MAJOR = 0;
+ public static final int FASTLZ_VERSION_MINOR = 1;
+ public static final int FASTLZ_VERSION_REVISION = 0;
+
+ public static final String FASTLZ_VERSION_STRING = "0.1.0";
+
+ private static final int HASH_LOG = 13;
+ private static final int HASH_SIZE = (1 << HASH_LOG);
+ private static final int HASH_MASK = (HASH_SIZE - 1);
+
+ private static final int FASTLZ_READU16(byte[] array, int offset) {
+ final int b0 = array[offset] & 0xff;
+ final int b1 = array[offset + 1] & 0xff;
+ return b0 | (b1 << 8);
+ }
+
+ /**
+ * @param array
+ * @param offset
+ * @return
+ */
+ private static final int HASH_FUNCTION(byte[] array, int offset) {
+ final int b0 = array[offset] & 0xff;
+ final int b1 = array[offset+1] & 0xff;
+ final int b2 = array[offset+2] & 0xff;
+ int v = b0 | (b1 << 8);
+ final int v2 = b1 | (b2 << 8);
+ v ^= v2 ^ (v >>> (16 - HASH_LOG));
+ v &= HASH_MASK;
+ return v;
+ }
+
+ /**
+ Compress a block of data in the input buffer and returns the size of
+ compressed block. The size of input buffer is specified by length. The
+ minimum input buffer size is 16.
+
+ The output buffer must be at least 5% larger than the input buffer
+ and can not be smaller than 66 bytes.
+
+ If the input is not compressible, the return value might be larger than
+ length (input buffer size).
+
+ The input buffer and the output buffer can not overlap.
+ */
+ public static int fastlz_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
+ /* for short block, choose fastlz1 */
+ if (length < 65536)
+ return fastlz1_compress(input, inputOffset, length, output, outputOffset);
+
+ /* else... */
+ return fastlz2_compress(input, inputOffset, length, output, outputOffset);
+ }
+
+ /**
+ Decompress a block of compressed data and returns the size of the
+ decompressed block. If error occurs, e.g. the compressed data is
+ corrupted or the output buffer is not large enough, then 0 (zero)
+ will be returned instead.
+
+ The input buffer and the output buffer can not overlap.
+
+ Decompression is memory safe and guaranteed not to write the output buffer
+ more than what is specified in maxout.
+ */
+ public static int fastlz_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
+ int maxout) {
+ // magic identifier for compression level
+ int level = (((int) input[0] & 0xff) >>> 5) + 1;
+
+ if (level == 1)
+ return fastlz1_decompress(input, inputOffset, length, output, outputOffset, maxout);
+ if (level == 2)
+ return fastlz2_decompress(input, inputOffset, length, output, outputOffset, maxout);
+
+ // unknown level, trigger error
+ return 0;
+ }
+
+ /**
+ Compress a block of data in the input buffer and returns the size of
+ compressed block. The size of input buffer is specified by length. The
+ minimum input buffer size is 16.
+
+ The output buffer must be at least 5% larger than the input buffer
+ and can not be smaller than 66 bytes.
+
+ If the input is not compressible, the return value might be larger than
+ length (input buffer size).
+
+ The input buffer and the output buffer can not overlap.
+
+ Compression level can be specified in parameter level. At the moment,
+ only level 1 and level 2 are supported.
+ Level 1 is the fastest compression and generally useful for short data.
+ Level 2 is slightly slower but it gives better compression ratio.
+
+ Note that the compressed data, regardless of the level, can always be
+ decompressed using the function fastlz_decompress above.
+ */
+ public static int fastlz_compress_level(int level, byte[] input, int inputOffset, int length, byte[] output,
+ int outputOffset) {
+ if (level == 1)
+ return fastlz1_compress(input, inputOffset, length, output, outputOffset);
+ if (level == 2)
+ return fastlz2_compress(input, inputOffset, length, output, outputOffset);
+
+ return 0;
+ }
+
+
+ private static int fastlz1_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset) {
+ int ip = inputOffset;
+ int ip_bound = inputOffset + length - 2;
+ int ip_limit = inputOffset + length - 12;
+ int op = outputOffset;
+
+ int[] htab = new int[HASH_SIZE];
+ int hslot;
+ int hval;
+
+ int copy;
+
+ // sanity check
+ if(length < 4)
+ {
+ if(length != 0)
+ {
+ // create literal copy only
+ output[op++] = (byte) (length-1);
+ ip_bound++;
+ while(ip <= ip_bound)
+ output[op++] = input[ip++];
+ return length+1;
+ }
+ else
+ return 0;
+ }
+
+ // initializes hash table
+ for (hslot = 0; hslot < HASH_SIZE; ++hslot)
+ htab[hslot] = ip;
+
+ // we start with literal copy
+ copy = 2;
+ output[op++] = MAX_COPY-1;
+ output[op++] = input[ip++];
+ output[op++] = input[ip++];
+
+ // main loop
+ while (ip < ip_limit) {
+ int ref;
+ int distance;
+
+ // minimum match length
+ int len = 3;
+
+ // comparison starting-point
+ int anchor = ip;
+
+ // find potential match
+ hval = HASH_FUNCTION(input, ip);
+ hslot = hval;
+ ref = htab[hval];
+
+ // calculate distance to the match
+ distance = anchor - ref;
+
+ // update hash table
+ htab[hslot] = anchor;
+
+ // is this a match? check the first 3 bytes
+ if (distance==0 ||
+ (distance >= MAX_DISTANCE) ||
+ input[ref++] != input[ip++] ||
+ input[ref++] != input[ip++] ||
+ input[ref++] != input[ip++])
+ {
+ output[op++] = input[anchor++];
+ ip = anchor;
+ copy++;
+ if (copy == MAX_COPY) {
+ copy = 0;
+ output[op++] = MAX_COPY-1;
+ }
+ } else {
+ // last matched byte
+ ip = anchor + len;
+
+ // distance is biased
+ --distance;
+
+ if(distance == 0) {
+ // zero distance means a run
+ byte x = input[ip - 1];
+ while (ip < ip_bound)
+ if (input[ref++] != x) break;
+ else ip++;
+ } else {
+ for (;;) {
+ // safe because the outer check against ip limit
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ while (ip < ip_bound)
+ if (input[ref++] != input[ip++]) break;
+ break;
+ }
+ }
+
+ // if we have copied something, adjust the copy count
+ if (copy != 0)
+ // copy is biased, '0' means 1 byte copy
+ output[op-copy-1] = (byte)(copy-1);
+ else
+ // back, to overwrite the copy count
+ op--;
+
+ // reset literal counter
+ copy = 0;
+
+ // length is biased, '1' means a match of 3 bytes
+ ip -= 3;
+ len = ip - anchor;
+
+ // encode the match
+ if (len > MAX_LEN - 2)
+ while (len > MAX_LEN - 2) {
+ output[op++] = (byte) ((7 << 5) + (distance >>> 8));
+ output[op++] = (byte) (MAX_LEN - 2 - 7 -2);
+ output[op++] = (byte) (distance & 0xff);
+ len -= MAX_LEN - 2;
+ }
+
+ if (len < 7) {
+ output[op++] = (byte) ((len << 5) + (distance >>> 8));
+ output[op++] = (byte) (distance & 255);
+ } else {
+ output[op++] = (byte) ((7 << 5) + (distance >>> 8));
+ output[op++] = (byte) (len - 7);
+ output[op++] = (byte) (distance & 255);
+ }
+
+ // update the hash at match boundary
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+
+ // assuming literal copy
+ output[op++] = MAX_COPY-1;
+ }
+ }
+
+ // left-over as literal copy
+ ip_bound++;
+ while (ip <= ip_bound) {
+ output[op++] = input[ip++];
+ copy++;
+ if (copy == MAX_COPY) {
+ copy = 0;
+ output[op++] = MAX_COPY-1;
+ }
+ }
+
+ // if we have copied something, adjust the copy length
+ if (copy != 0)
+ output[op - copy - 1] = (byte) (copy - 1);
+ else
+ --op;
+
+ return op - outputOffset;
+ }
+
+ private static int fastlz2_compress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset)
+ {
+ int ip = inputOffset;
+ int ip_bound = ip + length - 2;
+ int ip_limit = ip + length - 12;
+ int op = outputOffset;
+
+ int[] htab = new int[HASH_SIZE];
+ int hslot;
+ int hval;
+
+ int copy;
+
+ // sanity check
+ if (length < 4) {
+ if (length != 0) {
+ // create literal copy only
+ output[op++] = (byte) (length - 1);
+ ip_bound++;
+ while(ip <= ip_bound)
+ output[op++] = input[ip++];
+ return length + 1;
+ } else
+ return 0;
+ }
+
+ // initializes hash table
+ for (hslot = 0; hslot < HASH_SIZE; ++hslot)
+ htab[hslot] = ip;
+
+ // we start with literal copy
+ copy = 2;
+ output[op++] = MAX_COPY-1;
+ output[op++] = input[ip++];
+ output[op++] = input[ip++];
+
+ // main loop
+ while (ip < ip_limit) {
+ int ref;
+ int distance;
+
+ // minimum match length
+ int len = 3;
+
+ // comparison starting-point
+ int anchor = ip;
+
+ // check for a run
+ if (input[ip] == input[ip-1] && FASTLZ_READU16(input, ip-1) == FASTLZ_READU16(input, ip+1)) {
+ distance = 1;
+ ip += 3;
+ ref = anchor - 1 + 3;
+
+ // NOTE: cannot do this goto, so copied the code here from line 487
+ //goto match;
+
+ // last matched byte
+ ip = anchor + len;
+
+ // distance is biased
+ --distance;
+
+ if (distance == 0) {
+ // zero distance means a run
+ byte x = input[ip - 1];
+ while (ip < ip_bound)
+ if (input[ref++] != x) break;
+ else ++ip;
+ } else {
+ for (;;) {
+ // safe because the outer check against ip limit
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ while (ip < ip_bound)
+ if (input[ref++] != input[ip++]) break;
+ break;
+ }
+ }
+
+ // if we have copied something, adjust the copy count
+ if (copy != 0)
+ // copy is biased, '0' means 1 byte copy
+ output[op - copy - 1] = (byte) (copy - 1);
+ else
+ // back, to overwrite the copy count
+ --op;
+
+ // reset literal counter
+ copy = 0;
+
+ // length is biased, '1' means a match of 3 bytes
+ ip -= 3;
+ len = ip - anchor;
+
+ // encode the match
+ if (distance < MAX_DISTANCE2) {
+ if (len < 7) {
+ output[op++] = (byte) ((len << 5) + (distance >>> 8));
+ output[op++] = (byte) (distance & 255);
+ } else {
+ output[op++] = (byte) ((7 << 5) + (distance >>> 8));
+ for (len -= 7; len >= 255; len -= 255)
+ output[op++] = (byte) 255;
+ output[op++] = (byte) len;
+ output[op++] = (byte) (distance & 255);
+ }
+ } else {
+ // far away, but not yet in the another galaxy...
+ if(len < 7) {
+ distance -= MAX_DISTANCE2;
+ output[op++] = (byte) ((len << 5) + 31);
+ output[op++] = (byte) 255;
+ output[op++] = (byte) (distance >>> 8);
+ output[op++] = (byte) (distance & 255);
+ } else {
+ distance -= MAX_DISTANCE2;
+ output[op++] = (byte) ((7 << 5) + 31);
+ for (len -=7; len >= 255; len -= 255)
+ output[op++] = (byte) 255;
+ output[op++] = (byte) len;
+ output[op++] = (byte) 255;
+ output[op++] = (byte) (distance >>> 8);
+ output[op++] = (byte) (distance & 255);
+ }
+ }
+
+ // update the hash at match boundary
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+
+ // assuming literal copy
+ output[op++] = MAX_COPY-1;
+
+ continue;
+ }
+
+ // find potential match
+ hval = HASH_FUNCTION(input, ip);
+ hslot = hval;
+ ref = htab[hval];
+
+ // calculate distance to the match
+ distance = anchor - ref;
+
+ // update hash table
+ htab[hslot] = anchor;
+
+ // is this a match? check the first 3 bytes
+ if (distance==0 ||
+ (distance >= MAX_FARDISTANCE) ||
+ input[ref++] != input[ip++] ||
+ input[ref++] != input[ip++] ||
+ input[ref++] != input[ip++])
+ {
+ output[op++] = input[anchor++];
+ ip = anchor;
+ copy++;
+ if (copy == MAX_COPY) {
+ copy = 0;
+ output[op++] = MAX_COPY-1;
+ }
+ } else {
+ // far, needs at least 5-byte match
+ if (distance >= MAX_DISTANCE2) {
+ if (input[ip++] != input[ref++] || input[ip++] != input[ref++]) {
+ // LITERAL COPY, code copied from above
+ output[op++] = input[anchor++];
+ ip = anchor;
+ copy++;
+ if (copy == MAX_COPY) {
+ copy = 0;
+ output[op++] = MAX_COPY-1;
+ }
+ continue;
+ }
+ len += 2;
+ }
+
+ // NOTE: all of the following code within this block is copied above
+ // to line 357
+ //match:
+
+ // last matched byte
+ ip = anchor + len;
+
+ // distance is biased
+ --distance;
+
+ if (distance == 0) {
+ // zero distance means a run
+ byte x = input[ip - 1];
+ while (ip < ip_bound)
+ if (input[ref++] != x) break;
+ else ++ip;
+ } else {
+ for (;;) {
+ // safe because the outer check against ip limit
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ if(input[ref++] != input[ip++]) break;
+ while (ip < ip_bound)
+ if (input[ref++] != input[ip++]) break;
+ break;
+ }
+ }
+
+ // if we have copied something, adjust the copy count
+ if (copy != 0)
+ // copy is biased, '0' means 1 byte copy
+ output[op - copy - 1] = (byte) (copy - 1);
+ else
+ // back, to overwrite the copy count
+ --op;
+
+ // reset literal counter
+ copy = 0;
+
+ // length is biased, '1' means a match of 3 bytes
+ ip -= 3;
+ len = ip - anchor;
+
+ // encode the match
+ if (distance < MAX_DISTANCE2) {
+ if (len < 7) {
+ output[op++] = (byte) ((len << 5) + (distance >>> 8));
+ output[op++] = (byte) (distance & 255);
+ } else {
+ output[op++] = (byte) ((7 << 5) + (distance >>> 8));
+ for (len -= 7; len >= 255; len -= 255)
+ output[op++] = (byte) 255;
+ output[op++] = (byte) len;
+ output[op++] = (byte) (distance & 255);
+ }
+ } else {
+ // far away, but not yet in the another galaxy...
+ if(len < 7) {
+ distance -= MAX_DISTANCE2;
+ output[op++] = (byte) ((len << 5) + 31);
+ output[op++] = (byte) 255;
+ output[op++] = (byte) (distance >>> 8);
+ output[op++] = (byte) (distance & 255);
+ } else {
+ distance -= MAX_DISTANCE2;
+ output[op++] = (byte) ((7 << 5) + 31);
+ for (len -=7; len >= 255; len -= 255)
+ output[op++] = (byte) 255;
+ output[op++] = (byte) len;
+ output[op++] = (byte) 255;
+ output[op++] = (byte) (distance >>> 8);
+ output[op++] = (byte) (distance & 255);
+ }
+ }
+
+ // update the hash at match boundary
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+ hval = HASH_FUNCTION(input, ip);
+ htab[hval] = ip++;
+
+ // assuming literal copy
+ output[op++] = MAX_COPY-1;
+ }
+ }
+
+ // left-over as literal copy
+ ip_bound++;
+ while (ip <= ip_bound) {
+ output[op++] = input[ip++];
+ copy++;
+ if (copy == MAX_COPY) {
+ copy = 0;
+ output[op++] = MAX_COPY-1;
+ }
+ }
+
+ // if we have copied something, adjust the copy length
+ if (copy != 0)
+ output[op-copy-1] = (byte) (copy - 1);
+ else
+ op--;
+
+ // marker for fastlz2
+ output[outputOffset] |= (1 << 5);
+
+ return op - outputOffset;
+ }
+
+ // ------------------------------------------------------------------------
+
+ private static int fastlz1_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
+ int maxout) {
+ int ip = inputOffset;
+ int ip_limit = ip + length;
+ int op = outputOffset;
+ int op_limit = op + maxout;
+ int ctrl = (int) input[ip++] & 31;
+ boolean loop = true;
+
+ do {
+ int ref = op;
+ int len = ctrl >>> 5;
+ int ofs = (ctrl & 31) << 8;
+
+ if (ctrl >= 32) {
+ len--;
+ ref -= ofs;
+ if (len == 7-1)
+ len += (int) input[ip++] & 0xff;
+ ref -= (int) input[ip++] & 0xff;
+
+ if (FASTLZ_SAFE) {
+ if (op + len + 3 > op_limit)
+ return 0;
+
+ if (ref-1 < outputOffset)
+ return 0;
+ }
+
+ if (ip < ip_limit)
+ ctrl = (int) input[ip++] & 0xff;
+ else
+ loop = false;
+
+ if (ref == op) {
+ // optimize copy for a run
+ byte b = output[ref-1];
+ output[op++] = b;
+ output[op++] = b;
+ output[op++] = b;
+ for(; len > 0; --len)
+ output[op++] = b;
+ } else {
+ /* copy from reference */
+ ref--;
+ output[op++] = output[ref++];
+ output[op++] = output[ref++];
+ output[op++] = output[ref++];
+ for (; len > 0; --len)
+ output[op++] = output[ref++];
+ }
+ } else {
+ ctrl++;
+ if (FASTLZ_SAFE) {
+ if (op + ctrl > op_limit)
+ return 0;
+ if (ip + ctrl > ip_limit)
+ return 0;
+ }
+
+ output[op++] = input[ip++];
+ for (--ctrl; ctrl > 0; --ctrl)
+ output[op++] = input[ip++];
+
+ loop = ip < ip_limit;
+ if (loop)
+ ctrl = (int) input[ip++] & 0xff;
+ }
+ } while(loop);
+
+ return op - outputOffset;
+ }
+
+ private static int fastlz2_decompress(byte[] input, int inputOffset, int length, byte[] output, int outputOffset,
+ int maxout) {
+ int ip = inputOffset;
+ int ip_limit = ip + length;
+ int op = outputOffset;
+ int op_limit = op + maxout;
+ int ctrl = (int) input[ip++] & 31;
+ boolean loop = true;
+
+ do {
+ int ref = op;
+ int len = ctrl >>> 5;
+ int ofs = (ctrl & 31) << 8;
+
+ if (ctrl >= 32) {
+ int code;
+ len--;
+ ref -= ofs;
+ if (len == 7-1) {
+ do {
+ code = (int) input[ip++] & 0xff;
+ len += code;
+ } while (code==255);
+ }
+ code = (int) input[ip++] & 0xff;
+ ref -= code;
+
+ // match from 16-bit distance
+ if (code == 255) {
+ if (ofs == (31 << 8)) {
+ ofs = ((int) input[ip++] & 0xff) << 8;
+ ofs += (int) input[ip++] & 0xff;
+ ref = op - ofs - MAX_DISTANCE2;
+ }
+ }
+
+ if (FASTLZ_SAFE) {
+ if (op + len + 3 > op_limit)
+ return 0;
+
+ if (ref-1 < outputOffset)
+ return 0;
+ }
+
+ if (ip < ip_limit)
+ ctrl = (int) input[ip++] & 0xff;
+ else
+ loop = false;
+
+ if (ref == op) {
+ /* optimize copy for a run */
+ byte b = output[ref-1];
+ output[op++] = b;
+ output[op++] = b;
+ output[op++] = b;
+ for (; len > 0; --len)
+ output[op++] = b;
+ } else {
+ /* copy from reference */
+ --ref;
+ output[op++] = output[ref++];
+ output[op++] = output[ref++];
+ output[op++] = output[ref++];
+ for (; len > 0; --len)
+ output[op++] = output[ref++];
+ }
+ } else {
+ ++ctrl;
+ if (FASTLZ_SAFE) {
+ if (op + ctrl > op_limit)
+ return 0;
+ if (ip + ctrl > ip_limit)
+ return 0;
+ }
+
+ output[op++] = input[ip++];
+ for (--ctrl; ctrl > 0; --ctrl)
+ output[op++] = input[ip++];
+
+ loop = ip < ip_limit;
+ if (loop)
+ ctrl = (int) input[ip++] & 0xff;
+ }
+ } while (loop);
+
+ return op - outputOffset;
+ }
+
+}