]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.fastlz/native/lz4hc.c
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.fastlz / native / lz4hc.c
diff --git a/bundles/org.simantics.fastlz/native/lz4hc.c b/bundles/org.simantics.fastlz/native/lz4hc.c
new file mode 100644 (file)
index 0000000..cca755c
--- /dev/null
@@ -0,0 +1,663 @@
+/*\r
+   LZ4 HC - High Compression Mode of LZ4\r
+   Copyright (C) 2011-2012, Yann Collet.\r
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)\r
+\r
+   Redistribution and use in source and binary forms, with or without\r
+   modification, are permitted provided that the following conditions are\r
+   met:\r
+\r
+       * Redistributions of source code must retain the above copyright\r
+   notice, this list of conditions and the following disclaimer.\r
+       * Redistributions in binary form must reproduce the above\r
+   copyright notice, this list of conditions and the following disclaimer\r
+   in the documentation and/or other materials provided with the\r
+   distribution.\r
+\r
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\r
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+\r
+   You can contact the author at :\r
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html\r
+   - LZ4 source repository : http://code.google.com/p/lz4/\r
+*/\r
+\r
+\r
+//**************************************\r
+// CPU Feature Detection\r
+//**************************************\r
+// 32 or 64 bits ?\r
+#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) )   // Detects 64-bit mode from compiler/ABI predefined macros\r
+#define LZ4_ARCH64 1   /* 64-bit build: selects the 8-byte STEPSIZE/copy macros under "#if LZ4_ARCH64" */\r
+#else\r
+#define LZ4_ARCH64 0   /* anything else is built as 32-bit; defined as 0 (not left undefined) because it is also used as a plain value in expressions */\r
+#endif\r
+\r
+// Little Endian or Big Endian ? Trust the compiler's __BYTE_ORDER__ when it is provided (POWER can also run little-endian, so the PowerPC macros alone prove nothing); otherwise fall back to the legacy macro names.\r
+#if ((defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)) ? (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) : (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc)))\r
+#define LZ4_BIG_ENDIAN 1\r
+#else\r
+// Little Endian assumed. PDP-endian and other very rare byte orders are unsupported.\r
+#endif\r
+\r
+// Unaligned memory access is automatically enabled for "common" CPUs, such as x86.\r
+// For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.\r
+// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.\r
+#if defined(__ARM_FEATURE_UNALIGNED)   /* set by ARM compilers when the target permits unaligned accesses */\r
+#define LZ4_FORCE_UNALIGNED_ACCESS 1   /* when defined, the "#pragma pack(push, 1)" around U16_S/U32_S/U64_S is skipped */\r
+#endif\r
+\r
+\r
+//**************************************\r
+// Compiler Options\r
+//**************************************\r
+#if __STDC_VERSION__ >= 199901L    // C99 or later (an undefined __STDC_VERSION__ evaluates to 0 in this test)\r
+  /* "restrict" is a known keyword */\r
+#else\r
+#define restrict  // Pre-C99: make "restrict" expand to nothing\r
+#endif\r
+\r
+#ifdef _MSC_VER\r
+#define inline __forceinline    // Visual is not C99, but supports some kind of inline\r
+#endif\r
+\r
+#ifdef _MSC_VER  // Visual Studio\r
+#define bswap16(x) _byteswap_ushort(x)   /* compiler intrinsic */\r
+#else\r
+#define bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))   /* portable fallback: swaps the two low bytes; x is evaluated twice */\r
+#endif\r
+\r
+\r
+//**************************************\r
+// Includes\r
+//**************************************\r
+#include <stdlib.h>   // calloc, free\r
+#include <string.h>   // memset, memcpy\r
+#include "lz4hc.h"\r
+\r
+#define ALLOCATOR(s) calloc(1,s)   /* zero-filled allocation of s bytes; yields NULL when the allocation fails */\r
+#define FREEMEM free   /* releases memory obtained through ALLOCATOR */\r
+#define MEM_INIT memset   /* byte-fill used to reset the match-finder tables */\r
+\r
+\r
+//**************************************\r
+// Basic Types\r
+//**************************************\r
+#if defined(_MSC_VER)    // Visual Studio does not support 'stdint' natively: use its sized built-in integer types\r
+#define BYTE   unsigned __int8\r
+#define U16            unsigned __int16\r
+#define U32            unsigned __int32\r
+#define S32            __int32\r
+#define U64            unsigned __int64\r
+#else\r
+#include <stdint.h>   /* fixed-width integer types for every other compiler */\r
+#define BYTE   uint8_t\r
+#define U16            uint16_t\r
+#define U32            uint32_t\r
+#define S32            int32_t\r
+#define U64            uint64_t\r
+#endif\r
+\r
+#ifndef LZ4_FORCE_UNALIGNED_ACCESS\r
+#pragma pack(push, 1)   /* byte-pack the wrapper structs below so they carry no alignment requirement */ \r
+#endif\r
+\r
+typedef struct _U16_S { U16 v; } U16_S;   /* single-member wrappers: memory is accessed through these by A16/A32/A64 */\r
+typedef struct _U32_S { U32 v; } U32_S;\r
+typedef struct _U64_S { U64 v; } U64_S;\r
+\r
+#ifndef LZ4_FORCE_UNALIGNED_ACCESS\r
+#pragma pack(pop)   /* restore the previous packing */ \r
+#endif\r
+\r
+#define A64(x) (((U64_S *)(x))->v)   /* lvalue for the 64-bit value stored at address x */\r
+#define A32(x) (((U32_S *)(x))->v)   /* lvalue for the 32-bit value stored at address x */\r
+#define A16(x) (((U16_S *)(x))->v)   /* lvalue for the 16-bit value stored at address x */\r
+\r
+\r
+//**************************************\r
+// Constants\r
+//**************************************\r
+#define MINMATCH 4   /* shortest match: candidates are confirmed with a 4-byte compare, and lengths are stored as ml-MINMATCH */\r
+\r
+#define DICTIONARY_LOGSIZE 16\r
+#define MAXD (1<<DICTIONARY_LOGSIZE)   /* 65536: number of chainTable entries */\r
+#define MAXD_MASK ((U32)(MAXD - 1))   /* maps a position to its chainTable slot (see DELTANEXT) */\r
+#define MAX_DISTANCE (MAXD - 1)   /* 65535: largest chain delta stored and search-window bound used by the match finders */\r
+\r
+#define HASH_LOG (DICTIONARY_LOGSIZE-1)   /* 15 hash bits */\r
+#define HASHTABLESIZE (1 << HASH_LOG)   /* 32768: number of hashTable entries */\r
+#define HASH_MASK (HASHTABLESIZE - 1)   /* not referenced by the code in this file */\r
+\r
+#define MAX_NB_ATTEMPTS 256   /* maximum number of chain candidates examined per search */\r
+\r
+#define ML_BITS  4   /* low token bits: match length field */\r
+#define ML_MASK  (size_t)((1U<<ML_BITS)-1)   /* 15: value that signals extra match-length bytes follow */\r
+#define RUN_BITS (8-ML_BITS)   /* high token bits: literal length field */\r
+#define RUN_MASK ((1U<<RUN_BITS)-1)   /* 15: value that signals extra literal-length bytes follow */\r
+\r
+#define COPYLENGTH 8\r
+#define LASTLITERALS 5   /* matches are never extended into the last 5 input bytes (matchlimit = iend - LASTLITERALS) */\r
+#define MFLIMIT (COPYLENGTH+MINMATCH)   /* 12: the main loop stops looking for matches this close to the end of input */\r
+#define MINLENGTH (MFLIMIT+1)   /* not referenced by the code in this file */\r
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)   /* 18: longest match length that still fits in the token without extension bytes */\r
+\r
+\r
+//**************************************\r
+// Architecture-specific macros\r
+//**************************************\r
+#if LZ4_ARCH64 // 64-bit: words are compared and copied 8 bytes at a time\r
+#define STEPSIZE 8\r
+#define LZ4_COPYSTEP(s,d)              A64(d) = A64(s); d+=8; s+=8;   /* copies 8 bytes and advances both pointers; expands to several statements */\r
+#define LZ4_COPYPACKET(s,d)            LZ4_COPYSTEP(s,d)   /* one packet = 8 bytes */\r
+#define UARCH U64\r
+#define AARCH A64\r
+#define HTYPE                                  U32   /* hash table stores 32-bit offsets relative to the base pointer */\r
+#define INITBASE(b,s)                  const BYTE* const b = s   /* declares local b = base pointer, used by HASH_POINTER/ADD_HASH */\r
+#else          // 32-bit: words are compared and copied 4 bytes at a time\r
+#define STEPSIZE 4\r
+#define LZ4_COPYSTEP(s,d)              A32(d) = A32(s); d+=4; s+=4;   /* copies 4 bytes and advances both pointers; expands to several statements */\r
+#define LZ4_COPYPACKET(s,d)            LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);   /* one packet = 8 bytes here too */\r
+#define UARCH U32\r
+#define AARCH A32\r
+#define HTYPE                                  const BYTE*   /* hash table stores the positions themselves */\r
+#define INITBASE(b,s)              const int b = 0   /* base is the integer 0, so "+ base" / "- base" in the hash macros change nothing */\r
+#endif\r
+\r
+#if defined(LZ4_BIG_ENDIAN)\r
+#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }   /* d = s - (16-bit little-endian value at p); not used in this file */\r
+#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }   /* stores i as 16-bit little-endian at p, then advances p by 2 */\r
+#else          // Little Endian\r
+#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }   /* d = s - (16-bit value at p); not used in this file */\r
+#define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }   /* stores v as 16 bits at p, then advances p by 2 */\r
+#endif\r
+\r
+\r
+//************************************************************\r
+// Local Types\r
+//************************************************************\r
+typedef struct   /* match-finder state for one compression run; reset by LZ4HC_Init */ \r
+{\r
+       const BYTE* base;   /* start of the input, as passed to LZ4HC_Init */\r
+       HTYPE hashTable[HASHTABLESIZE];   /* per hash value: most recently inserted position (offset from base on 64-bit, pointer on 32-bit) */\r
+       U16 chainTable[MAXD];   /* slot (position & MAXD_MASK): distance back to the previous position with the same hash, capped at MAX_DISTANCE */\r
+       const BYTE* nextToUpdate;   /* first input position that LZ4HC_Insert has not yet added to the tables */\r
+} LZ4HC_Data_Structure;\r
+\r
+\r
+//**************************************\r
+// Macros\r
+//**************************************\r
+#define LZ4_WILDCOPY(s,d,e)            do { LZ4_COPYPACKET(s,d) } while (d<e);   /* copies whole 8-byte packets until d reaches e, so it may write past e */\r
+#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }   /* wild-copies l bytes, then leaves d exactly at d+l */\r
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))   /* multiplicative hash keeping the top HASH_LOG bits */\r
+#define HASH_VALUE(p)          HASH_FUNCTION(A32(p))   /* hash of the 4 bytes at p; read through A32 like every other word access, because p is an arbitrary byte address */\r
+#define HASH_POINTER(p)                (HashTable[HASH_VALUE(p)] + base)   /* most recent position recorded for p's hash; needs locals HashTable and base */\r
+#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]   /* chain slot of position p; needs local chainTable */ \r
+#define GETNEXT(p)                     ((p) - (size_t)DELTANEXT(p))   /* previous position in p's hash chain */\r
+#define ADD_HASH(p)                    { size_t delta = (p) - HASH_POINTER(p); if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; DELTANEXT(p) = (U16)delta; HashTable[HASH_VALUE(p)] = (p) - base; }   /* links p to the previous holder of its hash slot (distance capped at MAX_DISTANCE), then makes p the slot's newest entry */\r
+\r
+\r
+//**************************************\r
+// Private functions\r
+//**************************************\r
+#if LZ4_ARCH64   /* 64-bit variant: val is the XOR of two 8-byte words */\r
+\r
+inline static int LZ4_NbCommonBytes (register U64 val)   /* Returns how many bytes, counted from the lowest address, are identical in the two words whose XOR is val. Callers only pass a non-zero val. */\r
+{\r
+#if defined(LZ4_BIG_ENDIAN)\r
+    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    unsigned long r = 0;\r
+    _BitScanReverse64( &r, val );   /* r = index of the highest set bit */\r
+    return (int)((63 - r)>>3);   /* 63-r is the leading-zero count, matching the __builtin_clzll branch below */\r
+    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    return (__builtin_clzll(val) >> 3); \r
+    #else\r
+       int r;   /* software fallback: narrow down to the highest non-zero byte */\r
+       if (!(val>>32)) { r=4; } else { r=0; val>>=32; }\r
+       if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }\r
+       r += (!val);\r
+       return r;\r
+    #endif\r
+#else\r
+    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    unsigned long r = 0;\r
+    _BitScanForward64( &r, val );   /* r = index of the lowest set bit */\r
+    return (int)(r>>3);\r
+    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    return (__builtin_ctzll(val) >> 3); \r
+    #else\r
+       static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };\r
+       return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];   /* software fallback: table lookup keyed on the isolated lowest set bit */\r
+    #endif\r
+#endif\r
+}\r
+\r
+#else   /* 32-bit variant: val is the XOR of two 4-byte words */\r
+\r
+inline static int LZ4_NbCommonBytes (register U32 val)   /* Returns how many bytes, counted from the lowest address, are identical in the two words whose XOR is val. Callers only pass a non-zero val. */\r
+{\r
+#if defined(LZ4_BIG_ENDIAN)\r
+    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    unsigned long r = 0;\r
+    _BitScanReverse( &r, val );   /* r = index of the highest set bit */\r
+    return (int)((31 - r)>>3);   /* 31-r is the leading-zero count, matching the __builtin_clz branch below */\r
+    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    return (__builtin_clz(val) >> 3); \r
+    #else\r
+       int r;   /* software fallback: narrow down to the highest non-zero byte */\r
+       if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }\r
+       r += (!val);\r
+       return r;\r
+    #endif\r
+#else\r
+    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    unsigned long r = 0;\r
+    _BitScanForward( &r, val );   /* r = index of the lowest set bit */\r
+    return (int)(r>>3);\r
+    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)\r
+    return (__builtin_ctz(val) >> 3); \r
+    #else\r
+       static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };\r
+       return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];   /* software fallback: table lookup keyed on the isolated lowest set bit */\r
+    #endif\r
+#endif\r
+}\r
+\r
+#endif\r
+\r
+\r
+inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base)   /* Resets *hc4 for a new input that starts at 'base'; always returns 1. */\r
+{\r
+       hc4->base = base;\r
+       hc4->nextToUpdate = base + LZ4_ARCH64;   /* first position to insert: base+1 on 64-bit builds, base on 32-bit builds */\r
+       MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));   /* every chain delta starts as 0xFFFF */\r
+       MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));   /* every hash slot starts as all-zero bytes */\r
+       return 1;\r
+}\r
+\r
+\r
+inline static void* LZ4HC_Create (const BYTE* base)   /* Allocates and initialises a match-finder context for input starting at 'base'. Returns the context, or NULL if the allocation failed; release it with LZ4HC_Free. */\r
+{\r
+       void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));\r
+       /* ALLOCATOR can fail: never let LZ4HC_Init write through a NULL context. */\r
+       if (hc4 != NULL) LZ4HC_Init (hc4, base);\r
+       return hc4;\r
+}\r
+\r
+\r
+inline static int LZ4HC_Free (void** LZ4HC_Data)   /* Releases the context that *LZ4HC_Data points to and sets *LZ4HC_Data to NULL; always returns 1. */\r
+{\r
+       void* const block = *LZ4HC_Data;   /* remember the allocation before the caller's pointer is cleared */\r
+       *LZ4HC_Data = NULL; FREEMEM(block);\r
+       return 1;\r
+}\r
+\r
+\r
+inline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)   /* Adds every position from hc4->nextToUpdate up to (not including) ip to the hash and chain tables. */\r
+{\r
+       HTYPE* HashTable  = hc4->hashTable;    /* local name required by ADD_HASH */\r
+       U16*   chainTable = hc4->chainTable;   /* local name required by ADD_HASH (through DELTANEXT) */\r
+       INITBASE(base,hc4->base);\r
+\r
+       /* nextToUpdate itself is the cursor, so the context always records how far insertion has progressed */\r
+       for ( ; hc4->nextToUpdate < ip; hc4->nextToUpdate++)\r
+       {\r
+               ADD_HASH(hc4->nextToUpdate);\r
+       }\r
+}\r
+\r
+\r
+inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)   /* Inserts all positions before ip, then walks ip's hash chain for the longest match. Returns its length (0 if none) and stores its start in *matchpos; *matchpos is left untouched when 0 is returned. Matching never reads at or beyond matchlimit. */\r
+{\r
+       U16* const chainTable = hc4->chainTable;\r
+       HTYPE* const HashTable = hc4->hashTable;\r
+       const BYTE* ref;\r
+       INITBASE(base,hc4->base);\r
+       int nbAttempts=MAX_NB_ATTEMPTS;   /* budget of chain candidates */\r
+       int ml=0;   /* best match length so far */\r
+\r
+       // HC4 match finder\r
+       LZ4HC_Insert(hc4, ip);\r
+       ref = HASH_POINTER(ip);   /* newest earlier position sharing ip's hash */\r
+       while ((ref > (ip-MAX_DISTANCE)) && (nbAttempts))\r
+       {\r
+               nbAttempts--;\r
+               if (*(ref+ml) == *(ip+ml))   /* cheap reject: a longer match must also agree at offset ml */\r
+               if (A32(ref) == A32(ip))   /* confirm the first MINMATCH bytes, read through A32 like the tail compares below */\r
+               {\r
+                       const BYTE* reft = ref+MINMATCH;\r
+                       const BYTE* ipt = ip+MINMATCH;\r
+\r
+                       while (ipt<matchlimit-(STEPSIZE-1))   /* extend one machine word at a time */\r
+                       {\r
+                               UARCH diff = AARCH(reft) ^ AARCH(ipt);\r
+                               if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }\r
+                               ipt += LZ4_NbCommonBytes(diff);   /* add the equal bytes of the first differing word */\r
+                               goto _endCount;\r
+                       }\r
+                       if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }\r
+                       if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }\r
+                       if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;\r
+_endCount:\r
+\r
+                       if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }   /* keep only strictly longer matches */\r
+               }\r
+               ref = GETNEXT(ref);   /* step to the previous position in the chain */\r
+       }\r
+\r
+       return ml;\r
+}\r
+\r
+\r
+inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)   /* Inserts all positions before ip, then looks for a match through ip that is longer than 'longest'; a match may be extended backwards, but not before startLimit. Returns the best length found ('longest' unchanged if none is better); on improvement *startpos and *matchpos receive the start of the match in the input and in the reference. */\r
+{\r
+       U16* const  chainTable = hc4->chainTable;\r
+       HTYPE* const HashTable = hc4->hashTable;\r
+       INITBASE(base,hc4->base);\r
+       const BYTE*  ref;\r
+       int nbAttempts = MAX_NB_ATTEMPTS;   /* budget of chain candidates */\r
+       int delta = ip-startLimit;   /* how far a match may be extended backwards from ip */\r
+\r
+       // First Match\r
+       LZ4HC_Insert(hc4, ip);\r
+       ref = HASH_POINTER(ip);   /* newest earlier position sharing ip's hash */\r
+\r
+       while ((ref > ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts))\r
+       {\r
+               nbAttempts--;\r
+               if (*(startLimit + longest) == *(ref - delta + longest))   /* cheap reject before the full compare */\r
+               if (A32(ref) == A32(ip))   /* confirm the first MINMATCH bytes, read through A32 like the tail compares below */\r
+               {\r
+                       const BYTE* reft = ref+MINMATCH;\r
+                       const BYTE* ipt = ip+MINMATCH;\r
+                       const BYTE* startt = ip;\r
+\r
+                       while (ipt<matchlimit-(STEPSIZE-1))   /* extend forwards one machine word at a time */\r
+                       {\r
+                               UARCH diff = AARCH(reft) ^ AARCH(ipt);\r
+                               if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }\r
+                               ipt += LZ4_NbCommonBytes(diff);   /* add the equal bytes of the first differing word */\r
+                               goto _endCount;\r
+                       }\r
+                       if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }\r
+                       if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }\r
+                       if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;\r
+_endCount:\r
+\r
+                       reft = ref;   /* now extend backwards, byte by byte, within startLimit and the input base */\r
+                       while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;}\r
+\r
+                       if ((ipt-startt) > longest)   /* keep only strictly longer matches */\r
+                       {\r
+                               longest = ipt-startt;\r
+                               *matchpos = reft;\r
+                               *startpos = startt;\r
+                       }\r
+               }\r
+               ref = GETNEXT(ref);   /* step to the previous position in the chain */\r
+       }\r
+\r
+       return longest;\r
+}\r
+\r
+\r
+inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref)   /* Writes one sequence at *op: token, literals [*anchor, *ip), 2-byte offset (*ip - ref) and match length ml. Advances *op past the output, moves *ip forward by ml and sets *anchor = *ip. Always returns 0. */\r
+{\r
+       int length, len; \r
+       BYTE* token;\r
+\r
+       // Encode Literal length: stored in the high token bits; RUN_MASK means the remainder follows as 255-valued bytes plus a final byte\r
+       length = *ip - *anchor;\r
+       token = (*op)++;\r
+       if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } \r
+       else *token = (length<<ML_BITS);\r
+\r
+       // Copy Literals: copied in whole 8-byte packets, so a few bytes past the literal end may be written; *op still ends exactly at the literal end\r
+       LZ4_BLINDCOPY(*anchor, *op, length);\r
+\r
+       // Encode Offset: distance from the current position back to the match, stored as 16-bit little-endian\r
+       LZ4_WRITE_LITTLEENDIAN_16(*op,*ip-ref);\r
+\r
+       // Encode MatchLength: ml-MINMATCH goes in the low token bits; ML_MASK means the remainder follows as 255-valued bytes plus a final byte\r
+       len = (int)(ml-MINMATCH);\r
+       if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } \r
+       else *token += len;     \r
+\r
+       // Prepare next loop: skip the matched bytes and start the next literal run there\r
+       *ip += ml;\r
+       *anchor = *ip; \r
+\r
+       return 0;\r
+}\r
+\r
+\r
+//****************************\r
+// Compression CODE\r
+//****************************\r
+\r
+int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,   /* match-finder state, already initialised for 'source' */\r
+                                const char* source,   /* input bytes */ \r
+                                char* dest,   /* output buffer; its capacity is not passed in, so nothing in this function bounds the writes */\r
+                                int isize)   /* number of input bytes; returns the number of bytes written to dest */\r
+{      \r
+       const BYTE* ip = (const BYTE*) source;\r
+       const BYTE* anchor = ip;   /* start of the literals not yet emitted */\r
+       const BYTE* const iend = ip + isize;\r
+       const BYTE* const mflimit = iend - MFLIMIT;   /* no match search starts at or after this point */\r
+       const BYTE* const matchlimit = (iend - LASTLITERALS);   /* matches are never extended to or past this point */\r
+\r
+       BYTE* op = (BYTE*) dest;\r
+\r
+       int     ml, ml2, ml3, ml0;   /* lengths of match 1, 2, 3 and of the saved first match */\r
+       const BYTE* ref=NULL;\r
+       const BYTE* start2=NULL;\r
+       const BYTE* ref2=NULL;\r
+       const BYTE* start3=NULL;\r
+       const BYTE* ref3=NULL;\r
+       const BYTE* start0;\r
+       const BYTE* ref0;\r
+\r
+       ip++;   /* the first input byte is never tried as a match start */\r
+\r
+       // Main Loop\r
+       while (ip < mflimit)\r
+       {\r
+               ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));\r
+               if (!ml) { ip++; continue; }   /* no match here: the byte becomes a literal */\r
+\r
+               // saved, in case we would skip too much\r
+               start0 = ip;\r
+               ref0 = ref;\r
+               ml0 = ml;\r
+\r
+_Search2:\r
+               if (ip+ml < mflimit)   /* look for a longer match that overlaps the end of the current one and starts after ip */\r
+                       ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);\r
+               else ml2=ml;\r
+\r
+               if (ml2 == ml)  // No better match\r
+               {\r
+                       LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);\r
+                       continue;\r
+               }\r
+\r
+               if (start0 < ip)   /* the first match was replaced earlier: consider going back to the saved one */\r
+               {\r
+                       if (start2 < ip + ml0)   // empirical\r
+                       {\r
+                               ip = start0;\r
+                               ref = ref0;\r
+                               ml = ml0;\r
+                       }\r
+               }\r
+\r
+               // Here, start0==ip\r
+               if ((start2 - ip) < 3)   // First Match too small : removed\r
+               {\r
+                       ml = ml2;\r
+                       ip = start2;\r
+                       ref =ref2;\r
+                       goto _Search2;\r
+               }\r
+\r
+_Search3:\r
+               // Currently we have :\r
+               // ml2 > ml1, and\r
+               // ip1+3 <= ip2 (usually < ip1+ml1)\r
+               if ((start2 - ip) < OPTIMAL_ML)\r
+               {\r
+                       int correction;\r
+                       int new_ml = ml;\r
+                       if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;\r
+                       if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;   /* leave at least MINMATCH bytes to match 2 */\r
+                       correction = new_ml - (start2 - ip);\r
+                       if (correction > 0)   /* move the start of match 2 forward to where the shortened match 1 ends */\r
+                       {\r
+                               start2 += correction;\r
+                               ref2 += correction;\r
+                               ml2 -= correction;\r
+                       }\r
+               }\r
+               // Now, we have start2 = ip+new_ml, with new_ml=min(ml, OPTIMAL_ML=18)\r
+\r
+               if (start2 + ml2 < mflimit)   /* look for a third match, longer than match 2 and overlapping its end */\r
+                       ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);\r
+               else ml3=ml2;\r
+\r
+               if (ml3 == ml2) // No better match : 2 sequences to encode\r
+               {\r
+                       // ip & ref are known; Now for ml\r
+                       if (start2 < ip+ml)   /* matches 1 and 2 overlap: trim match 1 (and shift match 2 if needed) */\r
+                       {\r
+                               if ((start2 - ip) < OPTIMAL_ML)\r
+                               {\r
+                                       int correction;\r
+                                       if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;\r
+                                       if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;\r
+                                       correction = ml - (start2 - ip);\r
+                                       if (correction > 0)\r
+                                       {\r
+                                               start2 += correction;\r
+                                               ref2 += correction;\r
+                                               ml2 -= correction;\r
+                                       }\r
+                               }\r
+                               else\r
+                               {\r
+                                       ml = start2 - ip;   /* match 1 simply ends where match 2 begins */\r
+                               }\r
+                       }\r
+                       // Now, encode 2 sequences\r
+                       LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);\r
+                       ip = start2;\r
+                       LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2);\r
+                       continue;\r
+               }\r
+\r
+               if (start3 < ip+ml+3) // Not enough space for match 2 : remove it\r
+               {\r
+                       if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1\r
+                       {\r
+                               if (start2 < ip+ml)   /* trim match 2 so that it starts after match 1; it is kept only as the saved fallback */\r
+                               {\r
+                                       int correction = (ip+ml) - start2;\r
+                                       start2 += correction;\r
+                                       ref2 += correction;\r
+                                       ml2 -= correction;\r
+                                       if (ml2 < MINMATCH)   /* too short to be usable: fall back to match 3 */\r
+                                       {\r
+                                               start2 = start3;\r
+                                               ref2 = ref3;\r
+                                               ml2 = ml3;\r
+                                       }\r
+                               }\r
+\r
+                               LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);\r
+                               ip  = start3;\r
+                               ref = ref3;\r
+                               ml  = ml3;\r
+\r
+                               start0 = start2;   /* what is left of match 2 becomes the saved match for _Search2 */\r
+                               ref0 = ref2;\r
+                               ml0 = ml2;\r
+                               goto _Search2;\r
+                       }\r
+\r
+                       start2 = start3;   /* match 3 overlaps match 1: it replaces match 2 and the search is repeated */\r
+                       ref2 = ref3;\r
+                       ml2 = ml3;\r
+                       goto _Search3;\r
+               }\r
+\r
+               // OK, now we have 3 ascending matches; let's write at least the first one\r
+               // ip & ref are known; Now for ml\r
+               if (start2 < ip+ml)   /* matches 1 and 2 overlap: trim match 1 (and shift match 2 if needed) */\r
+               {\r
+                       if ((start2 - ip) < (int)ML_MASK)\r
+                       {\r
+                               int correction;\r
+                               if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;\r
+                               if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;\r
+                               correction = ml - (start2 - ip);\r
+                               if (correction > 0)\r
+                               {\r
+                                       start2 += correction;\r
+                                       ref2 += correction;\r
+                                       ml2 -= correction;\r
+                               }\r
+                       }\r
+                       else\r
+                       {\r
+                               ml = start2 - ip;\r
+                       }\r
+               }\r
+               LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);\r
+\r
+               ip = start2;   /* match 2 becomes match 1 */\r
+               ref = ref2;\r
+               ml = ml2;\r
+\r
+               start2 = start3;   /* match 3 becomes match 2 */\r
+               ref2 = ref3;\r
+               ml2 = ml3;\r
+\r
+               goto _Search3;\r
+\r
+       }\r
+\r
+       // Encode Last Literals: everything from anchor to the end of input is emitted as one literal-only sequence\r
+       {\r
+               int lastRun = iend - anchor;\r
+               if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } \r
+               else *op++ = (lastRun<<ML_BITS);\r
+               memcpy(op, anchor, iend - anchor);\r
+               op += iend-anchor;\r
+       } \r
+\r
+       // End: number of bytes written to dest\r
+       return (int) (((char*)op)-dest);\r
+}\r
+\r
+\r
+int LZ4_compressHC(const char* source,   /* input bytes */ \r
+                                char* dest,   /* output buffer; its capacity is not passed in */\r
+                                int isize)   /* number of input bytes; returns the number of bytes written to dest, or 0 if no context could be obtained */\r
+{\r
+       void* ctx = LZ4HC_Create((const BYTE*)source);\r
+       int result = (ctx != NULL) ? LZ4_compressHCCtx(ctx, source, dest, isize) : 0;   /* never hand a NULL context to the compressor */\r
+       LZ4HC_Free (&ctx);\r
+       /* a successful run always writes at least one byte, so 0 cannot be mistaken for a valid size */\r
+       return result;\r
+}\r
+\r
+\r