Initial release
diff --git a/lib/LICENSE b/lib/LICENSE
new file mode 100644
index 0000000..3549585
--- /dev/null
+++ b/lib/LICENSE
@@ -0,0 +1,26 @@
+ZSTD Library
+Copyright (c) 2014-2015, Yann Collet
+All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000..4d83816
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,114 @@
+# ################################################################
+# ZSTD library - Makefile
+# Copyright (C) Yann Collet 2015
+# All rights reserved.
+# 
+# BSD license
+
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - ZSTD source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version numbers : parsed once from zstd.h when the Makefile is read (VERSION only feeds libzstd.pc)
+VERSION?= 0
+LIBVER_MAJOR := $(shell sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h)
+LIBVER_MINOR := $(shell sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h)
+LIBVER_PATCH := $(shell sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h)
+LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+
+DESTDIR?=
+PREFIX ?= /usr
+CFLAGS ?= -O3
+CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
+# Installation directories : each one keeps a value supplied by the environment or the command line
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR ?= $(PREFIX)/include
+
+
+# The OS X linker doesn't support -soname, and uses a different extension (install name follows LIBDIR)
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+  SHARED_EXT = dylib
+  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+  SHARED_EXT = so
+  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
+endif
+
+.PHONY: default all libzstd clean install uninstall   # command targets : none of them names a file the build creates
+default: libzstd
+all: libzstd
+
+libzstd: zstd.c   # builds libzstd.a, the versioned shared library and its two symlinks
+	@echo compiling static library
+	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $<
+	@$(AR) rcs $@.a zstd.o
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $< -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
+
+clean:   # removes core dumps, objects, archives, shared libraries and the generated libzstd.pc
+	@$(RM) core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+libzstd.pc: libzstd.pc.in Makefile   # fills the @PREFIX@, @LIBDIR@, @INCLUDEDIR@ and @VERSION@ placeholders of the template
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: libzstd libzstd.pc   # copies the shared library, its symlinks, libzstd.pc, libzstd.a and zstd.h below $(DESTDIR)
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@cp -a libzstd.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
+	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd static and shared library installed
+
+uninstall:   # removes every installed file; rm -f succeeds on missing files, so a partial install can still be cleaned up
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
+	@rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd libraries successfully uninstalled
+
+endif
diff --git a/lib/fse.c b/lib/fse.c
new file mode 100755
index 0000000..526f822
--- /dev/null
+++ b/lib/fse.c
@@ -0,0 +1,1573 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  Generic function type & suffix (C template emulation)
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "fse_static.h"
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+static unsigned FSE_isLittleEndian(void)   /* 1 when the host stores the low-order byte first, 0 otherwise */
+{
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };   /* deliberately not static : static is detrimental to performance */
+    return probe.bytes[0];
+}
+
+static U32 FSE_read32(const void* memPtr)   /* native-endian 32-bit load through memcpy, so memPtr may be unaligned */
+{
+    U32 result;
+    memcpy(&result, memPtr, 4);
+    return result;
+}
+
+static U32 FSE_readLE32(const void* memPtr)   /* reads 4 bytes stored in little-endian order, whatever the host order */
+{
+    const BYTE* const src = memPtr;
+    if (FSE_isLittleEndian()) return FSE_read32(memPtr);   /* host order already matches */
+    /* big-endian host : rebuild the value one byte at a time */
+    return (U32)((U32)src[0]
+               + ((U32)src[1]<<8)
+               + ((U32)src[2]<<16)
+               + ((U32)src[3]<<24));
+}
+
+static void FSE_writeLE32(void* memPtr, U32 val32)   /* stores val32 at memPtr as 4 little-endian bytes */
+{
+    BYTE* const dst = memPtr;
+    unsigned n;
+
+    if (FSE_isLittleEndian())
+    {
+        memcpy(memPtr, &val32, 4);   /* host order already matches */
+        return;
+    }
+
+    /* big-endian host : emit the least significant byte first */
+    for (n=0; n<4; n++)
+        dst[n] = (BYTE)(val32 >> (8*n));
+}
+
+static U64 FSE_read64(const void* memPtr)   /* native-endian 64-bit load through memcpy, so memPtr may be unaligned */
+{
+    U64 result;
+    memcpy(&result, memPtr, 8);
+    return result;
+}
+
+static U64 FSE_readLE64(const void* memPtr)   /* reads 8 bytes stored in little-endian order, whatever the host order */
+{
+    const BYTE* const src = memPtr;
+    U64 value = 0;
+    int n;
+
+    if (FSE_isLittleEndian()) return FSE_read64(memPtr);   /* host order already matches */
+    for (n=7; n>=0; n--)   /* big-endian host : fold the bytes in, most significant (src[7]) first */
+        value = (value<<8) | src[n];
+    return value;
+}
+
+static void FSE_writeLE64(void* memPtr, U64 val64)   /* stores val64 at memPtr as 8 little-endian bytes */
+{
+    BYTE* const dst = memPtr;
+    unsigned n;
+
+    if (FSE_isLittleEndian())
+    {
+        /* host order already matches : plain copy */
+        memcpy(memPtr, &val64, 8);
+        return;
+    }
+
+    /* big-endian host : emit the least significant byte first,
+       i.e. dst[0] = bits 0-7, dst[1] = bits 8-15, ... dst[7] = bits 56-63 */
+    for (n=0; n<8; n++)
+    {
+        dst[n] = (BYTE)(val64 >> (8*n));
+    }
+}
+
+static size_t FSE_readLEST(const void* memPtr)
+{
+    /* a size_t is read as 4 little-endian bytes when sizeof(size_t)==4, as 8 bytes otherwise */
+    return (sizeof(size_t)==4) ?
+           (size_t)FSE_readLE32(memPtr) :
+           (size_t)FSE_readLE64(memPtr);
+}
+
+static void FSE_writeLEST(void* memPtr, size_t val)   /* little-endian store, 4 bytes when sizeof(size_t)==4, 8 bytes otherwise */
+{
+    if (sizeof(size_t)!=4)
+        FSE_writeLE64(memPtr, (U64)val);
+    else
+        FSE_writeLE32(memPtr, (U32)val);
+}
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct   /* one entry per symbol in the symbolTT array of a CTable */
+{
+    int  deltaFindState;   /* NOTE(review): presumably an offset used to locate the next state - its use is outside this view, confirm */
+    U16  maxState;         /* upper bound for the state value (the table builders set it so that "state <= maxState") */
+    BYTE minBitsOut;       /* NOTE(review): presumably the minimum number of bits written for this symbol - confirm against the encoder */
+    /* one byte padding */
+} FSE_symbolCompressionTransform;
+
+typedef struct   /* storage for a CTable sized with FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE */
+{
+    U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];   /* compatible with FSE_compressU16() */
+} CTable_max_t;
+
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (register U32 val)   /* position (0..31) of the highest set bit of val; val must be non-zero */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* __builtin_clz(0) is undefined, hence the non-zero requirement */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;   /* these five steps copy the highest set bit into every lower position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* the top 5 bits of the product select the table entry */
+    return r;
+#   endif
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
+
+#define FSE_GENERATE_STRING(STRING) #STRING,
+static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
+
+const char* FSE_getErrorName(size_t code)
+{
+    static const char* notAnError = "Unspecified error code";
+    /* error codes are negated enum values : undo the negation to index the name table */
+    return FSE_isError(code) ? FSE_errorStrings[-(int)(code)] : notAnError;
+}
+
+static short FSE_abs(short a)   /* magnitude of a */
+{
+    return (a >= 0) ? a : -a;
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (!maxSymbolValue) return FSE_MAX_HEADERSIZE;   /* maxSymbolValue==0 : answer with the global maximum */
+    return (((maxSymbolValue+1) * tableLog) >> 3) + 1;   /* tableLog bits per symbol, converted to bytes, plus one */
+}
+
+static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
+                                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                                       unsigned safeWrite)   /* safeWrite!=0 : the bounds checks against oend are skipped */
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream;   /* bit accumulator, flushed 16 bits at a time */
+    int bitCount;    /* number of valid bits currently held in bitStream */
+    unsigned charnum = 0;   /* index of the next symbol to encode */
+    int previous0 = 0;      /* set when the previous symbol had a zero count : a zero-run length is written next */
+
+    bitStream = 0;
+    bitCount  = 0;
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while (remaining>1)   /* stops at 1 */
+    {
+        if (previous0)
+        {
+            unsigned start = charnum;
+            while (!normalizedCounter[charnum]) charnum++;   /* skip the run of zero counts */
+            while (charnum >= start+24)   /* each 0xFFFF marker stands for 24 skipped symbols */
+            {
+                start+=24;
+                bitStream += 0xFFFF<<bitCount;
+                if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (charnum >= start+3)   /* each 2-bit value 3 stands for 3 skipped symbols */
+            {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (charnum-start) << bitCount;   /* last 2 bits : what is left of the run (0..2) */
+            bitCount += 2;
+            if (bitCount>16)
+            {
+                if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+            }
+        }
+        {
+            short count = normalizedCounter[charnum++];
+            const short max = (short)((2*threshold-1)-remaining);
+            remaining -= FSE_abs(count);
+            if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;   /* counters add up to more than the table size */
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);   /* values below max take one bit less */
+            previous0 = (count==1);     /* count was incremented above : 1 means the original counter was 0 */
+            while (remaining<threshold) nbBits--, threshold>>=1;
+        }
+        if (bitCount>16)
+        {
+            if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+        }
+    }
+
+    /* flush remaining bitStream */
+    if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;   /* only the bytes that hold valid bits are counted */
+
+    if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;   /* Too many symbols written (a bit too late?) */
+
+    return (out-ostart);   /* number of header bytes written */
+}
+
+
+size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* the generic writer may skip its bounds checks only when the buffer reaches FSE_headerBound() */
+    unsigned const boundIsMet = !(headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog));
+
+    if ((tableLog > FSE_MAX_TABLELOG) || (tableLog < FSE_MIN_TABLELOG))
+        return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported */
+
+    return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, boundIsMet);
+}
+
+
+size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)   /* returns the header size in bytes, or an error code (see FSE_isError) */
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;   /* index of the next symbol to decode */
+    int previous0 = 0;      /* set when the previous counter was 0 : a zero-run length follows */
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;   /* the very first access below reads 4 bytes */
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))   /* on input, *maxSVPtr is the largest symbol the caller accepts */
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* each 0xFFFF marker stands for 24 zero counters */
+            {
+                n0+=24;
+                ip+=2;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            while ((bitStream & 3) == 3)   /* each 2-bit value 3 stands for 3 zero counters */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;   /* the run would exceed the caller's limit */
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            ip += bitCount>>3;
+            bitCount &= 7;
+            bitStream = FSE_readLE32(ip) >> bitCount;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));   /* short form : nbBits-1 bits */
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));   /* long form : nbBits bits */
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            ip += bitCount>>3;
+            bitCount &= 7;
+            bitStream = FSE_readLE32(ip) >> bitCount;
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;   /* the counters do not add up to the table size */
+    *maxSVPtr = charnum-1;   /* on output : last symbol actually decoded */
+
+    ip += bitCount>0;
+    if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* arguably a bit late , tbd */
+    return ip-istart;
+}
+
+
+/****************************************************************
+*  FSE Compression Code
+****************************************************************/
+/*
+CTable is a variable size structure which contains :
+    U16 tableLog;
+    U16 maxSymbolValue;
+    U16 nextStateNumber[1 << tableLog];                         // This size is variable
+    FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];  // This size is variable
+Allocation is manual, since C standard does not support variable-size structures.
+*/
+
+size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* A compilation error on the next line means FSE_CTABLE_SIZE_U32 is not large enough */
+    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));
+
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* larger than this build supports */
+    return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);   /* size in bytes */
+}
+
+void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* a tableLog above the absolute maximum is silently clamped to that maximum */
+    unsigned const usedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+
+    return malloc(FSE_CTABLE_SIZE_U32 (usedLog, maxSymbolValue) * sizeof(U32));   /* result of malloc() : may be NULL */
+}
+
+void  FSE_freeCTable (void* CTable)   /* hands CTable to free(); meant for tables obtained from FSE_createCTable() */
+{
+    free(CTable);
+}
+
+/* Emergency distribution strategy (fallback); compression will suffer a lot ; consider increasing table size */
+static void FSE_emergencyDistrib(short* normalizedCounter, int maxSymbolValue, short points)
+{
+    int sym;
+    /* sweep the alphabet round-robin, taking one point from every counter above 1 until no point is left;
+       NOTE : the loop cannot end while points remain and no counter is above 1 */
+    for (sym=0; points; sym = (sym<maxSymbolValue) ? sym+1 : 0)
+    {
+        if (normalizedCounter[sym] > 1)
+        {
+            normalizedCounter[sym]--;
+            points--;
+        }
+    }
+}
+
+/* fallback distribution (corner case); compression will suffer a bit ; consider increasing table size */
+void FSE_distribNpts(short* normalizedCounter, int maxSymbolValue, short points)   /* takes `points` away from the largest counters; its only call in FSE_normalizeCount is commented out */
+{
+    int s;
+    int rank[5] = {0};   /* symbol indexes, ordered by decreasing counter */
+    int fallback=0;      /* set once the 4th largest counter could not be reduced any further */
+
+    /* Sort 4 largest (they'll absorb normalization rounding) */
+    for (s=1; s<=maxSymbolValue; s++)
+    {
+        int i, b=3;
+        if (b>=s) b=s-1;
+        while ((b>=0) && (normalizedCounter[s]>normalizedCounter[rank[b]])) b--;   /* find the insertion slot of s */
+        for (i=3; i>b; i--) rank[i+1] = rank[i];   /* shift the smaller entries down by one */
+        rank[b+1]=s;
+    }
+
+    /* Distribute points */
+    s = 0;
+    while (points)
+    {
+        short limit = normalizedCounter[rank[s+1]]+1;   /* keep rank[s] strictly above the next ranked counter */
+        if (normalizedCounter[rank[s]] >= limit + points )
+        {
+            normalizedCounter[rank[s]] -= points;   /* this counter absorbs everything that is left */
+            break;
+        }
+        points -= normalizedCounter[rank[s]] - limit;
+        normalizedCounter[rank[s]] = limit;
+        s++;
+        if (s==3)
+        {
+            short reduction = points>>2;
+            if (fallback)
+            {
+                FSE_emergencyDistrib(normalizedCounter, maxSymbolValue, points);    /* Fallback mode */
+                return;
+            }
+            if (reduction < 1) reduction=1;
+            if (reduction >= normalizedCounter[rank[3]]) reduction=normalizedCounter[rank[3]]-1;   /* never bring rank[3] below 1 */
+            fallback = (reduction==0);
+            normalizedCounter[rank[3]]-=reduction;
+            points-=reduction;
+            s=0;   /* start another pass from the largest counter */
+        }
+    }
+}
+
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 tableLog = maxTableLog ? maxTableLog : FSE_DEFAULT_TABLELOG;   /* 0 selects the default */
+    U32 const srcBits = (srcSize > 1) ? FSE_highbit32((U32)(srcSize - 1)) : 0;   /* FSE_highbit32(0) is undefined : never evaluate it for srcSize==1 */
+    if ((srcBits >= 2) && (srcBits - 2 < tableLog)) tableLog = srcBits - 2;   /* Accuracy can be reduced; the guard keeps srcBits-2 from wrapping around */
+    if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;   /* always within [FSE_MIN_TABLELOG, FSE_MAX_TABLELOG] */
+}
+
+
+typedef struct   /* sort record used by FSE_adjustNormSlow */
+{
+    U32 id;      /* symbol index */
+    U32 count;   /* sort key : the symbol's count, or 0 when its normalized counter is <= 1 */
+} rank_t;
+
+int FSE_compareRankT(const void* r1, const void* r2)
+{
+    U32 const c1 = ((const rank_t*)r1)->count;
+    U32 const c2 = ((const rank_t*)r2)->count;
+    /* qsort() comparator, largest count first; equal counts must yield 0 so that the ordering is consistent */
+    return (c1 < c2) - (c1 > c2);
+}
+
+static void FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)   /* removes pointsToRemove from norm[], one point at a time, from the symbol currently ranked first */
+{
+    rank_t rank[FSE_MAX_SYMBOL_VALUE+1];   /* only entries [0..maxSymbolValue] are initialized */
+    U32 s;
+
+    /* Init */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        rank[s].id = s;
+        rank[s].count = count[s];
+        if (norm[s] <= 1) rank[s].count = 0;   /* counters at 1 or below are ranked last */
+    }
+
+    /* Sort according to count */
+    qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
+
+    while(pointsToRemove)
+    {
+        U32 newRank = 1;
+        norm[rank[0].id]--;
+        rank[0].count = (rank[0].count * 3) >> 2;   /* lower the key so that other symbols get their turn */
+        if (norm[rank[0].id] == 1) rank[0].count = 0;
+        while ((newRank <= maxSymbolValue) && (rank[newRank].count > rank[newRank-1].count))   /* bounded : never read past the initialized entries */
+        {
+            rank_t r = rank[newRank-1];
+            rank[newRank-1] = rank[newRank];
+            rank[newRank] = r;
+            newRank++;
+        }
+        pointsToRemove--;
+    }
+}
+
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue)   /* returns tableLog, 0 when one symbol makes up the whole input, or an error code */
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported size */
+    if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC;   /* Too small tableLog, compression potentially impossible */
+
+    {
+        U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };   /* rounding thresholds, indexed by proba (0..7) */
+        U64 const scale = 62 - tableLog;
+        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! NOTE(review): total==0 is not rejected above and would divide by zero */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;   /* table slots not yet assigned */
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            if (count[s] == total) return 0;   /* a single symbol accounts for all of the input */
+            if (count[s] == 0)
+            {
+                normalizedCounter[s]=0;
+                continue;
+            }
+            if (count[s] <= lowThreshold)
+            {
+                normalizedCounter[s] = -1;   /* low-count symbol : marked -1, uses one slot */
+                stillToDistribute--;
+            }
+            else
+            {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8)
+                {
+                    U64 restToBeat;
+                    restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;   /* round up when the remainder exceeds the threshold */
+                }
+                if (proba > largestP)
+                {
+                    largestP=proba;
+                    largest=s;
+                }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+            }
+        }
+        //if ((int)normalizedCounter[largest] <= -stillToDistribute+8)
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
+        {
+            /* largest cant accommodate that amount */
+            FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
+            //FSE_distribNpts(normalizedCounter, maxSymbolValue, (short)(-stillToDistribute));   /* Fallback */
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;   /* the largest counter absorbs the rounding difference */
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        int s;
+        for (s=0; s<=maxSymbolValue; s++)
+            printf("%3i: %4i \n", s, normalizedCounter[s]);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)   /* CTable layout : U16 tableLog, U16 maxSymbolValue, U16 state table, then the symbol transforms */
+{
+    const unsigned nbStates = 1 << nbBits;
+    const unsigned lastSymbol = nbStates - 1;   /* maxSymbolValue == tableSize-1 */
+    U16* const hdrU16 = (U16*) CTable;
+    U16* const stateTable = hdrU16 + 2;
+    FSE_symbolCompressionTransform* const transforms = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (nbStates>>1));
+    unsigned n;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+    if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* Must be aligned on a 4-byte boundary */
+
+    /* header */
+    hdrU16[0] = (U16) nbBits;
+    hdrU16[1] = (U16) lastSymbol;
+
+    /* Build table : entry n holds nbStates + n */
+    for (n=0; n<nbStates; n++)
+        stateTable[n] = (U16)(nbStates + n);
+
+    /* Build Symbol Transformation Table : same settings for every symbol, except deltaFindState */
+    for (n=0; n<=lastSymbol; n++)
+    {
+        transforms[n].deltaFindState = n-1;
+        transforms[n].maxState = (U16)( (nbStates*2) - 1);   /* ensures state <= maxState */
+        transforms[n].minBitsOut = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+
+/* Degenerate CTable, for rle (100% always same symbol) input : the symbol costs 0 bit */
+size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
+{
+    const unsigned nbStates = 1;
+    U16* const cells = (U16*) CTable;
+    /* transformation entry of the only symbol; the transformation table starts at the 3rd U32 cell */
+    FSE_symbolCompressionTransform* const entry =
+        ((FSE_symbolCompressionTransform*) ((U32*)CTable + 2)) + symbolValue;
+
+    /* Must be 4 bytes aligned */
+    if (((size_t)CTable) % 4) return (size_t)-FSE_ERROR_GENERIC;
+
+    /* header : tableLog, then maxSymbolValue */
+    cells[0] = (U16) 0;
+    cells[1] = (U16) symbolValue;
+
+    /* state table : both cells hold 0 */
+    cells[2] = 0;
+    cells[3] = 0;
+
+    /* symbol transformation : 0 bit per symbol, delta 0 */
+    entry->minBitsOut = 0;
+    entry->deltaFindState = 0;
+    entry->maxState = (U16)(2*nbStates-1);   /* ensures state <= maxState */
+
+    return 0;
+}
+
+
+void FSE_initCStream(FSE_CStream_t* bitC, void* start)
+{
+    /* the write cursor begins at the first byte of the destination buffer */
+    bitC->ptr = bitC->startPtr = (char*)start;
+    bitC->bitPos = 0;   /* no bit accumulated yet */
+    bitC->bitContainer = 0;
+}
+
+void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)
+{
+    const U32 log = *(const U16*) CTable;   /* first header field : tableLog */
+    statePtr->stateLog = log;
+    statePtr->value = (ptrdiff_t)1<<log;   /* starting state value */
+    statePtr->stateTable = (const U16*) CTable + 2;   /* skips the 2 header fields */
+    statePtr->symbolTT = (const U32*)CTable + 1 + (log ? (1<<(log-1)) : 1);   /* located after the state table */
+}
+
+void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
+{   /* Appends the nbBits lowest bits of value into the bit container, at position bitPos. Nothing is written to memory here. */
+    static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF };   /* up to 25 bits */
+    bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;   /* NOTE(review): nbBits > 25 would index past mask[]; there is no check here, callers must respect the limit */
+    bitC->bitPos += nbBits;   /* the container is only emptied by FSE_flushBits() */
+}
+
+void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
+{   /* Encodes one symbol : outputs the low bits of the current state value, then moves to the next state */
+    const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
+    const U16* const stateTable = (const U16*) statePtr->stateTable;
+    int nbBitsOut  = symbolTT[symbol].minBitsOut;
+    nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);   /* one more bit when value > maxState : the difference is then negative and >>31 gives -1. NOTE(review): relies on value being a signed type - confirm against FSE_CState_t */
+    FSE_addBits(bitC, statePtr->value, nbBitsOut);   /* only the nbBitsOut low bits are kept by FSE_addBits() */
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];   /* next state : remaining high bits + symbol offset, looked up in the state table */
+}
+
+void FSE_flushBits(FSE_CStream_t* bitC)
+{   /* Stores the container at ptr, then advances by the number of complete bytes only */
+    size_t nbBytes = bitC->bitPos >> 3;   /* complete bytes currently held */
+    FSE_writeLEST(bitC->ptr, bitC->bitContainer);   /* NOTE(review): presumably writes a whole size_t, little endian - room past ptr is then required; confirm against FSE_writeLEST */
+    bitC->bitPos &= 7;   /* bits of the last, incomplete, byte stay pending */
+    bitC->ptr += nbBytes;
+    bitC->bitContainer >>= nbBytes*8;   /* drop the bytes just written */
+}
+
+void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
+{   /* Writes the final state value on stateLog bits, then flushes the container */
+    FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
+    FSE_flushBits(bitC);
+}
+
+
+size_t FSE_closeCStream(FSE_CStream_t* bitC)
+{
+    size_t streamSize;
+
+    /* end mark : a single bit set to 1, then everything goes to memory */
+    FSE_addBits(bitC, 1, 1);
+    FSE_flushBits(bitC);
+
+    streamSize = bitC->ptr - bitC->startPtr;   /* complete bytes */
+    if (bitC->bitPos > 0) streamSize++;   /* count the last, partially filled, byte */
+    return streamSize;
+}
+
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const void* CTable)
+{   /* Encodes src with CTable, from its last symbol to its first, alternating between 2 states. @return : size of the bitstream written into dst */
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip;
+    const BYTE* const iend = istart + srcSize;
+
+    FSE_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+
+    /* init */
+    (void)dstSize;   /* objective : ensure it fits into dstBuffer (Todo). For now dstSize is ignored : no bound is enforced on writes */
+    FSE_initCStream(&bitC, dst);
+    FSE_initCState(&CState1, CTable);
+    CState2 = CState1;   /* both states start from the same value */
+
+    ip=iend;   /* symbols are consumed backwards */
+
+    /* odd length : encode one symbol alone, so that an even number remains */
+    if (srcSize & 1)
+    {
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+        FSE_flushBits(&bitC);
+    }
+
+    /* when the main loop encodes 4 symbols per iteration : encode 2 more if needed, so that a multiple of 4 remains */
+    if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2))   /* bit of weight 2 */
+    {
+        FSE_encodeByte(&bitC, &CState2, *--ip);
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+        FSE_flushBits(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop, depending on how many symbols the container can accumulate between 2 flushes */
+    while (ip>istart)
+    {
+        FSE_encodeByte(&bitC, &CState2, *--ip);
+
+        if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_flushBits(&bitC);
+
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+
+        if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 )   /* this test must be static */
+        {
+            FSE_encodeByte(&bitC, &CState2, *--ip);
+            FSE_encodeByte(&bitC, &CState1, *--ip);
+        }
+
+        FSE_flushBits(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);   /* final states : state 2 is written first, then state 1 */
+    FSE_flushCState(&bitC, &CState1);
+    return FSE_closeCStream(&bitC);   /* adds the end mark, returns the total size */
+}
+
+
+static size_t FSE_compressRLE (BYTE *out, BYTE symbol)
+{
+    out[0] = symbol;   /* an rle block is stored as its single repeated symbol */
+    return 1;
+}
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }   /* function form of the FSE_COMPRESSBOUND macro */
+
+
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Complete compression : count symbols, normalize, write header, build CTable, encode. @return : compressed size; 0 if data is judged not compressible; 1 if src is a single repeated symbol (rle); srcSize if srcSize<=1; or an error code */
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32   count[FSE_MAX_SYMBOL_VALUE+1];
+    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
+    CTable_max_t CTable;
+    size_t errorCode;
+
+    /* early out */
+    if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst must be able to receive the worst case */
+    if (srcSize <= 1) return srcSize;  /* Uncompressed or RLE */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;   /* 0 == default */
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;   /* 0 == default */
+
+    /* Scan input and build symbol stats */
+    errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);   /* on success : count of the most frequent symbol */
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);   /* one symbol accounts for the whole input */
+    if (errorCode < ((srcSize * 7) >> 10)) return 0;   /* Heuristic : not compressible enough */
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* Write table description header */
+    errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);   /* NOTE(review): budget given is FSE_MAX_HEADERSIZE, not oend-op; presumably covered by the dstSize check above - confirm */
+    if (FSE_isError(errorCode)) return errorCode;
+    op += errorCode;   /* on success, errorCode is the header size */
+
+    /* Compress */
+    errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+    op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);   /* result is added as is : it is not tested with FSE_isError() */
+
+    /* check compressibility */
+    if ( (size_t)(op-ostart) >= srcSize-1 )
+        return 0;
+
+    return op-ostart;
+}
+
+
+size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
+{   /* Compression with default maxSymbolValue and tableLog; same return values as FSE_compress2(). srcSize is forwarded as is : a (U32) cast would silently drop the high bits of sizes >= 4 GB on 64-bits targets, and only part of the input would be compressed. */
+    return FSE_compress2(dst, dstSize, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+typedef struct
+{
+    U16  newState;   /* base value of the next state : bits read from the stream are added to it */
+    BYTE symbol;     /* symbol produced when the decoder is in this state */
+    BYTE nbBits;     /* number of bits to read from the stream to reach the next state */
+} FSE_decode_t;   /* size == U32 */
+
+/* RLE block : the compressed form is a single byte, repeated originalSize times into dst */
+size_t FSE_decompressRLE(void* dst, size_t originalSize,
+                   const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const rleSymbol = (const BYTE*)cSrc;
+    if (cSrcSize == 1) { memset(dst, rleSymbol[0], originalSize); return originalSize; }
+    return (size_t)-FSE_ERROR_srcSize_wrong;   /* any other compressed size is invalid */
+}
+
+
+size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
+{
+    U32* const header = (U32*)DTable;
+    FSE_decode_t* const onlyState = (FSE_decode_t*)(header + 1);
+
+    /* DTable is accessed as U32 : refuse an address which is not 4 bytes aligned */
+    if ((size_t)DTable % 4 != 0) return (size_t)-FSE_ERROR_GENERIC;
+
+    /* tableLog==0 : a single state, which reads no bit and always leads to state 0 */
+    header[0] = 0;
+    onlyState[0].nbBits = 0;
+    onlyState[0].newState = 0;
+    onlyState[0].symbol = symbolValue;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
+{
+    U32* const header = (U32*)DTable;
+    FSE_decode_t* const states = (FSE_decode_t*)(header + 1);
+    const unsigned lastSymbol = (unsigned)(1 << nbBits) - 1;   /* one state per symbol value */
+    unsigned symbol;
+
+    /* Argument validation */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+    if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* Must be allocated of 4 bytes boundaries */
+
+    /* Header : tableLog */
+    header[0] = nbBits;
+
+    /* State N gives symbol N; the next state is then read directly from nbBits stream bits */
+    for (symbol=0; symbol<=lastSymbol; symbol++)
+    {
+        states[symbol].symbol = (BYTE)symbol;
+        states[symbol].nbBits = (BYTE)nbBits;
+        states[symbol].newState = 0;
+    }
+
+    return 0;
+}
+
+
+/* FSE_initDStream
+ * Initializes bitD to read the FSE block srcBuffer[0..srcSize-1]; the container is first loaded from the end of the block.
+ * The last byte must be non-zero : bitsConsumed starts from 8 - FSE_highbit32(lastByte), which skips the bits above the stop bit.
+ * @return : srcSize on success.
+ *           An error code if srcSize==0, or if the last byte is 0 (stop bit not present).
+ */
+size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    if (srcSize >=  sizeof(bitD_t))   /* enough input to load a whole container */
+    {
+        U32 contain32;
+        bitD->start = (char*)srcBuffer;
+        bitD->ptr   = (char*)srcBuffer + srcSize - sizeof(bitD_t);   /* last sizeof(bitD_t) bytes of the block */
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        contain32 = ((BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+    }
+    else   /* short input : the container is assembled byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(BYTE*)(bitD->start);
+        switch(srcSize)   /* every case falls through to the next one, on purpose */
+        {
+            case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
+            case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
+            case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
+            case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((BYTE*)srcBuffer)[srcSize-1];
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;   /* the upper bytes of the container hold no input */
+    }
+
+    return srcSize;
+}
+
+
+/* FSE_readBits
+ * Extracts the next nbBits bits from the bitContainer, and counts them as consumed (no refill is done here).
+ * nbBits==0 is valid and gives 0 (the right shift is split in 2 steps, so that neither reaches the container width); use the fast variant *only* if nbBits > 0.
+ * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
+ * return : value extracted.
+ */
+bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);   /* left shift discards consumed bits, right shifts keep the nbBits top ones */
+    bitD->bitsConsumed += nbBits;
+    return value;
+}
+
+bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 : with nbBits==0, the right shift count would equal the container width */
+{
+    bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);   /* single right shift, unlike FSE_readBits() */
+    bitD->bitsConsumed += nbBits;
+    return value;
+}
+
+unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
+{   /* Refills the container by moving ptr backwards over the bytes already consumed. @return : 0 = refilled, ptr still above start; 1 = ptr is at start, unread bits remain; 2 = ptr is at start, all bits consumed exactly; 3 = ptr is at start, more bits consumed than available */
+    if (bitD->ptr >= bitD->start + sizeof(bitD_t))   /* a full container of input remains before ptr : plain reload */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        return 0;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load : only report the status, bitD is not modified */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
+        if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
+        return 3;
+    }
+    {   /* close to start : move back by whole consumed bytes, without going below start */
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        if (bitD->ptr - nbBytes < bitD->start)
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* note : necessarily ptr > start */
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* note : necessarily srcSize > sizeof(bitD) */
+        return (bitD->ptr == bitD->start);
+    }
+}
+
+
+void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)
+{   /* Reads the starting state from the stream, then attaches the decoding cells of DTable to the state */
+    const U32* const base32 = DTable;
+    DStatePtr->state = FSE_readBits(bitD, base32[0]);   /* first cell of DTable is tableLog : the state is read on that many bits */
+    FSE_reloadDStream(bitD);   /* result is ignored here */
+    DStatePtr->table = base32 + 1;   /* decoding cells start right after the header cell */
+}
+
+BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t* const cell = (const FSE_decode_t*)(DStatePtr->table) + DStatePtr->state;   /* decoding cell of the current state */
+    const BYTE decoded = cell->symbol;
+    const U16 stateBase = cell->newState;
+    const bitD_t freshBits = FSE_readBits(bitD, cell->nbBits);   /* accepts nbBits==0 */
+
+    DStatePtr->state = stateBase + freshBits;
+    return decoded;
+}
+
+BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
+{
+    const FSE_decode_t* const cell = (const FSE_decode_t*)(DStatePtr->table) + DStatePtr->state;   /* decoding cell of the current state */
+    const BYTE decoded = cell->symbol;
+    const U16 stateBase = cell->newState;
+    const bitD_t freshBits = FSE_readBitsFast(bitD, cell->nbBits);   /* requires nbBits >= 1 */
+
+    DStatePtr->state = stateBase + freshBits;
+    return decoded;
+}
+
+/* FSE_endOfDStream
+   Tells if bitD has reached end of bitStream or not : ptr is back at start, and every bit of the container has been consumed. Pure test : bitD is never modified, as its const qualifier promises. */
+
+unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+    return (bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD_t)*8);   /* exactly the condition under which FSE_reloadDStream() returns 2, without casting const away nor reloading */
+}
+
+unsigned FSE_endOfDState(const FSE_DState_t* statePtr)
+{
+    return !statePtr->state;   /* 1 only when the state value is 0 */
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const void* DTable, unsigned fast)
+{   /* Decodes the bitstream cSrc into dst, alternating between 2 states. fast != 0 selects FSE_decodeSymbolFast(). @return : nb of bytes written into dst, or an error code */
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the main loop writes up to 4 symbols per iteration. NOTE(review): points before dst when maxDstSize < 3 */
+
+    FSE_DStream_t bitD;
+    FSE_DState_t state1, state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* fails on an empty input, or when the stop bit is missing */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, DTable);   /* state 1 is read from the stream first, then state 2 */
+    FSE_initDState(&state2, &bitD, DTable);
+
+
+    /* main loop : 2 symbols per iteration, or 4 when the container is large enough; runs while reload returns 0 */
+    while (!FSE_reloadDStream(&bitD) && (op<olimit))
+    {
+        *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8)    /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+
+        if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8)    /* This test must be static */
+        {
+            *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+            *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+        }
+    }
+
+    /* tail : one symbol at a time; stops on stream overflow, on full dst, or when the next state is 0 and the stream is finished */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+    }
+
+    /* clean end : stream entirely consumed, and both states equal to 0 */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;   /* any other ending is reported as corrupted input */
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const void* DTable, size_t fastMode)
+{
+    /* each branch passes a literal flag, presumably so that each inlined copy of the generic body gets specialized */
+    return fastMode ? FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1)
+                    : FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);
+}
+
+
+size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{   /* Decodes a complete FSE block : reads the header, builds the decoding table, then decodes the bitstream into dst. @return : decoded size, or an error code */
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    FSE_decode_t DTable[FSE_MAX_TABLESIZE+1];   /* +1 : FSE_buildDTable() keeps tableLog in the first cell and puts the 1<<tableLog states after it; without it the last state lands past the array when tableLog==FSE_MAX_TABLELOG (assumes FSE_MAX_TABLESIZE == 1<<FSE_MAX_TABLELOG) */
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+    size_t errorCode, fastMode;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);   /* on success : header size */
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;   /* what remains is the bitstream */
+
+    fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);   /* on success : tells if the fast decoder can be used */
+    if (FSE_isError(fastMode)) return fastMode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+/*
+  2nd part of the file
+  designed to be included
+  for type-specific functions (template equivalent in C)
+  Objective is to write such functions only once, for better maintenance
+*/
+
+/* safety checks : both template parameters must be defined before reaching this part */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names : 2 levels of macros, so that arguments are macro-expanded before being pasted by ## */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
+{   /* Histogram of source into count[0..maxSymbolValue]. *maxSymbolValuePtr : in = largest symbol value accepted (0 == default), out = largest symbol value present. safe != 0 : every symbol is checked. @return : count of the most frequent symbol, or an error code */
+    const FSE_FUNCTION_TYPE* ip = source;
+    const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned max=0;
+    int s;
+
+    U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };   /* 4 tables : consecutive symbols update different tables, totals are summed at the end */
+    U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+
+    /* safety checks */
+    if (!sourceSize)
+    {
+        memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));   /* size of a counter, not of a symbol : clears the whole table */
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC;   /* maxSymbolValue too large : unsupported */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;            /* 0 == default */
+
+    if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1) || (sourceSize < 4))   /* the unchecked path starts by reading 4 bytes : inputs shorter than that must not take it */
+    {
+        /* check input values, to avoid count table overflow */
+        while ((iend - ip) > 3)   /* same as ip < iend-3, without forming a pointer below source */
+        {
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
+        }
+    }
+    else
+    {   /* unchecked path, 1-byte symbols only : 16 symbols per iteration, the next 4 bytes being loaded ahead into cached */
+        U32 cached = FSE_read32(ip); ip += 4;
+        while ((iend - ip) > 15)   /* same as ip < iend-15, without forming a pointer below source */
+        {
+            U32 c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;   /* the last 4 bytes loaded into cached were not counted : hand them to the tail loop */
+    }
+
+    /* finish last symbols : same check rule as the main loop, so that wide symbols can never index past the counting tables */
+    while (ip<iend) { if (((safe) || (sizeof(FSE_FUNCTION_TYPE)>1)) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
+
+    for (s=0; s<=(int)maxSymbolValue; s++)
+    {
+        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+        if (count[s] > max) max = count[s];
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;   /* shrink to the largest symbol value really present */
+    *maxSymbolValuePtr = maxSymbolValue;
+    return (size_t)max;   /* no detour through int : counts above INT_MAX must not look like error codes */
+}
+
+/* hidden fast variant (unsafe) : calls the generic counter with safe==0, which skips symbol value checks when symbols are 1 byte wide */
+size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
+{
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
+}
+
+size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
+{
+    /* A 1-byte symbol cannot exceed 255 : when the limit is 255 or more,
+     * the limit is set to 255 and the unchecked counting mode is selected. */
+    const unsigned unchecked = (sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255);
+
+    if (unchecked) *maxSymbolValuePtr = 255;
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, !unchecked);
+}
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }   /* distance between 2 successive cells when spreading symbols : tableSize/2 + tableSize/8 + 3 */
+
+size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
+(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Builds the compression table from normalized counters (-1 == low probability symbol). @return : 0, or an error code when CTable is misaligned or when counters do not fill the table exactly */
+    const unsigned tableSize = 1 << tableLog;
+    const unsigned tableMask = tableSize - 1;
+    U16* tableU16 = ( (U16*) CTable) + 2;   /* state table, after the 2 header fields */
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );   /* symbol transformation table, after the state table */
+    const unsigned step = FSE_tableStep(tableSize);
+    unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];   /* cumul[s] : first slot of symbol s in the state table */
+    U32 position = 0;
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* symbol assigned to each cell. NOTE(review): tableLog is not checked against FSE_MAX_TABLELOG here - confirm that callers guarantee it */
+    U32 highThreshold = tableSize-1;
+    unsigned symbol;
+    unsigned i;
+
+    /* safety checks */
+    if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* Must be allocated of 4 bytes boundaries */
+
+    /* header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* For explanations on how to distribute symbol values over the table :
+    *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+    /* symbol start positions */
+    cumul[0] = 0;
+    for (i=1; i<=maxSymbolValue+1; i++)
+    {
+        if (normalizedCounter[i-1]==-1)   /* Low prob symbol */
+        {
+            cumul[i] = cumul[i-1] + 1;   /* counts for exactly one cell */
+            tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);   /* which is taken from the top of the table */
+        }
+        else
+            cumul[i] = cumul[i-1] + normalizedCounter[i-1];
+    }
+    cumul[maxSymbolValue+1] = tableSize+1;
+
+    /* Spread symbols */
+    for (symbol=0; symbol<=maxSymbolValue; symbol++)
+    {
+        int nbOccurences;
+        for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)   /* never entered for counters 0 and -1 */
+        {
+            tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* Lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* Must have gone through all positions */
+
+    /* Build table */
+    for (i=0; i<tableSize; i++)
+    {
+        FSE_FUNCTION_TYPE s = tableSymbol[i];
+        tableU16[cumul[s]++] = (U16) (tableSize+i);   /* Table U16 : sorted by symbol order; gives next state value */
+    }
+
+    /* Build Symbol Transformation Table */
+    {
+        unsigned s;
+        unsigned total = 0;   /* nb of cells attributed to previous symbols */
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            switch (normalizedCounter[s])
+            {
+            case 0:   /* symbol not present : its entry is left untouched */
+                break;
+            case -1:
+            case 1:   /* a single cell : always tableLog bits */
+                symbolTT[s].minBitsOut = (BYTE)tableLog;
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                symbolTT[s].maxState = (U16)( (tableSize*2) - 1);   /* ensures state <= maxState */
+                break;
+            default :   /* several cells : minBitsOut bits, or one more when the state is above maxState (see FSE_encodeByte) */
+                symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
+                symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                total +=  normalizedCounter[s];
+                symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
+{   /* Allocates a DTable for tableLog (silently limited to FSE_TABLELOG_ABSOLUTE_MAX). @return : the table, or NULL if malloc() fails; to be released with the matching FSE_freeDTable variant */
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return malloc( sizeof(U32) + ((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE) );   /* U32 header cell (tableLog) + one decoding cell per state : the layout written by FSE_buildDTable(), whose last cell would otherwise land past the allocation */
+}
+
+void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
+{   /* Releases a table with free() : DTable must come from the matching FSE_createDTable variant (malloc), or be NULL */
+    free(DTable);
+}
+
+
+size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{   /* Builds the decoding table : DTable receives tableLog in its first U32 cell, then 1<<tableLog decoding cells. @return : 1 if no counter reaches half the table (used by FSE_decompress as its fastMode flag), 0 otherwise, or an error code */
+    U32* const base32 = DTable;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);   /* decoding cells start after the header cell */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];   /* next state value to attribute, per symbol */
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= 1 << (tableLog-1);   /* half the table. NOTE(review): shift count is -1 when tableLog==0 - confirm tableLog >= 1 is guaranteed */
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    base32[0] = tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)   /* low probability symbol : one cell, taken from the top of the table */
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;   /* this symbol owns at least half of the cells */
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)   /* never entered for counters 0 and -1 */
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
+            U16 nextState = symbolNext[symbol]++;   /* cells of a symbol receive increasing state values, in table order */
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);   /* base of the next state, to which the bits read are added */
+        }
+    }
+
+    return noLarge;
+}
diff --git a/lib/fse.h b/lib/fse.h
new file mode 100755
index 0000000..1526f0f
--- /dev/null
+++ b/lib/fse.h
@@ -0,0 +1,380 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    // size_t, ptrdiff_t
+
+
+/******************************************
+*  FSE simple functions
+******************************************/
+size_t FSE_compress(void* dst, size_t maxDstSize,
+              const void* src, size_t srcSize);
+size_t FSE_decompress(void* dst, size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*
+FSE_compress():
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated, and sized to handle worst case situations.
+    Worst case size evaluation is provided by FSE_compressBound().
+    return : size of compressed data
+    Special values : if result == 0, data is not compressible => Nothing is stored within dst !!
+                     if result == 1, data is one constant element x srcSize times. Use RLE compression.
+                     if FSE_isError(result), it's an error code.
+
+FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    ** Important ** : This function doesn't decompress uncompressed nor RLE data !
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSE_isError()
+*/
+
+
+size_t FSE_decompressRLE(void* dst, size_t originalSize,
+                   const void* cSrc, size_t cSrcSize);
+/*
+FSE_decompressRLE():
+    Decompress specific RLE corner case (equivalent to memset()).
+    cSrcSize must be == 1. originalSize must be exact.
+    return : size of regenerated data (==originalSize)
+             or an error code, which can be tested using FSE_isError()
+
+Note : there is no function provided for uncompressed data, as it's just a simple memcpy()
+*/
+
+
+/******************************************
+*  Tool functions
+******************************************/
+size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/******************************************
+*  FSE advanced functions
+******************************************/
+/*
+FSE_compress2():
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    return : size of compressed data
+             or -1 if there is an error
+*/
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/******************************************
+   FSE detailed API
+******************************************/
+/*
+int FSE_compress(char* dest, const char* source, int inputSize) does the following:
+1. count symbol occurrence from source[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeHeader()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table
+
+int FSE_decompress(char* dest, int originalSize, const char* compressed) performs:
+1. read normalized counters with readHeader()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table
+
+The following API allows triggering specific sub-functions.
+*/
+
+/* *** COMPRESSION *** */
+
+size_t FSE_count(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
+
+unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, unsigned maxSymbolValue);
+
+size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog);
+size_t FSE_writeHeader (void* headerBuffer, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+void*  FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+void   FSE_freeCTable (void* CTable);
+size_t FSE_buildCTable(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize, const void* src, size_t srcSize, const void* CTable);
+
+/*
+The first step is to count all symbols. FSE_count() provides one quick way to do this job.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have '*maxSymbolValuePtr+1' cells.
+'src' is a table of unsigned char of size 'srcSize'. All values within 'src' MUST be <= *maxSymbolValuePtr
+*maxSymbolValuePtr will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol which frequency is >= 1.
+You can use input 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can optionally call FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error(typically, invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeHeader().
+'header' buffer must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'header'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()) (for example, buffer size too small).
+
+'normalizedCounter' can then be used to create the compression tables 'CTable'.
+The space required by 'CTable' must be already allocated. Its size is provided by FSE_sizeof_CTable().
+'CTable' must be aligned on 4-byte boundaries.
+You can then use FSE_buildCTable() to fill 'CTable'.
+In both cases, if there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'source', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'source' is assumed to be a table of char of size 'sourceSize'
+The function returns the size of compressed data (without header), or -1 if failed.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize);
+
+void*  FSE_createDTable(unsigned tableLog);
+void   FSE_freeDTable(void* DTable);
+size_t FSE_buildDTable (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const void* DTable, size_t fastMode);
+
+/*
+If the block is RLE compressed, or uncompressed, use the relevant specific functions.
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by reading a header with FSE_readHeader().
+'normalizedCounter' must be already allocated, and have at least '*maxSymbolValuePtr+1' cells of short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readHeader will provide 'tableLog' and 'maxSymbolValue' stored into the header.
+The result of FSE_readHeader() is the number of bytes read from 'header'.
+The following values have special meaning :
+return 2 : there is only a single symbol value. The value is provided into the second byte of header.
+return 1 : data is uncompressed
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to create the decompression tables 'DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'DTable' must be already allocated and properly aligned.
+One can create a DTable using FSE_createDTable().
+The function will return 1 if DTable is compatible with fastMode, 0 otherwise.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'DTable' can then be used to decompress 'compressed', with FSE_decompress_usingDTable().
+Only trigger fastMode if it was authorized by result of FSE_buildDTable(), otherwise decompression will fail.
+cSrcSize must be correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+*/
+
+
+/******************************************
+*  FSE streaming compression API
+******************************************/
+/* bitStream tracking structure, passed to FSE_initCStream(), FSE_addBits(), FSE_flushBits() and FSE_closeCStream() */
+typedef struct
+{
+    size_t bitContainer;   /* NOTE(review): presumably the size_t-wide local register described in the notes below - confirm in fse.c */
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+} FSE_CStream_t;
+
+/* State tracking structure, set up from a CTable by FSE_initCState() */
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+void   FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer);
+void   FSE_initCState(FSE_CState_t* CStatePtr, const void* CTable);
+
+void   FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned char symbol);
+void   FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
+void   FSE_flushBits(FSE_CStream_t* bitC);
+
+void   FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+size_t FSE_closeCStream(FSE_CStream_t* bitC);
+
+/*
+These functions are inner components of FSE_compress_usingCTable().
+They allow creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a lifo stack.
+
+You will need a few variables to track your CStream. They are :
+
+void* CTable;           // Provided by FSE_buildCTable()
+FSE_CStream_t bitC;     // bitStream tracking structure
+FSE_CState_t state;     // State tracking structure
+
+
+The first thing to do is to init the bitStream, and the state.
+    FSE_initCStream(&bitC, dstBuffer);
+    FSE_initCState(&state, CTable);
+
+You can then encode your input data, byte after byte.
+FSE_encodeByte() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitC, &state, symbol);
+
+At any time, you can add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    FSE_addBits(&bitC, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    FSE_flushBits(&bitC);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitC, &state);
+
+You must then close the bitStream if you opened it with FSE_initCStream().
+FSE_closeCStream() takes the bitStream as its only argument.
+The function returns the size in bytes of CStream.
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = FSE_closeCStream(&bitC);
+*/
+
+
+/******************************************
+*  FSE streaming decompression API
+******************************************/
+//typedef unsigned int bitD_t;
+typedef size_t bitD_t;   /* type of the decoder's bit container and state value */
+
+/* Stream context of the decoder, set up by FSE_initDStream() */
+typedef struct
+{
+    bitD_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} FSE_DStream_t;
+
+/* State context of the decoder, set up by FSE_initDState(); several can share one DStream */
+typedef struct
+{
+    bitD_t      state;
+    const void* table;
+} FSE_DState_t;
+
+
+size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+void   FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable);
+
+unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
+bitD_t        FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
+unsigned int  FSE_reloadDStream(FSE_DStream_t* bitD);
+
+unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
+unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary elements.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+FSE_DStream_t DStream;    // Stream context
+FSE_DState_t DState;      // State context. Multiple ones are possible
+const void* DTable;       // Decoding table, provided by FSE_buildDTable()
+U32 tableLog;             // Provided by FSE_readHeader()
+
+The first thing to do is to init the bitStream.
+    errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s) (multiple ones are possible) :
+    FSE_initDState(&DState, &DStream, DTable);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a lifo stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25
+    unsigned int bitField = FSE_readBits(&DStream, nbBits);
+
+All above operations only read from local register (whose size is the size of bitD_t, i.e. size_t : 32 or 64 bits).
+Reading data from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+FSE_reloadDStream() result tells if there is still some more data to read from DStream.
+0 : there is still some data left into the DStream.
+1 Dstream reached end of buffer, but is not yet fully extracted. It will not load data from memory any more.
+2 Dstream reached its exact end, corresponding in general to decompression completed.
+3 Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer(1), progress slowly if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    FSE_reloadDStream(&DStream) >= 2
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    FSE_endOfDStream(&DStream);
+Check also the states. There might be some entropy left there, still able to decode some high probability symbol.
+    FSE_endOfDState(&DState);
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/fse_static.h b/lib/fse_static.h
new file mode 100755
index 0000000..7d400a5
--- /dev/null
+++ b/lib/fse_static.h
@@ -0,0 +1,108 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Tool functions
+******************************************/
+#define FSE_MAX_HEADERSIZE 512
+#define FSE_COMPRESSBOUND(size) ((size) + ((size)>>7) + FSE_MAX_HEADERSIZE)   /* Macro can be useful for static allocation. 'size' is evaluated twice : pass an expression without side effects */
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate a CTable or a DTable as a table of U32 using below macros.
+ * Arguments are parenthesized, so any integer expression can be passed. */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   ((1<<(maxTableLog))+1)
+
+
+/******************************************
+*  FSE supported API for DLL
+******************************************/
+#include "fse.h"
+
+
+/******************************************
+*  Error Management
+******************************************/
+/* Error names are listed once, as an X-macro : FSE_LIST_ERRORS(ITEM) applies ITEM to each name, in order */
+#define FSE_LIST_ERRORS(ITEM) \
+        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) \
+        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+        ITEM(FSE_ERROR_corruptionDetected) \
+        ITEM(FSE_ERROR_maxCode)
+
+/* FSE_GENERATE_ENUM turns each listed name into an enumerator : FSE_OK_NoError is 0, FSE_ERROR_maxCode is the last value */
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+size_t FSE_countFast(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
+/* same as FSE_count(), but doesn't check that all values within src are really <= *maxSymbolValuePtr */
+
+size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits);
+/* create a fake CTable, designed to not compress an input where each element uses nbBits */
+
+size_t FSE_buildCTable_rle (void* CTable, unsigned char symbolValue);
+/* create a fake CTable, designed to compress a single identical value */
+
+size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits);
+/* create a fake DTable, designed to read an uncompressed bitstream where each element uses nbBits */
+
+size_t FSE_buildDTable_rle (void* DTable, unsigned char symbolValue);
+/* create a fake DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE streaming API
+******************************************/
+bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
+
+unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
+/* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
new file mode 100644
index 0000000..28afc3a
--- /dev/null
+++ b/lib/libzstd.pc.in
@@ -0,0 +1,14 @@
+#   ZSTD - standard compression algorithm
+#   Copyright (C) 2014-2015, Yann Collet.
+#   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: zstd
+Description: lossless compression algorithm library
+URL: https://github.com/Cyan4973/zstd
+Version: @VERSION@
+Libs: -L@LIBDIR@ -lzstd
+Cflags: -I@INCLUDEDIR@
diff --git a/lib/zstd.c b/lib/zstd.c
new file mode 100644
index 0000000..1fc1dcc
--- /dev/null
+++ b/lib/zstd.c
@@ -0,0 +1,1719 @@
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated based on feedback
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+#include <immintrin.h>   /* AVX2 intrinsics */
+#include "zstd_static.h"
+#if defined(__clang__) || defined(__GNUC__)
+#  include "fse.c"        /* unfortunately, due to GCC/Clang inlining limitations, this include runs noticeably faster */
+#else
+#  include "fse_static.h"
+#endif
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/********************************************************
+*  Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51C;
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+#define KB *(1<<10)
+#define MB *(1<<20)
+
+#define BLOCKSIZE (128 KB)                 // define, for static allocation
+static const size_t g_maxBlockSize = 128 KB;   //((size_t)1 << 22) - 1;
+static const U32 g_maxDistance = 512 KB;
+static const U32 g_searchStrength = 8;
+
+#define WORKPLACESIZE (BLOCKSIZE*11/4)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff ((1<<Offbits)-1)
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+/* Pointer-width probes : each one returns 1 when pointers have the tested size, 0 otherwise */
+static unsigned ZSTD_32bits(void)
+{
+    return (sizeof(void*) == 4) ? 1 : 0;
+}
+
+static unsigned ZSTD_64bits(void)
+{
+    return (sizeof(void*) == 8) ? 1 : 0;
+}
+
+/* ZSTD_isLittleEndian() :
+*  returns the first byte in memory of a 32-bits word holding the value 1,
+*  hence 1 on a little-endian target and 0 on a big-endian one. */
+static unsigned ZSTD_isLittleEndian(void)
+{
+    /* kept as an automatic variable : a static one is detrimental to performance */
+    const union { U32 asWord; BYTE asBytes[4]; } probe = { 1 };
+    return probe.asBytes[0];
+}
+
+/* ZSTD_readBE32() :
+*  reads 4 bytes at memPtr as a big-endian 32-bits value (first byte is the most significant).
+*  Works byte by byte, so memPtr does not need to be aligned. */
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    U32 result = 0;
+    int n;
+    for (n=0; n<4; n++) result = (result << 8) + bytes[n];
+    return result;
+}
+
+/* ZSTD_writeBE32() :
+*  stores 'value' into the 4 bytes at memPtr, most significant byte first.
+*  Works byte by byte, so memPtr does not need to be aligned. */
+static void ZSTD_writeBE32(void* memPtr, U32 value)
+{
+    BYTE* const out = (BYTE*) memPtr;
+    int n;
+    /* fill from the last byte backward, consuming the low 8 bits each time */
+    for (n=3; n>=0; n--)
+    {
+        out[n] = (BYTE)value;
+        value >>= 8;
+    }
+}
+
+/* Native-endian loads of 2, 4 and sizeof(size_t) bytes from 'p'.
+ * memcpy() is used rather than a pointer cast, so the read is valid whatever
+ * the alignment of 'p', keeps its const qualifier and respects strict-aliasing rules.
+ * Optimizing compilers typically turn such a fixed-size copy into a single load. */
+static U16    ZSTD_read16(const void* p) { U16 v; memcpy(&v, p, sizeof(v)); return v; }
+
+static U32    ZSTD_read32(const void* p) { U32 v; memcpy(&v, p, sizeof(v)); return v; }
+
+static size_t ZSTD_read_ARCH(const void* p) { size_t v; memcpy(&v, p, sizeof(v)); return v; }
+
+/* Fixed-size copies of 4 and 8 bytes, performed through memcpy() */
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+/* COPY8 : copies 8 bytes from s to d, then advances both pointers by 8.
+ * d and s are modified and appear twice in the expansion : pass plain pointer variables. */
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* ZSTD_wildcopy() :
+*  copies 'length' bytes from src to dst, 8 bytes at a time.
+*  The last step is not trimmed : up to 7 bytes beyond dst+length can be written
+*  (and as many read beyond src+length), so both buffers need that much margin.
+*  Nothing is copied when length is 0. */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* from = (const BYTE*)src;
+    BYTE* to = (BYTE*)dst;
+    BYTE* const limit = to + length;
+    while (to < limit)
+    {
+        ZSTD_copy8(to, from);
+        to += 8;
+        from += 8;
+    }
+}
+
+/* ZSTD_writeProgressive() :
+*  stores 'value' at ptr as a variable-length sequence of bytes :
+*  7 bits of payload per byte, least significant group first,
+*  bit 7 set on every byte except the last one.
+*  return : number of bytes written (at least 1) */
+static size_t ZSTD_writeProgressive(void* ptr, size_t value)
+{
+    BYTE* const first = (BYTE*)ptr;
+    BYTE* out = first;
+
+    /* as long as more than 7 bits remain, emit a byte carrying the continuation flag */
+    while (value > 127)
+    {
+        *out++ = (BYTE)((value & 127) | 128);
+        value >>= 7;
+    }
+    *out++ = (BYTE)value;   /* final byte, flag cleared */
+
+    return (size_t)(out - first);
+}
+
+
+/* ZSTD_readProgressive() :
+*  decodes a value stored by ZSTD_writeProgressive() :
+*  7 bits of payload per byte, least significant group first, bit 7 set on every byte except the last one.
+*  The decoded value is written into *result.
+*  return : number of bytes read from ptr (at least 1)
+*  Note : the input length is not known here; reading stops at the first byte whose bit 7 is clear. */
+static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
+{
+    const BYTE* const bStart = (const BYTE*)ptr;
+    const BYTE* byte = bStart;
+    const U32 maxShift = (U32)(sizeof(size_t) * 8);
+    size_t r = 0;
+    U32 shift = 0;
+
+    do
+    {
+        /* widen to size_t before shifting : an int shift overflows as soon as the value needs more than 4 bytes.
+         * Groups which would land beyond the width of size_t (malformed input) are skipped, shifting that far is undefined. */
+        if (shift < maxShift) r += (size_t)(*byte & 127) << shift;
+        shift += 7;
+    } while (*byte++ & 128);
+
+    *result = r;
+    return (size_t)(byte - bStart);
+}
+
+
+/**************************************
+*  Local structures
+***************************************/
+/* Kinds of block. NOTE(review): how each kind is encoded in a block header is not visible from here. */
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+/* Description of one block : its kind and a size */
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;   /* NOTE(review): presumably the regenerated (original) size of the block - confirm against the block header reader */
+} blockProperties_t;
+
+/* Compression context : allocated by ZSTD_createCCtx(), released by ZSTD_freeCCtx() */
+typedef struct
+{
+    const BYTE* base;      /* set to NULL by ZSTD_resetCCtx() */
+    U32 current;
+    BYTE* workplace;       /* buffer of WORKPLACESIZE bytes, allocated by ZSTD_createCCtx() */
+#ifdef _INCLUDED_IMM
+    __m256i justToBeAligned;   /* NOTE(review): going by its name, only present to raise the alignment of the structure - confirm */
+#endif
+    U32   hashTable[HASH_TABLESIZE];   /* zeroed by ZSTD_resetCCtx() */
+} refTables_t;
+
+
+/* ZSTD_createCCtx() :
+*  allocates a compression context and its workplace buffer (WORKPLACESIZE bytes).
+*  return : the new context, to be released with ZSTD_freeCCtx(),
+*           or NULL if memory could not be allocated.
+*  Note : 'base', 'current' and the hash table are not initialized here. */
+ZSTD_cctx_t ZSTD_createCCtx(void)
+{
+    refTables_t* srt = (refTables_t *) malloc( sizeof(refTables_t) );
+    if (srt == NULL) return (ZSTD_cctx_t)NULL;
+
+    srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
+    if (srt->workplace == NULL)
+    {
+        free(srt);   /* do not leak the half-built context */
+        return (ZSTD_cctx_t)NULL;
+    }
+
+    return (ZSTD_cctx_t)srt;
+}
+
+
+/* Puts a context back into its initial state :
+ * every hash table entry is zeroed and the base pointer is forgotten.
+ * The workplace buffer is left untouched. */
+void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
+{
+    refTables_t* const tables = ctx;
+
+    memset(tables->hashTable, 0, sizeof(tables->hashTable));
+    tables->base = NULL;
+}
+
+
+/* Releases a context and its workplace buffer.
+ * A NULL context is accepted and ignored, like free(NULL).
+ * @return : always 0 */
+size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
+{
+    refTables_t *srt = (refTables_t *) (ctx);
+    if (srt == NULL) return 0;   /* nothing to release */
+    free(srt->workplace);
+    free(srt);
+    return 0;
+}
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* tells if a return value is an error code */
+unsigned ZSTD_isError(size_t code)
+{
+    return (code > (size_t)(-ZSTD_ERROR_maxCode));
+}
+
+/* Table of error names, indexed by the positive error number (see ZSTD_getErrorName()).
+ * ZSTD_GENERATE_STRING turns its argument into a string literal followed by a comma;
+ * ZSTD_LIST_ERRORS (defined outside this section) is expected to apply it to every error name. */
+#define ZSTD_GENERATE_STRING(STRING) #STRING,
+static const char* ZSTD_errorStrings[] = { ZSTD_LIST_ERRORS(ZSTD_GENERATE_STRING) };
+
+/* provides error code string (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (ZSTD_isError(code)) return ZSTD_errorStrings[-(int)(code)];
+    return codeError;
+}
+
+
+/**************************************
+*  Tool functions
+**************************************/
+/* Returns the library version, as defined by ZSTD_VERSION_NUMBER at compile time */
+unsigned ZSTD_versionNumber (void)
+{
+    return ZSTD_VERSION_NUMBER;
+}
+
+/* Returns the position of the highest set bit of 'val' (0 = least significant bit).
+ * One of three implementations is selected at compile time :
+ * MSVC intrinsic, GCC builtin, or a portable De Bruijn table lookup.
+ * NOTE(review): val==0 has no meaningful answer; __builtin_clz(0) is documented as undefined.
+ * ZSTD_compressEntropy() calls this with 0 and then overrides the result - confirm this is acceptable. */
+static unsigned ZSTD_highbit(U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse(&r, val);
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   // GCC Intrinsic
+    return 31 - __builtin_clz(val);   /* number of leading zeros -> index of the top bit */
+#   else   // Software version
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    /* propagate the highest set bit into every lower position */
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    /* the top 5 bits of the product select the table entry */
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+/* Given 'val', the XOR of two machine words read from memory (see ZSTD_count()),
+ * returns how many of their first bytes in memory order are identical.
+ * Little endian : first bytes are the low bits, so this is (trailing zero bits) / 8.
+ * Big endian    : first bytes are the high bits, so this is (leading zero bits) / 8.
+ * Each case uses an MSVC intrinsic, a GCC builtin, or a software fallback;
+ * defining LZ4_FORCE_SW_BITCOUNT forces the software fallback wherever it is tested.
+ * NOTE(review): val is expected to be non-zero (the only caller in this section guarantees it) -
+ * the builtins are undefined for 0. */
+static unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+    if (ZSTD_isLittleEndian())
+    {
+        if (ZSTD_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            /* (val & -val) isolates the lowest set bit; the top 6 bits of the product index the table */
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            /* same technique as the 64-bit fallback, with a 32-entry table */
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (ZSTD_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            /* narrow down the position of the first non-zero byte : by 4 bytes, by 2 bytes, then by 1 */
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz(val) >> 3);
+#       else
+            /* narrow down the position of the first non-zero byte : by 2 bytes, then by 1 */
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+/* Counts how many consecutive bytes are identical at pIn and pMatch,
+ * without examining pIn beyond pInLimit.
+ * Compares one machine word at a time, then finishes with 4, 2 and 1 byte steps.
+ * @return : length of the common prefix, in bytes */
+static unsigned ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const origin = pIn;
+    const BYTE* const wordLimit = pInLimit - (sizeof(size_t)-1);
+
+    /* word-sized steps */
+    while (pIn < wordLimit)
+    {
+        const size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        if (diff)
+        {
+            /* first difference is inside this word */
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (unsigned)(pIn - origin);
+        }
+        pIn += sizeof(size_t);
+        pMatch += sizeof(size_t);
+    }
+
+    /* tail : less than one word left */
+    if (ZSTD_64bits() && (pIn < (pInLimit-3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn)))
+    {
+        pIn += 4;
+        pMatch += 4;
+    }
+    if ((pIn < (pInLimit-1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn)))
+    {
+        pIn += 2;
+        pMatch += 2;
+    }
+    if ((pIn < pInLimit) && (*pMatch == *pIn))
+        pIn++;
+
+    return (unsigned)(pIn - origin);
+}
+
+
+/********************************************************
+*  Compression
+*********************************************************/
+/* Maximum compressed size for an input of srcSize bytes :
+ * the FSE bound for that size, plus 12 bytes. */
+size_t ZSTD_compressBound(size_t srcSize)
+{
+    const size_t entropyBound = FSE_compressBound(srcSize);
+    return entropyBound + 12;
+}
+
+
+/* Emits an RLE block : a 3-byte block header carrying srcSize and the bt_rle type,
+ * followed by the first byte of src, which stands for the whole block.
+ * @return : number of bytes written (ZSTD_blockHeaderSize+1) */
+static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const out = dst;
+
+    /* capacity is not checked again : at this stage,
+       dstSize >= FSE_compressBound(srcSize) > (ZSTD_blockHeaderSize+1) (checked by ZSTD_compressLiterals()) */
+    (void)maxDstSize;
+
+    /* payload : the repeated byte, right after the header */
+    out[ZSTD_blockHeaderSize] = *(BYTE*)src;
+
+    /* header : block type in the 2 upper bits of byte 0, srcSize stored most significant byte first */
+    out[0] = (BYTE)((BYTE)(srcSize>>16) + (BYTE)(bt_rle<<6));
+    out[1] = (BYTE)(srcSize>>8);
+    out[2] = (BYTE)srcSize;
+
+    return ZSTD_blockHeaderSize+1;
+}
+
+
+/* Emits a raw (uncompressed) block : a 3-byte block header carrying srcSize and the bt_raw type,
+ * followed by a verbatim copy of src.
+ * @return : number of bytes written (ZSTD_blockHeaderSize+srcSize),
+ *           or an error code if dst is too small */
+static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const out = dst;
+    const size_t total = ZSTD_blockHeaderSize + srcSize;
+
+    if (total > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* payload */
+    memcpy(out + ZSTD_blockHeaderSize, src, srcSize);
+
+    /* header : block type in the 2 upper bits of byte 0, srcSize stored most significant byte first */
+    out[0] = (BYTE)((BYTE)(srcSize>>16) + (BYTE)(bt_raw<<6));
+    out[1] = (BYTE)(srcSize>>8);
+    out[2] = (BYTE)srcSize;
+
+    return total;
+}
+
+
+/* Encodes srcSize literals into dst with two interleaved FSE states sharing one bit stream.
+ * return : the value of FSE_closeCStream().
+ * NOTE(review): the original comment said "size of CStream in bits", but the caller adds this
+ * value to a byte pointer, so it is presumably a size in bytes - confirm against FSE_closeCStream().
+ * dstSize is currently unused : the caller must guarantee that dst is large enough. */
+static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
+                                          const void* src, size_t srcSize,
+                                          const void* CTable)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    FSE_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    // init
+    (void)dstSize;   // objective : ensure it fits into dstBuffer (Todo)
+    FSE_initCStream(&bitC, dst);
+    FSE_initCState(&CState1, CTable);
+    CState2 = CState1;
+
+    /* Note : at this stage, srcSize > LITERALS_NOENTROPY (checked by ZSTD_compressLiterals()) */
+    /* The two steps below consume 1 then 2 symbols when needed, so that the number of
+       remaining symbols is a multiple of what one iteration of the main loop consumes. */
+    // join to mod 2
+    if (srcSize & 1)
+    {
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+        FSE_flushBits(&bitC);
+    }
+
+    // join to mod 4
+    /* only when the main loop encodes 4 symbols per iteration (same static condition as below) */
+    if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2))   // test bit 2
+    {
+        FSE_encodeByte(&bitC, &CState2, *ip++);
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+        FSE_flushBits(&bitC);
+    }
+
+    // 2 or 4 encoding per loop
+    while (ip<iend)
+    {
+        FSE_encodeByte(&bitC, &CState2, *ip++);
+
+        /* extra flush between two symbols when the accumulator is too narrow to hold both */
+        if (sizeof(size_t)*8 < LitFSELog*2+7 )   // this test must be static
+            FSE_flushBits(&bitC);
+
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+
+        /* two more symbols before flushing when the accumulator is wide enough for four */
+        if (sizeof(size_t)*8 > LitFSELog*4+7 )   // this test must be static
+        {
+            FSE_encodeByte(&bitC, &CState2, *ip++);
+            FSE_encodeByte(&bitC, &CState1, *ip++);
+        }
+
+        FSE_flushBits(&bitC);
+    }
+
+    /* write the final value of both states, then terminate the stream */
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return FSE_closeCStream(&bitC);
+}
+
+
+/* Minimum number of bytes that compression must save for an input of srcSize bytes
+ * to be kept in compressed form : srcSize/64, plus 1. */
+size_t ZSTD_minGain(size_t srcSize)
+{
+    const size_t proportional = srcSize >> 6;
+    return proportional + 1;
+}
+
+
+/* Compresses a run of literals with FSE.
+ * Output layout : 3-byte block header (bt_compressed + payload size), FSE table description, FSE bit stream.
+ * @return : total number of bytes written into dst, block header included,
+ *           or 0 when the literals are judged not compressible enough,
+ *           or 1 when FSE_count() returns srcSize (the caller then emits an RLE block),
+ *           or an error code (testable with ZSTD_isError()). */
+static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
+                                     const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart + ZSTD_blockHeaderSize;   /* the block header is filled last, once the size is known */
+    BYTE* const oend = ostart + dstSize;
+
+    U32 maxSymbolValue = 256;
+    U32 tableLog = LitFSELog;
+    U32 count[256];
+    S16 norm[256];
+    U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
+    size_t errorCode;   /* also used to carry non-error results of the FSE functions */
+    const size_t minGain = ZSTD_minGain(srcSize);
+
+    // early out
+    if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    // Scan input and build symbol stats
+    /* NOTE(review): the result is presumably the count of the most frequent symbol - confirm against FSE_count() */
+    errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    if (errorCode == srcSize) return 1;                /* handled as RLE by the caller */
+    if (errorCode < ((srcSize * 7) >> 10)) return 0;   /* give up : reported as not compressible */
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    // Write table description header
+    /* NOTE(review): FSE_MAX_HEADERSIZE is passed as capacity, not the space really left in dst */
+    errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    op += errorCode;
+
+    // Compress
+    errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    op += errorCode;
+
+    // check compressibility
+    /* the output must be at least minGain bytes smaller than the input to be kept */
+    if ( (size_t)(op-ostart) >= srcSize-minGain)
+        return 0;
+
+    // Build header
+    {
+        size_t totalSize;
+        totalSize  = op - ostart - ZSTD_blockHeaderSize;   /* payload size, header excluded */
+        ostart[0]  = (BYTE)(totalSize>>16);
+        ostart[1]  = (BYTE)(totalSize>>8);
+        ostart[2]  = (BYTE)totalSize;
+        ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
+    }
+
+    return op-ostart;
+}
+
+
+/* Final stage of block compression : entropy-codes the streams produced by ZSTD_compressBlock().
+ * Output layout, in order :
+ *   - literals, as a raw, RLE or FSE-compressed sub-block
+ *   - size of the last literal run (variable-length integer)
+ *   - "dumps" : length on 2 or 3 bytes, then the bytes themselves
+ *   - for literal lengths, offset codes and match lengths : one RLE byte, nothing (raw), or an FSE table description
+ *   - one bit stream holding all sequences, encoded from the last to the first
+ * The number of sequences is op_litLength - op_litLength_start; the match length and offset
+ * arrays are expected to hold that many entries.
+ * @return : number of bytes written into dst,
+ *           or 0 if the block is not compressible enough,
+ *           or an error code coming from the literals stage.
+ * NOTE(review): maxDstSize is only enforced by the literals stage; later writes are not bounds-checked here. */
+static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
+                        const BYTE* op_lit_start, const BYTE* op_lit,
+                        const BYTE* op_litLength_start, const BYTE* op_litLength,
+                        const BYTE* op_matchLength_start,
+                        const U32*  op_offset_start,
+                        const BYTE* op_dumps_start, const BYTE* op_dumps,
+                        size_t srcSize, size_t lastLLSize
+                        )
+{
+    FSE_CStream_t blockStream;
+    U32 count[256];
+    S16 norm[256];
+    size_t mostFrequent;
+    U32 max = 255;
+    U32 tableLog = 11;
+    const size_t nbSeq = op_litLength - op_litLength_start;
+    U32 CTable_LitLength  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
+    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
+    U32 LLtype, Offtype, MLtype;
+    BYTE* op;
+    const U32* op_offset = op_offset_start + nbSeq;                /* one past the last offset */
+    const BYTE* op_matchLength = op_matchLength_start + nbSeq;     /* one past the last match length */
+    BYTE offsetBits_start[BLOCKSIZE / 4];                          /* one offset code per sequence */
+    BYTE* offsetBitsPtr = offsetBits_start;
+    const size_t minGain = ZSTD_minGain(srcSize);
+    const size_t maxCSize = srcSize - minGain;                     /* output must stay below this size */
+    const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
+    const size_t maxLSize = maxCSize > minSeqSize ? maxCSize - minSeqSize : 0;   /* budget left for raw literals */
+    BYTE* seqHead;
+
+
+    /* init */
+    op = dst;
+
+    /* Encode literals */
+    {
+        size_t cSize;
+        size_t litSize = op_lit - op_lit_start;
+        /* short runs are stored raw, without attempting entropy coding */
+        if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
+        else
+        {
+            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
+            /* 1 and 0 are special results : "use RLE" and "not compressible" */
+            if (cSize == 1) cSize = ZSTD_compressRle (op, maxDstSize, op_lit_start, litSize);
+            else if (cSize == 0)
+            {
+                if (litSize >= maxLSize) return 0;   /* block not compressible enough */
+                cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
+            }
+        }
+        if (ZSTD_isError(cSize)) return cSize;
+        op += cSize;
+    }
+
+    /* Encode Sequences */
+
+    /* seqHeader */
+    op += ZSTD_writeProgressive(op, lastLLSize);
+    seqHead = op;   /* first byte of the dumps header : table types are added into it further below */
+
+    /* dumps */
+    {
+        size_t dumpsLength = op_dumps- op_dumps_start;
+        if (dumpsLength < 512)
+        {
+            /* 2-byte form : first byte is 0 or 1 */
+            op[0] = (BYTE)(dumpsLength >> 8);
+            op[1] = (BYTE)(dumpsLength);
+            op += 2;
+        }
+        else
+        {
+            /* 3-byte form : first byte is 2, then the length on 2 bytes */
+            op[0] = 2;
+            op[1] = (BYTE)(dumpsLength>>8);
+            op[2] = (BYTE)(dumpsLength);
+            op += 3;
+        }
+        memcpy(op, op_dumps_start, dumpsLength);
+        op += dumpsLength;
+    }
+
+    /* Each of the three tables below follows the same decision :
+       - every symbol identical        -> bt_rle, the symbol itself is written
+       - few sequences or flat stats   -> bt_raw, nothing is written
+       - otherwise                     -> bt_compressed, a normalized FSE table description is written */
+
+    /* Encoding table of Literal Lengths */
+    max = MaxLL;
+    mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
+    if (mostFrequent == nbSeq)
+    {
+        *op++ = *op_litLength_start;
+        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+        LLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_LitLength, LLbits);
+        LLtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+        LLtype = bt_compressed;
+    }
+
+    /* Encoding table of Offsets */
+    {
+        /* create OffsetBits */
+        /* offset code = 0 for offset 0, else (position of the highest set bit) + 1 */
+        size_t i;
+        max = MaxOff;
+        for (i=0; i<nbSeq; i++)
+        {
+#if 1
+            offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
+            if (op_offset_start[i]==0) offsetBits_start[i]=0;
+#else
+            U32 offset = op_offset_start[i];
+            U32 r;
+            r = ZSTD_highbit(offset) + 1;
+            if (offset==0) r = 0;
+            offsetBits_start[i] = (BYTE)r;
+#endif
+        }
+        offsetBitsPtr += nbSeq;   /* now one past the last code, ready for the backward pass */
+        mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
+    }
+    if (mostFrequent == nbSeq)
+    {
+        *op++ = *offsetBits_start;
+        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+        Offtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
+        Offtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+        Offtype = bt_compressed;
+    }
+
+    /* Encoding Table of MatchLengths */
+    max = MaxML;
+    mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
+    if (mostFrequent == nbSeq)
+    {
+        *op++ = *op_matchLength_start;
+        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+        MLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
+        MLtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+        MLtype = bt_compressed;
+    }
+
+    /* the three table types share the first dumps header byte with the top bits of the dumps length */
+    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    // Encoding
+    {
+        FSE_CState_t stateMatchLength;
+        FSE_CState_t stateOffsetBits;
+        FSE_CState_t stateLitLength;
+
+        FSE_initCStream(&blockStream, op);
+        FSE_initCState(&stateMatchLength, CTable_MatchLength);
+        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
+        FSE_initCState(&stateLitLength, CTable_LitLength);
+
+        /* sequences are encoded from the last one to the first one;
+           the trailing columns are the author's bit budget on 32-bit and 64-bit targets */
+        while (op_litLength > op_litLength_start)
+        {
+            BYTE matchLength = *(--op_matchLength);
+            U32  offset = *(--op_offset);
+            BYTE offCode = *(--offsetBitsPtr);                              /* 32b*/  /* 64b*/
+            U32 nbBits = (offCode-1) * (!!offCode);                         /* offCode-1 raw bits, none when offCode==0 */
+            BYTE litLength = *(--op_litLength);                             /* (7)*/  /* (7)*/
+            FSE_encodeByte(&blockStream, &stateMatchLength, matchLength);   /* 17 */  /* 17 */
+            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
+            FSE_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
+            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
+            FSE_encodeByte(&blockStream, &stateOffsetBits, offCode);        /* 16 */  /* 51 */
+            FSE_encodeByte(&blockStream, &stateLitLength, litLength);       /* 26 */  /* 61 */
+            FSE_flushBits(&blockStream);                                    /*  7 */  /*  7 */
+        }
+
+        FSE_flushCState(&blockStream, &stateMatchLength);
+        FSE_flushCState(&blockStream, &stateOffsetBits);
+        FSE_flushCState(&blockStream, &stateLitLength);
+    }
+
+    op += FSE_closeCStream(&blockStream);
+
+    /* check compressibility */
+    if ((size_t)(op-dst) >= maxCSize) return 0;
+
+    return op - dst;
+}
+
+
+/* Stores one sequence (a run of literals followed by a match) into the intermediate streams :
+ *   op_lit    : receives the litLength literal bytes read from srcLit
+ *   op_ll     : receives the literal length, capped at MaxLL
+ *   op_offset : receives the offset
+ *   op_ml     : receives the match length, capped at MaxML
+ *   op_dumps  : receives the part of a length that does not fit in its one-byte code
+ * The pointers are passed by value : the caller advances its own copies.
+ * @return : number of bytes consumed in the dumps stream */
+static size_t ZSTD_encode(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
+                         size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
+{
+    const BYTE* const dumpStart = op_dumps;
+    const BYTE* const l_end = op_lit + litLength;
+
+
+    /* copy Literals */
+    /* 8 bytes at a time : may read and write up to 7 bytes beyond litLength */
+    while (op_lit<l_end) COPY8(op_lit, srcLit);
+
+    /* literal Length */
+    if (litLength >= MaxLL)
+    {
+        *op_ll++ = MaxLL;   /* escape value : the real length continues in dumps */
+        if (litLength<255 + MaxLL)
+            *op_dumps++ = (BYTE)(litLength - MaxLL);
+        else
+        {
+            *op_dumps++ = 255;   /* second escape : the full length follows */
+            /* writes 4 bytes but advances by 3 only.
+               NOTE(review): keeping the 3 low-order bytes this way presumably relies on a little-endian target - confirm */
+            *(U32*)op_dumps = (U32)litLength; op_dumps += 3;   /* store direct result */
+        }
+    }
+    else *op_ll = (BYTE)litLength;
+
+    /*  match offset */
+    *op_offset = (U32)offset;
+
+    /* match Length */
+    /* same scheme as the literal length, with MaxML as escape value */
+    if (matchLength >= MaxML)
+    {
+        *op_ml++ = MaxML;
+        if (matchLength<255 + MaxML)
+            *op_dumps++ = (BYTE)(matchLength - MaxML);
+        else
+        {
+            *op_dumps++ = 255;
+            *(U32*)op_dumps = (U32)matchLength; op_dumps += 3;   /* store direct result */
+        }
+    }
+    else *op_ml = (BYTE)matchLength;
+
+    return op_dumps - dumpStart;
+}
+
+
+/* Hashing constants.
+ * Only prime7bytes is referenced by the active ZSTD_hashPtr(); the other multipliers
+ * belong to the alternative versions kept in comments below.
+ * NOTE(review): hashMask looks unused in this section (ZSTD_hashPtr() uses the HASH_MASK macro) - confirm */
+static const U32 hashMask = (1<<HASH_LOG)-1;
+static const U64 prime5bytes =         889523592379ULL;
+static const U64 prime6bytes =      227718039650203ULL;
+static const U64 prime7bytes =    58295818150454627ULL;
+static const U64 prime8bytes = 14923729446516375013ULL;
+
+//static U32   ZSTD_hashPtr(const void* p) { return (U32) _bextr_u64(*(U64*)p * prime7bytes, (56-HASH_LOG), HASH_LOG); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) << 8 >> (64-HASH_LOG)); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & ((1<<HASH_LOG)-1); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
+
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
+/* Active version : reads 8 bytes at 'p', multiplies them by prime7bytes, and keeps HASH_LOG bits of the product.
+ * The 8 bytes are loaded through memcpy() so that unaligned addresses and strict-aliasing rules
+ * cannot trigger undefined behavior. */
+static U32   ZSTD_hashPtr(const void* p) { U64 v; memcpy(&v, p, sizeof(v)); return ( (v * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
+
+/* Records position 'p' in the hash table : the slot selected by the hash of the bytes at 'p'
+ * receives the distance between 'p' and 'start'. */
+static void  ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start)
+{
+    const U32 slot = ZSTD_hashPtr(p);
+    table[slot] = (U32)(p-start);
+}
+
+/* Looks up the position previously recorded for the hash of the bytes at 'p',
+ * then records 'p' itself in that slot.
+ * @return : the previously recorded position (start + stored distance) */
+static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start)
+{
+    const U32 slot = ZSTD_hashPtr(p);
+    const BYTE* const previous = table[slot] + start;
+
+    table[slot] = (U32)(p-start);
+    return previous;
+}
+
+/* Tells whether the 4 bytes at 'match' and the 4 bytes at 'ip' are identical.
+ * @return : 1 if they are, 0 otherwise */
+static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
+{
+    const U32 candidate = *(U32*)match;
+    const U32 current = *(U32*)ip;
+
+    return candidate == current;
+}
+
+
+/* Compresses one block.
+ * Stage 1 : searches matches through the context's hash table and splits the input into sequences
+ *           (literals, literal lengths, match lengths, offsets, dumps), stored in the context's workplace.
+ * Stage 2 : hands these streams to ZSTD_compressEntropy().
+ * @return : the result of ZSTD_compressEntropy() : compressed size, 0 if not compressible enough, or an error code.
+ * NOTE(review): the workplace is assumed large enough for the layout below, and ilimit = iend-16
+ * presumably expects srcSize >= 16 - confirm against WORKPLACESIZE and the callers. */
+static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    // Local Variables
+    refTables_t* srt = (refTables_t*) ctx;
+    U32*  HashTable = srt->hashTable;
+    BYTE* workplace = srt->workplace;
+    const BYTE* const base = srt->base;
+
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart + 1;
+    const BYTE* anchor = istart;               /* start of the literals not yet emitted */
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 16;      /* no match search within the last 16 bytes */
+
+    /* the workplace is carved into 5 consecutive areas : literals, literal lengths, match lengths, offsets, dumps */
+    BYTE *op_l = workplace, *op_l_start = op_l;
+    BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
+    BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
+    U32  *op_offset = (U32*)(op_ml + (srcSize >> 2) + 4), *op_offset_start = op_offset;
+    BYTE *op_dumps = (BYTE*)(op_offset + (srcSize >> 2) + 4), *op_dumps_start = op_dumps;
+    size_t prevOffset=0, offset=0;
+    size_t lastLLSize;
+
+
+    /* Main Search Loop */
+    while (ip < ilimit)
+    {
+        /* candidate = position last recorded for this hash; ip takes its place in the table */
+        const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);
+
+        /* no match : skip ahead, faster and faster as the current literal run grows */
+        if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
+
+        /* catch up */
+        /* extend the match backwards over identical preceding bytes */
+        while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; }
+
+        {
+            size_t litLength = ip-anchor;
+            size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);   /* length beyond the first MINMATCH bytes */
+            size_t offsetCode;
+            /* an offset equal to prevOffset is stored as code 0 */
+            if (litLength) prevOffset = offset;
+            offsetCode = ip-match;
+            if (offsetCode == prevOffset) offsetCode = 0;
+            prevOffset = offset;
+            offset = ip-match;
+            op_dumps += ZSTD_encode(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
+            op_l += litLength;
+
+            /* Fill Table */
+            ZSTD_addPtr(HashTable, ip+1, base);
+            ip += matchLength + MINMATCH;
+            if (ip<=iend-8) ZSTD_addPtr(HashTable, ip-2, base);   /* ZSTD_hashPtr() reads 8 bytes */
+            anchor = ip;
+        }
+    }
+
+    /* Last Literals */
+    lastLLSize = iend - anchor;
+    memcpy(op_l, anchor, lastLLSize);
+    op_l += lastLLSize;
+
+    /* Final compression stage */
+    return ZSTD_compressEntropy(dst, maxDstSize,
+        op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
+        srcSize, lastLLSize);
+}
+
+
+/* Raises every hash table entry that is lower than 'limit' up to 'limit'
+ * (each entry becomes max(entry, limit)).
+ * An AVX2 version handling 8 entries per iteration is used when _INCLUDED_IMM is defined;
+ * otherwise, the plain loop should be auto-vectorized by compiler.
+ * Note : the only call visible in this section is inside commented-out code of ZSTD_compressContinue(). */
+void ZSTD_limitCtx(void* ctx, const U32 limit)
+{
+    refTables_t* srt = (refTables_t*) ctx;
+    U32* h = srt->hashTable;
+    int i;
+
+#ifdef _INCLUDED_IMM   /* <immintrin.h> */
+    /* AVX2 version */
+    const __m256i limit8 = _mm256_set1_epi32(limit);   /* 'limit' replicated into 8 lanes */
+    for (i=0; i<HASH_TABLESIZE; i+=8)
+    {
+        __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));
+                src = _mm256_max_epu32(src, limit8);   /* unsigned max, lane by lane */
+        _mm256_storeu_si256((__m256i*)(h+i), src);
+    }
+#else
+    for (i=0; i<HASH_TABLESIZE; ++i)
+    {
+        h[i] = h[i] > limit ? h[i] : limit;
+    }
+#endif
+}
+
+
+/* Starts a new frame : resets the context and writes the 4-byte magic number,
+ * most significant byte first, at the beginning of dst.
+ * @return : 4 (number of bytes written), or an error code if dst holds less than 4 bytes */
+size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
+{
+    const size_t frameHeaderSize = 4;
+
+    /* the magic number must fit */
+    if (maxDstSize < frameHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* fresh context for the new frame */
+    ZSTD_resetCCtx(ctx);
+
+    /* frame header */
+    ZSTD_writeBE32(dst, ZSTD_magicNumber);
+
+    return frameHeaderSize;
+}
+
+
+/* Compresses src as a series of blocks of at most BLOCKSIZE bytes, appended into dst.
+ * Each block is written as a 3-byte header followed by its content;
+ * a block that does not compress well enough is stored raw instead.
+ * When src does not directly follow the data previously given to this context, the context is reset first.
+ * @return : number of bytes written into dst, or an error code (testable with ZSTD_isError()) */
+size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    refTables_t* ctx = (refTables_t*) cctx;
+    const BYTE* const istart = src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    //U32 limit = 4 * BLOCKSIZE;
+    //const U32 updateRate = 2 * BLOCKSIZE;
+
+    // Init
+    if (ctx->base==NULL) ctx->base = src, ctx->current=0;   /* first input since the last reset */
+    if (src != ctx->base + ctx->current)   // not contiguous
+    {
+            ZSTD_resetCCtx(ctx);
+            ctx->base = src;
+            ctx->current = 0;
+    }
+    ctx->current += (U32)srcSize;   /* where contiguous input is expected to start next time */
+
+    while (srcSize)
+    {
+        size_t cSize;
+        size_t blockSize = BLOCKSIZE;
+        if (blockSize > srcSize) blockSize = srcSize;   /* last block may be shorter */
+
+        /*
+        // update hash table
+        if (g_maxDistance <= BLOCKSIZE)   // static test
+        {
+            ZSTD_resetCCtx(ctx);
+            ctx->base = ip;
+            ctx->current=0;
+        }
+        else if (ip >= istart + limit)
+        {
+            limit += updateRate;
+            ZSTD_limitCtx(ctx, limit - g_maxDistance);
+        }
+        */
+
+        // compress
+        if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+        /* the block content is produced after the space reserved for its header */
+        cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
+        if (cSize == 0)
+        {
+            /* not compressible enough : raw block (ZSTD_noCompressBlock() writes its own header) */
+            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);
+            if (ZSTD_isError(cSize)) return cSize;
+        }
+        else
+        {
+            if (ZSTD_isError(cSize)) return cSize;
+            /* header : block type in the 2 upper bits of byte 0, compressed size stored most significant byte first */
+            op[0] = (BYTE)(cSize>>16);
+            op[1] = (BYTE)(cSize>>8);
+            op[2] = (BYTE)cSize;
+            op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
+            cSize += 3;   /* account for the header */
+        }
+        op += cSize;
+        maxDstSize -= cSize;
+        ip += blockSize;
+        srcSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+
+/* Ends a frame : writes a 3-byte block header of type bt_end with a size field of zero.
+ * The context is not used.
+ * @return : 3 (number of bytes written), or an error code if dst is smaller than a block header */
+size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
+{
+    BYTE* const out = dst;
+
+    (void)ctx;
+
+    /* the end mark must fit */
+    if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* end of frame mark */
+    out[2] = 0;
+    out[1] = 0;
+    out[0] = (BYTE)(bt_end << 6);
+
+    return 3;
+}
+
+
+/* Compresses src into dst as one complete frame, using an existing context :
+ * frame header, then all blocks, then the end mark.
+ * @return : total number of bytes written into dst,
+ *           or the error code of the first stage that failed */
+static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    size_t written;
+
+    /* frame header */
+    written = ZSTD_compressBegin(ctx, dst, maxDstSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+    maxDstSize -= written;
+
+    /* blocks */
+    written = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+    maxDstSize -= written;
+
+    /* end mark */
+    written = ZSTD_compressEnd(ctx, op, maxDstSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+
+    return (op - ostart);
+}
+
+
+/* Compresses src into dst as one complete frame.
+ * A temporary context is created for the call and released before returning.
+ * @return : number of bytes written into dst, or an error code (testable with ZSTD_isError()).
+ *           A context that could not be obtained (NULL) is reported as a generic error
+ *           instead of being dereferenced. */
+size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    void* ctx;
+    size_t r;
+
+    ctx = ZSTD_createCCtx();
+    if (ctx == NULL) return (size_t)-ZSTD_ERROR_GENERIC;
+    r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
+    ZSTD_freeCCtx(ctx);
+    return r;
+}
+
+
+/**************************************************************
+*   Decompression code
+**************************************************************/
+
+/* Decodes a 3-byte block header.
+ * Byte 0 carries the block type in its 2 upper bits and the 3 upper bits of the size field
+ * in its 3 lower bits; bytes 1 and 2 carry the remaining 16 bits of the size field.
+ * @bpPtr  : receives the block type, and the size field as origSize for bt_rle blocks (0 otherwise)
+ * @return : number of bytes that follow the header : 0 for bt_end, 1 for bt_rle,
+ *           the size field for the other types; or an error code if srcSize < 3 */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const header = src;
+    U32 sizeField;
+
+    if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+
+    sizeField = ((header[0] & 7)<<16) + (header[1]<<8) + header[2];
+    bpPtr->blockType = header[0] >> 6;
+
+    switch (bpPtr->blockType)
+    {
+    case bt_end:
+        bpPtr->origSize = 0;
+        return 0;
+    case bt_rle:
+        bpPtr->origSize = sizeField;   /* regenerated size; only 1 byte follows */
+        return 1;
+    default:
+        bpPtr->origSize = 0;
+        return sizeField;
+    }
+}
+
+
+/* Copies the content of a raw block into dst.
+ * @return : srcSize (number of bytes copied), or an error code if dst is too small */
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize <= maxDstSize)
+    {
+        memcpy(dst, src, srcSize);
+        return srcSize;
+    }
+
+    return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+}
+
+
+/* Decodes FSE-compressed literals with two interleaved states.
+ * Symbols are written backwards, starting from the end of dst : on success, the decoded
+ * literals occupy the last (return value) bytes of the dst buffer.
+ * @fast   : selects FSE_decodeSymbolFast() instead of FSE_decodeSymbol()
+ * @return : number of decoded bytes, or an error code
+ * force inline : 'fast' really needs to be evaluated at compile time */
+FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
+                       void* const dst, size_t maxDstSize,
+                 const void* src, size_t srcSize,
+                 const void* DTable, U32 fast)
+{
+    BYTE* op = (BYTE*) dst;
+    BYTE* const olimit = op;              /* start of dst : lowest address that may be written */
+    BYTE* const oend = op + maxDstSize;
+    FSE_DStream_t bitD;
+    FSE_DState_t state1, state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, src, srcSize);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    FSE_initDState(&state1, &bitD, DTable);
+    FSE_initDState(&state2, &bitD, DTable);
+    op = oend;   /* output is filled from the end towards the start */
+
+    // 2 symbols per loop
+    /* runs while the stream reloads normally and more than 3 bytes of room remain,
+       since one iteration may write up to 4 symbols */
+    while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
+    {
+        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        /* extra reload between two symbols when the accumulator is too narrow for both */
+        if (LitFSELog*2+7 > sizeof(size_t)*8)    // This test must be static
+            FSE_reloadDStream(&bitD);
+
+        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+
+        /* two more symbols per iteration when the accumulator is wide enough for four */
+        if (LitFSELog*4+7 < sizeof(size_t)*8)    // This test must be static
+        {
+            *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+            *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+        }
+    }
+
+    /* tail */
+    /* one symbol at a time, alternating states, with a full set of end conditions before each symbol */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+    }
+
+    /* end ? */
+    /* success requires the stream and both states to be exhausted together */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
+        return oend-op;
+
+    if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-ZSTD_ERROR_GENERIC;
+}
+
+/* Dispatches to the generic literals decoder with 'fast' given as a literal constant (1 or 0),
+ * so that each inlined copy of the decoder can be specialized for one mode.
+ * @return : the result of the generic decoder */
+static size_t ZSTD_decompressLiterals_usingDTable(
+                       void* const dst, size_t maxDstSize,
+                 const void* src, size_t srcSize,
+                 const void* DTable, U32 fast)
+{
+    return fast
+         ? ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1)
+         : ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
+}
+
+/* Decodes the content of an FSE-compressed literals block (block type assumed : bt_compressed) :
+ * reads the table description, builds the decoding table inside 'ctx', then decodes the bit stream.
+ * @return : the result of ZSTD_decompressLiterals_usingDTable(), or an error code */
+static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
+                                const void* src, size_t srcSize)
+{
+    const BYTE* in = src;
+    void* const decodingTable = ctx;   /* the context memory is used to hold the decoding table */
+    short normalized[256];
+    U32 maxSymbol = 255;
+    U32 log2Size;
+    size_t status;
+
+    /* too small input size */
+    if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
+
+    /* table description : on success, 'status' is the number of bytes it occupies */
+    status = FSE_readHeader (normalized, &maxSymbol, &log2Size, in, srcSize);
+    if (FSE_isError(status)) return (size_t)-ZSTD_ERROR_GENERIC;
+    in += status;
+    srcSize -= status;
+
+    /* decoding table : on success, 'status' is the fast mode flag */
+    status = FSE_buildDTable (decodingTable, normalized, maxSymbol, log2Size);
+    if (FSE_isError(status)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, in, srcSize, decodingTable, (U32)status);
+}
+
+
+size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                                void* dst, size_t maxDstSize,
+                          const BYTE** litPtr,
+                          const void* src, size_t srcSize)
+{
+    const BYTE* const istart = src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = dst;
+    BYTE* const oend = ostart + maxDstSize;
+    blockProperties_t litbp;
+
+    size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
+    if (ZSTD_isError(litcSize)) return litcSize;
+    if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
+    ip += ZSTD_blockHeaderSize;
+
+    switch(litbp.blockType)
+    {
+    case bt_raw: *litPtr = ip; ip+= litcSize; break;
+    case bt_rle:
+        {
+            size_t rleSize = litbp.origSize;
+            memset(oend - rleSize, *ip, rleSize);
+            *litPtr = oend - rleSize;
+            ip++;
+            break;
+        }
+    case bt_compressed:
+        {
+            size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+            if (ZSTD_isError(cSize)) return cSize;
+            *litPtr = oend - cSize;
+            ip += litcSize;
+            break;
+        }
+    default:
+        return (size_t)-ZSTD_ERROR_GENERIC;
+    }
+
+    return ip-istart;
+}
+
+
+size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
+                               void* DTableLL, void* DTableML, void* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* SeqHead */
+    ip += ZSTD_readProgressive(lastLLPtr, ip);
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t errorCode;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            errorCode = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            FSE_buildDTable(DTableLL, norm, max, LLlog);
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            FSE_buildDTable_rle(DTableOffb, *ip++); break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            errorCode = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            FSE_buildDTable(DTableOffb, norm, max, Offlog);
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            errorCode = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            FSE_buildDTable(DTableML, norm, max, MLlog);
+        }
+    }
+
+    return ip-istart;
+}
+
+
+#define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)(p); (void)pByte; }   /* forces a one-byte volatile read at p; argument parenthesised so expressions such as `a+b` are cast as a whole */
+
+FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
+                             const void* src, size_t srcSize)
+{   /* Decodes one compressed block : literals sub-block, sequence headers, then the sequence loop. Returns nb of bytes written into dst, or an error code. */
+    const BYTE* ip = src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode;
+    size_t lastLLSize;
+    const BYTE* dumps;
+    const BYTE* litPtr;
+    const BYTE* litEnd;
+    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};   /* added to `match` after the first 4 bytes are copied; indexed by offset (<8) */
+    const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted from `match` after the next 4 bytes are copied; indexed by offset (<8) */
+    void* DTableML = ctx;   /* workspace layout : [ML table][LL table][Offset table] */
+    void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
+    void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);
+
+    /* blockType == blockCompressed, srcSize is trusted */
+
+    /* literal sub-block */
+    errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);   /* also receives ctx : the literals table is built there before the sequence tables are */
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    /* end pos */
+    if ((litPtr>=ostart) && (litPtr<=oend))   /* literals were decoded into dst : they end at oend */
+        litEnd = oend - lastLLSize;
+    else   /* literals still live in src : they end where the sequences section starts (ip not advanced yet) */
+        litEnd = ip - lastLLSize;
+    ip += errorCode;
+
+    /* decompression */
+    {
+        FSE_DStream_t DStream;
+        FSE_DState_t stateLL, stateOffb, stateML;
+        size_t prevOffset = 0, offset = 0;
+        size_t qutt=0;   /* nb of literal bytes saved in `saved` around a match copy */
+
+        FSE_initDStream(&DStream, ip, iend-ip);
+        FSE_initDState(&stateLL, &DStream, DTableLL);
+        FSE_initDState(&stateOffb, &DStream, DTableOffb);
+        FSE_initDState(&stateML, &DStream, DTableML);
+
+        while (FSE_reloadDStream(&DStream)<2)
+        {
+            U32 nbBits, offsetCode;
+            const BYTE* match;
+            size_t litLength;
+            size_t matchLength;
+            size_t newOffset;
+
+_another_round:   /* also entered by goto from the end-of-stream checks after the loop */
+
+            /* Literals */
+            litLength = FSE_decodeSymbol(&stateLL, &DStream);
+            if (litLength) prevOffset = offset;   /* after a non-empty literal run, offsetCode 0 repeats the most recent offset */
+            if (litLength == MaxLL)   /* escape value : the real length is completed from the dumps area */
+            {
+                BYTE add = *dumps++;
+                if (add < 255) litLength += add;
+                else
+                {
+                    litLength = (*(U32*)dumps) & 0xFFFFFF;   /* low 24 bits of a native U32 load; replaces (not adds to) the length. NOTE(review): unaligned, endian-dependent, dumps not bounds-checked here */
+                    dumps += 3;
+                }
+            }
+            if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
+                memmove(op, litPtr, litLength);   /* overwrite risk */
+            else
+                ZSTD_wildcopy(op, litPtr, litLength);
+            op += litLength;
+            litPtr += litLength;
+
+            /* Offset */
+            offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
+            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
+            nbBits = offsetCode - 1;
+            if (offsetCode==0) nbBits = 0;   /* cmove */
+            newOffset = FSE_readBits(&DStream, nbBits);
+            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
+            newOffset += (size_t)1 << nbBits;   /* offset = 2^nbBits + extra bits */
+            if (offsetCode==0) newOffset = prevOffset;   /* code 0 : reuse the previous offset */
+            match = op - newOffset;   /* NOTE(review): not checked against ostart in this function */
+            prevOffset = offset;
+            offset = newOffset;
+
+            /* MatchLength */
+            matchLength = FSE_decodeSymbol(&stateML, &DStream);
+            if (matchLength == MaxML)   /* escape value, same scheme as for literal lengths */
+            {
+                BYTE add = *dumps++;
+                if (add < 255) matchLength += add;
+                else
+                {
+                    matchLength = (*(U32*)dumps) & 0xFFFFFF;
+                    dumps += 3;
+                }
+            }
+            matchLength += MINMATCH;
+
+            /* copy Match */
+            {
+                BYTE* const endMatch = op + matchLength;
+                U64 saved[2];   /* 16-byte scratch for literals the match copy may overwrite */
+
+                if ((size_t)(litPtr - endMatch) < 12)   /* pending literals start less than 12 bytes after the match end : save them first */
+                {
+                    qutt = endMatch + 12 - litPtr;
+                    if ((litPtr + qutt) > oend) qutt = oend-litPtr;
+                    memcpy(saved, litPtr, qutt);
+                }
+
+                if (offset < 8)   /* source and destination overlap within 8 bytes : copy 4 single bytes, then 4 more after adjusting match */
+                {
+                    const size_t dec64 = dec64table[offset];
+                    op[0] = match[0];
+                    op[1] = match[1];
+                    op[2] = match[2];
+                    op[3] = match[3];
+                    match += dec32table[offset];
+                    ZSTD_copy4(op+4, match);
+                    match -= dec64;
+                } else { ZSTD_copy8(op, match); }
+
+                if (endMatch > oend-12)   /* close to the end of dst : bulk copy only up to oend-8, finish byte by byte */
+                {
+                    if (op < oend-16)
+                    {
+                        ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
+                        match += (oend-8) - op;
+                        op = oend-8;
+                    }
+                    while (op<endMatch) *op++ = *match++;
+                }
+                else
+                    ZSTD_wildcopy(op+8, match+8, matchLength-8);   /* works even if matchLength < 8 */
+
+                op = endMatch;
+
+                if ((size_t)(litPtr - endMatch) < 12)   /* same test as above : put the saved literals back */
+                    memcpy((void*)litPtr, saved, qutt);
+            }
+        }
+
+        /* check if reached exact end */
+        if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC;   /* requested too much : data is corrupted */
+        if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round;   /* some ultra-compressible sequence remain ! */
+        if (litPtr != litEnd) goto _another_round;   /* literals not entirely spent */
+
+        /* last literal segment */
+        if (op != litPtr) memmove(op, litPtr, lastLLSize);   /* nothing to move when the literals already sit at op */
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    size_t errorCode=0;
+    blockProperties_t blockProperties;
+
+    /* Header */
+    if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    while (1)
+    {
+        size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(blockSize))
+            return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (ip+blockSize > iend)
+            return (size_t)-ZSTD_ERROR_wrongSrcSize;
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+            break;
+        case bt_raw :
+            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
+            break;
+        case bt_end :
+            /* end of frame */
+            if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+        if (blockSize == 0) break;   /* bt_end */
+
+        if (ZSTD_isError(errorCode)) return errorCode;
+        op += errorCode;
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+/* ZSTD_decompress() : one-shot frame decompression; the FSE table workspace lives on the stack for the duration of the call */
+size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    U32 workspace[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];   /* room for the LL, Offset and ML tables */
+    return ZSTD_decompressDCtx(workspace, dst, maxDstSize, src, srcSize);
+}
+
+
+/******************************
+*  Streaming Decompression API
+******************************/
+
+typedef struct   /* state kept between two ZSTD_decompressContinue() calls */
+{
+    U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];   /* FSE table workspace; must stay first, the struct pointer itself is handed to ZSTD_decompressBlock() as its ctx */
+    size_t expected;   /* exact srcSize the next ZSTD_decompressContinue() call must receive */
+    blockType_t bType;   /* type of the block whose payload comes with the next call; only read once started != 0 */
+    U32 started;   /* 0 until the magic number has been checked, and again after the end-of-frame block */
+} dctx_t;
+
+/* ZSTD_createDCtx() : allocates a streaming decompression context, ready for the start of a frame; returns NULL if allocation fails. Release it with ZSTD_freeDCtx(). */
+ZSTD_dctx_t ZSTD_createDCtx(void)
+{
+    dctx_t* dctx = malloc(sizeof(dctx_t));
+    if (dctx==NULL) return NULL;   /* out of memory : report it instead of dereferencing NULL */
+    dctx->expected = 4 + ZSTD_blockHeaderSize; dctx->started = 0;   // first input : Frame Header + Block Header
+    return (ZSTD_dctx_t)dctx;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)   /* releases a context allocated by ZSTD_createDCtx() */
+{
+    free(dctx);
+    return 0;   /* always 0 */
+}
+
+/* ZSTD_getNextcBlockSize() : nb of input bytes the next ZSTD_decompressContinue() call must be given; 0 once the end-of-frame block has been seen */
+size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
+{
+    return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{   /* One streaming step. src = [payload of current block][header of next block]; on the first call, [magic number][first block header]. Returns nb of bytes written into dst (0 on the first call), or an error code. */
+    dctx_t* ctx = (dctx_t*)dctx;
+    size_t cSize = srcSize - ZSTD_blockHeaderSize;   /* payload size; only meaningful once the sanity check below has passed */
+    size_t rSize;
+
+    // Sanity check
+    if ((srcSize != ctx->expected) || (srcSize < ZSTD_blockHeaderSize)) return (size_t)-ZSTD_ERROR_wrongSrcSize;   // 2nd test : after a frame ends expected==0, cSize would wrap and src be over-read
+
+    // Decompress
+    if (!ctx->started)
+    {
+        // Just check correct magic header
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
+        rSize = 0;
+    }
+    else
+    {
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);   /* ctx works as table workspace : the table array is the first member of dctx_t */
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
+            break;
+        case bt_end :
+            rSize = 0;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+    }
+
+    // Prepare next block
+    {
+        const BYTE* header = src;
+        blockProperties_t bp;
+        size_t blockSize;
+        header += cSize;   // next block header sits right after the current payload
+        blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;   // nothing more to feed : frame is complete
+            ctx->started = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize + ZSTD_blockHeaderSize;   // next call : that block's payload + the header following it
+            ctx->bType = bp.blockType;
+            ctx->started = 1;
+        }
+    }
+
+    return rSize;   // may itself be an error code coming from the block decoder
+}
+
+
diff --git a/lib/zstd.h b/lib/zstd.h
new file mode 100644
index 0000000..47ce21f
--- /dev/null
+++ b/lib/zstd.h
@@ -0,0 +1,93 @@
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**************************************
+*  Includes
+**************************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Version
+**************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    0    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)   /* MAJOR*10000 + MINOR*100 + RELEASE */
+unsigned ZSTD_versionNumber (void);   /* NOTE(review): presumably returns ZSTD_VERSION_NUMBER of the linked library - implementation not visible here, confirm */
+
+
+/**************************************
+*  Simple one-step functions
+**************************************/
+size_t ZSTD_compress(   void* dst, size_t maxDstSize,
+                  const void* src, size_t srcSize);
+
+size_t ZSTD_decompress( void* dst, size_t maxOriginalSize,
+                  const void* src, size_t compressedSize);
+
+/*
+ZSTD_compress() :
+    Compresses 'srcSize' bytes from buffer 'src' into buffer 'dst', of maximum size 'maxDstSize'.
+    Destination buffer should be sized to handle the worst-case situation (input data not compressible).
+    Worst-case size evaluation is provided by function ZSTD_compressBound().
+    return : the number of bytes written into buffer 'dst'
+             or an error code if it fails (which can be tested using ZSTD_isError())
+
+ZSTD_decompress() :
+    compressedSize : exact size of the compressed frame stored in 'src' (bytes left over after the end-of-frame block are an error)
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an error code if it fails (which can be tested using ZSTD_isError())
+*/
+
+
+/**************************************
+*  Tool functions
+**************************************/
+size_t      ZSTD_compressBound(size_t srcSize);   /* worst-case compressed size for 'srcSize' input bytes; use it to size 'dst' for ZSTD_compress() */
+
+/* Error Management */
+unsigned    ZSTD_isError(size_t code);         /* tells if a function's size_t return value is an error code rather than a size */
+const char* ZSTD_getErrorName(size_t code);    /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
new file mode 100755
index 0000000..952a006
--- /dev/null
+++ b/lib/zstd_static.h
@@ -0,0 +1,80 @@
+/*

+    zstd - standard compression library

+    Header File for static linking only

+    Copyright (C) 2014-2015, Yann Collet.

+

+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

+

+    Redistribution and use in source and binary forms, with or without

+    modification, are permitted provided that the following conditions are

+    met:

+    * Redistributions of source code must retain the above copyright

+    notice, this list of conditions and the following disclaimer.

+    * Redistributions in binary form must reproduce the above

+    copyright notice, this list of conditions and the following disclaimer

+    in the documentation and/or other materials provided with the

+    distribution.

+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+

+    You can contact the author at :

+    - zstd source repository : https://github.com/Cyan4973/zstd

+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c

+*/

+#pragma once

+

+#if defined (__cplusplus)

+extern "C" {

+#endif

+

+/**************************************

+*  Includes

+**************************************/

+#include "zstd.h"

+

+

+/**************************************

+*  Streaming functions

+**************************************/

+typedef void* ZSTD_cctx_t;   /* opaque handle to a streaming compression context */

+ZSTD_cctx_t ZSTD_createCCtx(void);

+size_t      ZSTD_freeCCtx(ZSTD_cctx_t cctx);

+

+size_t ZSTD_compressBegin(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);

+size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);

+size_t ZSTD_compressEnd(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);

+

+typedef void* ZSTD_dctx_t;   /* opaque handle to a streaming decompression context */

+ZSTD_dctx_t ZSTD_createDCtx(void);   /* heap-allocates a context, ready to receive the start of a frame */

+size_t      ZSTD_freeDCtx(ZSTD_dctx_t dctx);   /* frees the context; returns 0 */

+

+size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx);   /* exact srcSize to pass to the next ZSTD_decompressContinue() call; 0 once the frame has ended */

+size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);   /* srcSize must equal ZSTD_getNextcBlockSize(); returns nb of bytes written into dst (can be 0), or an error code */

+

+

+/**************************************

+*  Error management

+**************************************/

+#define ZSTD_LIST_ERRORS(ITEM) \

+        ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \

+        ITEM(ZSTD_ERROR_wrongMagicNumber) \

+        ITEM(ZSTD_ERROR_wrongSrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \

+        ITEM(ZSTD_ERROR_wrongLBlockSize) \

+        ITEM(ZSTD_ERROR_maxCode)

+

+#define ZSTD_GENERATE_ENUM(ENUM) ENUM,   /* turns one ZSTD_LIST_ERRORS item into an enumerator */

+typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes;   /* exposed list of errors; static linking only. The library returns them negated and cast, e.g. (size_t)-ZSTD_ERROR_GENERIC */

+

+

+#if defined (__cplusplus)

+}

+#endif
\ No newline at end of file