Initial release
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..35be5a8
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,14 @@
+# Set the default behavior
+* text eol=lf
+
+# Explicitly declare source files
+*.c text eol=lf
+*.h text eol=lf
+
+# Denote files that should not be modified.
+*.odt binary
+# Visual Studio
+*.sln binary
+*.suo binary
+*.vcxproj* binary
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9fa3b1b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+# Object files
+*.o
+*.ko
+
+# Libraries
+*.lib
+*.a
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..f302fab
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,14 @@
+language: c
+compiler: gcc
+script: make test-travis
+before_install:
+  - sudo apt-get update  -qq
+  - sudo apt-get install -qq gcc-multilib
+  - sudo apt-get install -qq valgrind
+
+env:
+  - ZSTD_TRAVIS_CI_ENV=travis-install
+  - ZSTD_TRAVIS_CI_ENV=test-all  
+
+matrix:
+  fast_finish: true
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d718849
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,93 @@
+# ################################################################
+# zstd - Makefile
+# Copyright (C) Yann Collet 2014-2015
+# All rights reserved.
+# 
+# BSD license
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - zstd source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version number (exported, so it is placed in the environment of the sub-makes started below)
+export VERSION=0
+export RELEASE=r$(VERSION)
+
+DESTDIR?=
+PREFIX ?= /usr
+
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+PRGDIR  = programs
+ZSTDDIR = lib
+
+# Select test target for Travis CI's Build Matrix : any test-* value of ZSTD_TRAVIS_CI_ENV is routed through prg-travis, any other value is used directly as a target name
+ifneq (,$(filter test-%,$(ZSTD_TRAVIS_CI_ENV)))
+TRAVIS_TARGET=prg-travis
+else
+TRAVIS_TARGET=$(ZSTD_TRAVIS_CI_ENV)
+endif
+
+
+.PHONY: default all zstdprograms clean
+default: zstdprograms
+# Sub-makes are chained with && so that a failed cd never runs $(MAKE) in this directory
+all:
+	@cd $(ZSTDDIR) && $(MAKE) -e all
+	@cd $(PRGDIR) && $(MAKE) -e all
+
+zstdprograms:
+	@cd $(PRGDIR) && $(MAKE) -e
+
+clean:
+	@cd $(PRGDIR) && $(MAKE) clean
+	@cd $(ZSTDDIR) && $(MAKE) clean
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+.PHONY: install uninstall travis-install test test-travis prg-travis
+install:
+	@cd $(ZSTDDIR) && $(MAKE) -e install
+	@cd $(PRGDIR) && $(MAKE) -e install
+
+uninstall:
+	@cd $(ZSTDDIR) && $(MAKE) uninstall
+	@cd $(PRGDIR) && $(MAKE) uninstall
+
+travis-install:
+	sudo $(MAKE) install
+
+test:
+	@cd $(PRGDIR) && $(MAKE) -e test
+
+test-travis: $(TRAVIS_TARGET)
+
+# Forward the requested test-* target itself : $(TRAVIS_TARGET) is only the name of this rule
+prg-travis:
+	@cd $(PRGDIR) && $(MAKE) -e $(ZSTD_TRAVIS_CI_ENV)
+endif
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..27d2b8a
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,3 @@
+r0
+initial release
+
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..8bb27f7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,44 @@
+ **ZSTD**, short for Z-Standard, is a new lossless compression algorithm, which provides both good compression ratio _and_ speed for your standard compression needs. "Standard" translates into everyday situations which neither look for highest possible ratio (which LZMA and ZPAQ cover) nor extreme speeds (which LZ4 covers).
+
+It is provided as a BSD-license package, hosted on Github.
+
+|Branch      |Status   |
+|------------|---------|
+|master      | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) |
+|dev         | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) |
+
+For a taste of its performance, here are a few benchmark numbers, completed on a Core i5-4300U @ 1.9 GHz, using [fsbench 0.14.3](http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029), an open-source benchmark program by m^2.
+
+|Name           | Ratio | C.speed | D.speed |
+|---------------|-------|---------|---------|
+|               |       |   MB/s  |  MB/s   |
+| zlib 1.2.8 -6 | 3.099 |    18   |  275    |
+| **ZSTD**      |**2.872**|**201**|**498**  |
+| zlib 1.2.8 -1 | 2.730 |    58   |   250   |
+| LZ4 HC r127   | 2.720 |   26    |  1720   |
+| QuickLZ 1.5.1b6|2.237 |  323    |  373    |
+| LZO 2.06      | 2.106 |  351    |  510    |
+| Snappy 1.1.0  | 2.091 |  238    |  964    |
+| LZ4 r127      | 2.084 |  370    | 1590    |
+| LZF 3.6       | 2.077 |  220    |  502    |
+
+An interesting feature of ZSTD is that it can qualify as both a reasonably strong compressor and a fast one.
+
+ZSTD delivers high decompression speed, at around ~500 MB/s per core.
+Obviously, your exact mileage will vary depending on your target system.
+
+ZSTD compression speed, on the other hand, can be configured to fit different situations.
+The first, fast, derivative offers ~200 MB/s per core, which is suitable for a few real-time scenarios.
+But similar to LZ4, ZSTD can offer derivatives trading compression time for compression ratio, while keeping decompression properties intact. "Offline compression", where compression time is of little importance because the content is only compressed once and decompressed many times, is therefore within the scope.
+
+Note that high compression derivatives still have to be developed.
+It's a complex area which will certainly benefit from the contributions of a few experts.
+
+
+Another property ZSTD is developed for is configurable memory requirement, with the objective to fit into low-memory configurations, or servers handling many connections in parallel.
+
+ZSTD development is starting. So consider current results merely as early ones. The implementation will gradually evolve and improve over time, especially during this first year. This is a phase which will depend a lot on user feedback, since this feedback will be key in deciding the next priorities or features to add.
+
+The "master" branch is reserved for stable release and betas.
+The "dev" branch is the one where all contributions will be merged. If you plan to propose a patch, please commit into the "dev" branch. Direct commits to "master" are not permitted.
+Feature branches will also exist, typically to introduce new requirements, and be temporarily available for testing before merge into "dev" branch.
diff --git a/lib/LICENSE b/lib/LICENSE
new file mode 100644
index 0000000..3549585
--- /dev/null
+++ b/lib/LICENSE
@@ -0,0 +1,26 @@
+ZSTD Library
+Copyright (c) 2014-2015, Yann Collet
+All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 0000000..4d83816
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,114 @@
+# ################################################################
+# ZSTD library - Makefile
+# Copyright (C) Yann Collet 2015
+# All rights reserved.
+# 
+# BSD license
+
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - ZSTD source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version numbers
+VERSION?= 0
+LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
+LIBVER  = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+
+DESTDIR?=
+PREFIX ?= /usr
+CFLAGS ?= -O3
+CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
+
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+
+
+# OS X linker doesn't support -soname, and uses a different extension; its install_name follows $(LIBDIR), the directory the install rule copies the library to
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+  SHARED_EXT = dylib
+  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+  SHARED_EXT = so
+  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
+endif
+
+# Command-style targets : libzstd emits libzstd.a and libzstd.$(SHARED_EXT)*, never a file called libzstd
+.PHONY: default all libzstd clean
+default all: libzstd
+
+libzstd: zstd.c
+	@echo compiling static library
+	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
+	@$(AR) rcs libzstd.a zstd.o
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
+
+clean:
+	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+libzstd.pc: libzstd.pc.in Makefile
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: libzstd libzstd.pc
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@cp -a libzstd.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
+	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
+	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd static and shared library installed
+
+# rm -f is already a no-op on missing files; a [ -f X ] && rm guard returns non-zero when X is absent and aborts make
+uninstall:
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
+	@rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@echo zstd libraries successfully uninstalled
+
+endif
diff --git a/lib/fse.c b/lib/fse.c
new file mode 100755
index 0000000..526f822
--- /dev/null
+++ b/lib/fse.c
@@ -0,0 +1,1573 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  Generic function type & suffix (C template emulation)
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+#include "fse_static.h"
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+static unsigned FSE_isLittleEndian(void)   /* run-time endianness probe */
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];   /* 1 when the least significant byte of the U32 is stored first, 0 otherwise */
+}
+
+static U32 FSE_read32(const void* memPtr)   /* reads 4 bytes at memPtr, in host byte order */
+{
+    U32 val32;
+    memcpy(&val32, memPtr, 4);   /* byte copy : no alignment requirement on memPtr */
+    return val32;
+}
+
+static U32 FSE_readLE32(const void* memPtr)   /* reads a 32-bit little-endian value, whatever the host endianness */
+{
+    if (FSE_isLittleEndian())
+        return FSE_read32(memPtr);   /* host order already matches */
+    else
+    {
+        const BYTE* p = memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));   /* assemble byte by byte, lowest byte first */
+    }
+}
+
+static void FSE_writeLE32(void* memPtr, U32 val32)   /* stores val32 at memPtr as 4 little-endian bytes */
+{
+    if (FSE_isLittleEndian())
+    {
+        memcpy(memPtr, &val32, 4);   /* host order already matches */
+    }
+    else
+    {
+        BYTE* p = memPtr;
+        p[0] = (BYTE)val32;   /* lowest byte goes first */
+        p[1] = (BYTE)(val32>>8);
+        p[2] = (BYTE)(val32>>16);
+        p[3] = (BYTE)(val32>>24);
+    }
+}
+
+static U64 FSE_read64(const void* memPtr)   /* reads 8 bytes at memPtr, in host byte order */
+{
+    U64 val64;
+    memcpy(&val64, memPtr, 8);   /* byte copy : no alignment requirement on memPtr */
+    return val64;
+}
+
+static U64 FSE_readLE64(const void* memPtr)   /* reads a 64-bit little-endian value, whatever the host endianness */
+{
+    if (FSE_isLittleEndian())
+        return FSE_read64(memPtr);   /* host order already matches */
+    else
+    {
+        const BYTE* p = memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));   /* lowest byte first */
+    }
+}
+
+static void FSE_writeLE64(void* memPtr, U64 val64)   /* stores val64 at memPtr as 8 little-endian bytes */
+{
+    if (FSE_isLittleEndian())
+    {
+        memcpy(memPtr, &val64, 8);   /* host order already matches */
+    }
+    else
+    {
+        BYTE* p = memPtr;
+        p[0] = (BYTE)val64;   /* lowest byte goes first */
+        p[1] = (BYTE)(val64>>8);
+        p[2] = (BYTE)(val64>>16);
+        p[3] = (BYTE)(val64>>24);
+        p[4] = (BYTE)(val64>>32);
+        p[5] = (BYTE)(val64>>40);
+        p[6] = (BYTE)(val64>>48);
+        p[7] = (BYTE)(val64>>56);
+    }
+}
+
+static size_t FSE_readLEST(const void* memPtr)   /* reads a little-endian size_t : 4 bytes when sizeof(size_t)==4, 8 bytes otherwise */
+{
+    if (sizeof(size_t)==4)   /* compile-time constant condition */
+        return FSE_readLE32(memPtr);
+    else
+        return FSE_readLE64(memPtr);
+}
+
+static void FSE_writeLEST(void* memPtr, size_t val)   /* writes val as a little-endian size_t : 4 bytes when sizeof(size_t)==4, 8 bytes otherwise */
+{
+    if (sizeof(size_t)==4)   /* compile-time constant condition */
+        FSE_writeLE32(memPtr, (U32)val);
+    else
+        FSE_writeLE64(memPtr, (U64)val);
+}
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct   /* element type of the symbolTT[] array described in the CTable layout comment of the compression section */
+{
+    int  deltaFindState;
+    U16  maxState;
+    BYTE minBitsOut;
+    /* one byte padding */
+} FSE_symbolCompressionTransform;
+
+typedef struct   /* storage sized with FSE_CTABLE_SIZE_U32 for FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE */
+{
+    U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];   /* compatible with FSE_compressU16() */
+} CTable_max_t;
+
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (register U32 val)   /* position (0..31) of the highest set bit of val; NOTE(review): val==0 is not meaningful on the intrinsic paths - confirm callers never pass 0 */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse ( &r, val );
+    return (unsigned) r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz (val);   /* leading-zero count converted to a bit position */
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;   /* smear the highest set bit into every lower position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];   /* top 5 bits of the product select the table entry */
+    return r;
+#   endif
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }   /* errors are returned as (size_t)-FSE_ERROR_xxx; true for any value above (size_t)-FSE_ERROR_maxCode */
+
+#define FSE_GENERATE_STRING(STRING) #STRING,
+static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
+
+const char* FSE_getErrorName(size_t code)   /* returns the string generated from FSE_LIST_ERRORS for an error code */
+{
+    static const char* codeError = "Unspecified error code";
+    if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];   /* error codes are negated enum values, so -(int)code is used as the table index */
+    return codeError;   /* any value FSE_isError() does not flag as an error */
+}
+
+static short FSE_abs(short a)   /* absolute value of a */
+{
+    return a<0? -a : a;   /* -a is computed as int (integer promotion) and converted back to short on return */
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)   /* size bound that FSE_writeHeader() compares the output buffer against */
+{
+    size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;   /* (maxSymbolValue+1)*tableLog bits, converted to bytes, plus 1 */
+    return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;   /* maxSymbolValue==0 : FSE_MAX_HEADERSIZE is returned instead */
+}
+
+static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
+                                       const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                                       unsigned safeWrite)   /* safeWrite!=0 : buffer known to be large enough, output bound checks are skipped */
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream;   /* pending bits, emitted 16 at a time, low bits first */
+    int bitCount;   /* number of valid bits currently held in bitStream */
+    unsigned charnum = 0;
+    int previous0 = 0;   /* set when the previous symbol had a zero count : a zero-run length follows */
+
+    bitStream = 0;
+    bitCount  = 0;
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while (remaining>1)   /* stops at 1 */
+    {
+        if (previous0)
+        {
+            unsigned start = charnum;
+            while (!normalizedCounter[charnum]) charnum++;   /* skip the run of zero-count symbols */
+            while (charnum >= start+24)   /* each group of 24 zeroes is coded as 16 set bits */
+            {
+                start+=24;
+                bitStream += 0xFFFFU<<bitCount;   /* unsigned constant : bitCount can be 16 here, and 0xFFFF<<16 overflows a signed int */
+                if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (charnum >= start+3)   /* each group of 3 zeroes is coded as the 2-bit value 3 */
+            {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (charnum-start) << bitCount;   /* remaining 0..2 zeroes, on 2 bits */
+            bitCount += 2;
+            if (bitCount>16)
+            {
+                if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+            }
+        }
+        {
+            short count = normalizedCounter[charnum++];
+            const short max = (short)((2*threshold-1)-remaining);
+            remaining -= FSE_abs(count);
+            if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);   /* values below max take one bit less */
+            previous0 = (count==1);   /* count was incremented above : 1 means the stored count is 0 */
+            while (remaining<threshold) nbBits--, threshold>>=1;
+        }
+        if (bitCount>16)
+        {
+            if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+        }
+    }
+
+    /* flush remaining bitStream */
+    if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC;   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;   /* only the bytes that actually hold bits are counted */
+
+    if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;   /* Too many symbols written (a bit too late?) */
+
+    return (out-ostart);   /* header size, in bytes */
+}
+
+
+size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)   /* returns the header size in bytes, or an error code (test with FSE_isError()) */
+{
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported */
+
+    if (headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog))   /* buffer smaller than the bound : write with output bound checks (safeWrite=0) */
+        return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);   /* buffer reaches the bound : bound checks skipped */
+}
+
+
+size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)   /* returns the number of header bytes consumed, or an error code (test with FSE_isError()) */
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;   /* set when the previous symbol had a zero count : a zero-run length follows */
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;   /* the first FSE_readLE32() below always fetches 4 bytes */
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))   /* on entry *maxSVPtr is the highest symbol value the caller can store */
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)   /* 16 set bits : 24 more zero-count symbols */
+            {
+                n0+=24;
+                ip+=2;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            while ((bitStream & 3) == 3)   /* 2-bit value 3 : 3 more zero-count symbols */
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            ip += bitCount>>3;   /* advance by the whole bytes consumed, keep the leftover bit offset */
+            bitCount &= 7;
+            bitStream = FSE_readLE32(ip) >> bitCount;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)   /* small value : coded on nbBits-1 bits */
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount   += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount   += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            ip += bitCount>>3;   /* advance by the whole bytes consumed, keep the leftover bit offset */
+            bitCount &= 7;
+            bitStream = FSE_readLE32(ip) >> bitCount;
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
+    *maxSVPtr = charnum-1;   /* on exit : highest symbol value actually present in the header */
+
+    ip += bitCount>0;   /* a partially consumed byte counts as used */
+    if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* arguably a bit late , tbd */
+    return ip-istart;
+}
+
+
+/****************************************************************
+*  FSE Compression Code
+****************************************************************/
+/*
+CTable is a variable size structure which contains :
+    U16 tableLog;
+    U16 maxSymbolValue;
+    U16 nextStateNumber[1 << tableLog];                         // This size is variable
+    FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];  // This size is variable
+Allocation is manual, since C standard does not support variable-size structures.
+*/
+
+size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)   /* size in bytes of a CTable for these parameters, or an error code when tableLog > FSE_MAX_TABLELOG */
+{
+    size_t size;
+    FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t));   /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);   /* the macro counts U32 units */
+    return size;
+}
+
+void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    /* An oversized tableLog is clamped (not rejected) before sizing the allocation */
+    unsigned const cappedLog = (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) ? FSE_TABLELOG_ABSOLUTE_MAX : tableLog;
+    size_t const nbBytes = FSE_CTABLE_SIZE_U32 (cappedLog, maxSymbolValue) * sizeof(U32);
+    return malloc(nbBytes);   /* NULL when malloc() fails */
+}
+
+void  FSE_freeCTable (void* CTable)   /* releases a table allocated with malloc(), such as the one returned by FSE_createCTable() */
+{
+    free(CTable);   /* forwards directly to free() */
+}
+
+/* Emergency distribution strategy (fallback) : takes `points` units away, one per visit, cycling over symbols 0..maxSymbolValue and touching only counters > 1. Compression will suffer a lot; consider increasing table size. NOTE(review): does not terminate if points != 0 while no counter is > 1 -- confirm the caller guarantees enough slack. */
+static void FSE_emergencyDistrib(short* normalizedCounter, int maxSymbolValue, short points)
+{
+    int s=0;   /* symbol currently visited */
+    while (points)
+    {
+        if (normalizedCounter[s] > 1)   /* a counter is never lowered below 1 */
+        {
+            normalizedCounter[s]--;
+            points--;
+        }
+        s++;
+        if (s>maxSymbolValue) s=0;   /* wrap around to the first symbol */
+    }
+}
+
+/* Fallback distribution (corner case) : removes `points` units from normalizedCounter[], charging the largest counters first. Compression will suffer a bit; consider increasing table size. */
+void FSE_distribNpts(short* normalizedCounter, int maxSymbolValue, short points)
+{
+    int s;
+    int rank[5] = {0};   /* rank[0..3] : symbol indices by decreasing counter; rank[4] only receives the entry pushed out by an insertion */
+    int fallback=0;      /* set when the reduction applied to rank[3] came out as 0; the next full pass then switches to the emergency strategy */
+
+    /* Sort 4 largest (they'll absorb normalization rounding) */
+    for (s=1; s<=maxSymbolValue; s++)
+    {
+        int i, b=3;
+        if (b>=s) b=s-1;   /* only s entries have been ranked so far */
+        while ((b>=0) && (normalizedCounter[s]>normalizedCounter[rank[b]])) b--;   /* look for the insertion slot */
+        for (i=3; i>b; i--) rank[i+1] = rank[i];   /* shift smaller entries one slot down */
+        rank[b+1]=s;
+    }
+
+    /* Distribute points */
+    s = 0;
+    while (points)
+    {
+        short limit = normalizedCounter[rank[s+1]]+1;   /* lowest value rank[s] may take while staying above the next-ranked counter */
+        if (normalizedCounter[rank[s]] >= limit + points )
+        {
+            normalizedCounter[rank[s]] -= points;   /* this counter alone absorbs everything that is left */
+            break;
+        }
+        points -= normalizedCounter[rank[s]] - limit;   /* take what this counter can give ... */
+        normalizedCounter[rank[s]] = limit;              /* ... and move on to the next rank */
+        s++;
+        if (s==3)
+        {
+            short reduction = points>>2;   /* share asked from the 4th largest counter */
+            if (fallback)
+            {
+                FSE_emergencyDistrib(normalizedCounter, maxSymbolValue, points);    /* Fallback mode */
+                return;
+            }
+            if (reduction < 1) reduction=1;
+            if (reduction >= normalizedCounter[rank[3]]) reduction=normalizedCounter[rank[3]]-1;   /* cap so that the counter is not brought to 0 by this step */
+            fallback = (reduction==0);
+            normalizedCounter[rank[3]]-=reduction;
+            points-=reduction;
+            s=0;   /* restart from the largest counter */
+        }
+    }
+}
+
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 const srcCeiling  = FSE_highbit32((U32)(srcSize - 1)) - 2;    /* accuracy can be reduced for small inputs */
+    U32 const symbolFloor = FSE_highbit32(maxSymbolValue+1) + 1;      /* minimum needed to safely represent all symbol values */
+    U32 chosen = maxTableLog ? maxTableLog : FSE_DEFAULT_TABLELOG;    /* 0 selects the default */
+    if (srcCeiling < chosen) chosen = srcCeiling;
+    if (symbolFloor > chosen) chosen = symbolFloor;
+    if (chosen < FSE_MIN_TABLELOG) chosen = FSE_MIN_TABLELOG;
+    return (chosen > FSE_MAX_TABLELOG) ? FSE_MAX_TABLELOG : chosen;   /* final clamp to the supported range */
+}
+
+
+typedef struct   /* one sortable entry per symbol, used by FSE_adjustNormSlow() */
+{
+    U32 id;      /* symbol value */
+    U32 count;   /* sorting weight : starts as the symbol count, or 0 when the symbol must not be reduced */
+} rank_t;
+
+int FSE_compareRankT(const void* r1, const void* r2)   /* qsort() comparator : orders rank_t entries by decreasing count */
+{
+    const rank_t* R1 = r1;
+    const rank_t* R2 = r2;
+    /* Returns 0 for equal counts : answering -1 for both (a,b) and (b,a) is an inconsistent ordering, which qsort() does not allow */
+    return (R1->count < R2->count) - (R1->count > R2->count);
+}
+
+static void FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)   /* removes pointsToRemove units from norm[], one at a time, always charging the symbol with the largest remaining weight */
+{
+    rank_t rank[FSE_MAX_SYMBOL_VALUE+1];
+    U32 s;
+
+    /* Init : weight = original count; symbols whose norm is <= 1 get weight 0 so that they sort last */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        rank[s].id = s;
+        rank[s].count = count[s];
+        if (norm[s] <= 1) rank[s].count = 0;
+    }
+
+    /* Sort according to count (largest first) */
+    qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
+
+    while(pointsToRemove)
+    {
+        U32 newRank = 1;
+        norm[rank[0].id]--;
+        rank[0].count = (rank[0].count * 3) >> 2;   /* decay the weight, so that the next unit may be taken from another symbol */
+        if (norm[rank[0].id] == 1) rank[0].count = 0;   /* nothing more can be taken from this symbol */
+        while ((newRank <= maxSymbolValue) && (rank[newRank].count > rank[newRank-1].count))   /* bubble the entry down; the bound keeps every read inside the initialized part of rank[] */
+        {
+            rank_t r = rank[newRank-1];
+            rank[newRank-1] = rank[newRank];
+            rank[newRank] = r;
+            newRank++;
+        }
+        pointsToRemove--;
+    }
+}
+
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue)   /* Scales count[0..maxSymbolValue] into normalizedCounter[] so that the symbols share 1<<tableLog slots (-1 marks a low-probability symbol using one slot). Returns tableLog, 0 when one symbol makes up the whole input, or an error code. */
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;   /* Unsupported size */
+    if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC;   /* Too small tableLog, compression potentially impossible */
+    if (total == 0) return (size_t)-FSE_ERROR_GENERIC;   /* Nothing to normalize; also protects the division below */
+    {
+        U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };   /* "rest to beat" : rounding thresholds for proba 0..7, in units of vStep */
+        U64 const scale = 62 - tableLog;
+        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;   /* slots not yet attributed; may end up negative */
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);   /* counts at or below this value become low-probability symbols */
+
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            if (count[s] == total) return 0;   /* a single symbol makes up the whole input */
+            if (count[s] == 0)
+            {
+                normalizedCounter[s]=0;
+                continue;
+            }
+            if (count[s] <= lowThreshold)
+            {
+                normalizedCounter[s] = -1;
+                stillToDistribute--;
+            }
+            else
+            {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8)
+                {
+                    U64 restToBeat;
+                    restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;   /* round up when the remainder exceeds the threshold */
+                }
+                if (proba > largestP)
+                {
+                    largestP=proba;
+                    largest=s;
+                }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+            }
+        }
+        /* When the excess to remove is at least half of the largest counter, the correction is spread over several symbols */
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
+        {
+            /* largest can't accommodate that amount */
+            FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
+            /* (FSE_distribNpts() is an alternative fallback for this case; it is not called here) */
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;   /* the largest symbol absorbs the rounding difference */
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        int s;
+        for (s=0; s<=maxSymbolValue; s++)
+            printf("%3i: %4i \n", s, normalizedCounter[s]);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake CTable, for raw (uncompressed) input : covers symbols 0..(1<<nbBits)-1, each one given minBitsOut == nbBits. Returns 0, or an error code. */
+size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
+{
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    U16* tableU16 = ( (U16*) CTable) + 2;   /* state table, right after the two U16 header fields */
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (tableSize>>1));   /* after header (1 U32) and state table (tableSize U16) */
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+    if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* CTable must be aligned on a 4-byte boundary */
+
+    /* header */
+    tableU16[-2] = (U16) nbBits;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* Build table */
+    for (s=0; s<tableSize; s++)
+        tableU16[s] = (U16)(tableSize + s);   /* next state for symbol s */
+
+    /* Build Symbol Transformation Table */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        symbolTT[s].minBitsOut = (BYTE)nbBits;
+        symbolTT[s].deltaFindState = s-1;
+        symbolTT[s].maxState = (U16)( (tableSize*2) - 1);   /* ensures state <= maxState */
+    }
+
+    return 0;
+}
+
+
+/* fake CTable, for rle (100% always same symbol) input : tableLog 0, only the entry of symbolValue is filled. Returns 0, or an error code. */
+size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
+{
+    const unsigned tableSize = 1;
+    U16* tableU16 = ( (U16*) CTable) + 2;   /* state table, right after the two U16 header fields */
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)CTable + 2);   /* after header (1 U32) and a 1-U32 state table */
+
+    /* safety checks */
+    if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* Must be 4 bytes aligned */
+
+    /* header */
+    tableU16[-2] = (U16) 0;             /* tableLog */
+    tableU16[-1] = (U16) symbolValue;   /* maxSymbolValue */
+
+    /* Build table */
+    tableU16[0] = 0;
+    tableU16[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    {
+        symbolTT[symbolValue].minBitsOut = 0;   /* the single symbol costs no bit */
+        symbolTT[symbolValue].deltaFindState = 0;
+        symbolTT[symbolValue].maxState = (U16)(2*tableSize-1);   /* ensures state <= maxState */
+    }
+
+    return 0;
+}
+
+
+void FSE_initCStream(FSE_CStream_t* bitC, void* start)
+{
+    bitC->startPtr = (char*)start;    /* beginning of the output buffer */
+    bitC->ptr = (char*)start;         /* write cursor, starts at the beginning */
+    bitC->bitPos = 0;                 /* no bit pending in the container */
+    bitC->bitContainer = 0;
+}
+
+void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)   /* points a compression state at the sections of CTable and sets its starting value */
+{
+    const U32 tableLog = ( (U16*) CTable) [0];   /* first U16 of the header */
+    statePtr->value = (ptrdiff_t)1<<tableLog;   /* initial state == table size */
+    statePtr->stateTable = ((const U16*) CTable) + 2;   /* state table follows the two U16 header fields */
+    statePtr->symbolTT = (const U32*)CTable + 1 + (tableLog ? (1<<(tableLog-1)) : 1);   /* skips header (1 U32) + state table (tableSize U16, never less than 1 U32) */
+    statePtr->stateLog = tableLog;
+}
+
+void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)   /* appends the nbBits low bits of value to the container; nothing is written to memory here. mask[] has 26 entries, so nbBits must be <= 25 */
+{
+    static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,  0xFFFFFF, 0x1FFFFFF };   /* up to 25 bits */
+    bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;   /* new bits go above the ones already pending */
+    bitC->bitPos += nbBits;
+}
+
+void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)   /* encodes one symbol : emits the low bits of the current state, then moves to the next state */
+{
+    const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
+    const U16* const stateTable = (const U16*) statePtr->stateTable;
+    int nbBitsOut  = symbolTT[symbol].minBitsOut;
+    nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);   /* branchless : adds 1 bit when value > maxState (relies on the shift propagating the sign of the difference) */
+    FSE_addBits(bitC, statePtr->value, nbBitsOut);
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];   /* next state, looked up in the symbol's section of the state table */
+}
+
+void FSE_flushBits(FSE_CStream_t* bitC)   /* writes the pending bits to the output and keeps the 0..7 leftover bits in the container */
+{
+    size_t nbBytes = bitC->bitPos >> 3;   /* complete bytes available */
+    FSE_writeLEST(bitC->ptr, bitC->bitContainer);   /* NOTE(review): stores the whole container, presumably sizeof(size_t) bytes, while ptr only advances by nbBytes -- confirm the output buffer has that much slack */
+    bitC->bitPos &= 7;
+    bitC->ptr += nbBytes;
+    bitC->bitContainer >>= nbBytes*8;   /* drop the bytes just written */
+}
+
+void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)   /* writes the final value of a compression state into the stream */
+{
+    FSE_addBits(bitC, statePtr->value, statePtr->stateLog);   /* stateLog low bits of the state value */
+    FSE_flushBits(bitC);
+}
+
+
+size_t FSE_closeCStream(FSE_CStream_t* bitC)
+{
+    size_t nbBytes;
+
+    FSE_addBits(bitC, 1, 1);   /* final 1-bit marker */
+    FSE_flushBits(bitC);
+
+    nbBytes = (size_t)(bitC->ptr - bitC->startPtr);   /* complete bytes produced so far */
+    if (bitC->bitPos > 0) nbBytes++;                  /* a last, partially filled byte counts too */
+
+    return nbBytes;
+}
+
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const void* CTable)   /* Encodes src with CTable into dst, walking src from its end to its beginning with two interleaved states. Returns the value of FSE_closeCStream() (number of bytes written). */
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip;
+    const BYTE* const iend = istart + srcSize;
+
+    FSE_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+
+    /* init */
+    (void)dstSize;   /* objective : ensure it fits into dstBuffer (Todo) -- dstSize is currently NOT checked */
+    FSE_initCStream(&bitC, dst);
+    FSE_initCState(&CState1, CTable);
+    CState2 = CState1;
+
+    ip=iend;   /* symbols are consumed backward */
+
+    /* join to even */
+    if (srcSize & 1)
+    {
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+        FSE_flushBits(&bitC);
+    }
+
+    /* join to mod 4 */
+    if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2))   /* only when the loop below handles 4 symbols per round : (srcSize & 2) means one extra pair remains */
+    {
+        FSE_encodeByte(&bitC, &CState2, *--ip);
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+        FSE_flushBits(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while (ip>istart)
+    {
+        FSE_encodeByte(&bitC, &CState2, *--ip);
+
+        if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static : container too small for 2 symbols, flush in between */
+            FSE_flushBits(&bitC);
+
+        FSE_encodeByte(&bitC, &CState1, *--ip);
+
+        if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 )   /* this test must be static : container large enough for 4 symbols per flush */
+        {
+            FSE_encodeByte(&bitC, &CState2, *--ip);
+            FSE_encodeByte(&bitC, &CState1, *--ip);
+        }
+
+        FSE_flushBits(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);   /* final states are stored last, CState2 first */
+    FSE_flushCState(&bitC, &CState1);
+    return FSE_closeCStream(&bitC);
+}
+
+
+static size_t FSE_compressRLE (BYTE *out, BYTE symbol)   /* single-symbol input : stores that symbol and reports a size of 1 */
+{
+    *out=symbol;
+    return 1;   /* number of bytes written */
+}
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }   /* function form of the FSE_COMPRESSBOUND macro; FSE_compress2() requires at least this much room in dst */
+
+
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)   /* Full pipeline : count, normalize, write header, build CTable, encode. Returns the compressed size, 1 for a single repeated symbol, 0 when judged not compressible, or an error code. */
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    U32   count[FSE_MAX_SYMBOL_VALUE+1];
+    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
+    CTable_max_t CTable;
+    size_t errorCode;
+
+    /* early out */
+    if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
+    if (srcSize <= 1) return srcSize;  /* Uncompressed or RLE -- NOTE(review): nothing is written to dst on this path, even when 1 is returned; confirm callers expect that */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;   /* 0 == default */
+    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;               /* 0 == default */
+
+    /* Scan input and build symbol stats */
+    errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);   /* on success, errorCode holds the largest single-symbol count */
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);   /* one symbol fills the whole input */
+    if (errorCode < ((srcSize * 7) >> 10)) return 0;   /* Heuristic : not compressible enough */
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* Write table description header */
+    errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+    op += errorCode;   /* header size */
+
+    /* Compress */
+    errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+    op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);
+
+    /* check compressibility */
+    if ( (size_t)(op-ostart) >= srcSize-1 )   /* no real gain over the raw input */
+        return 0;
+
+    return op-ostart;
+}
+
+
+size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)   /* FSE_compress2() with the default maxSymbolValue and tableLog; same return values */
+{
+    return FSE_compress2(dst, dstSize, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);   /* srcSize is forwarded as is : a (U32) cast would silently drop everything past 4 GB on 64-bit targets */
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+typedef struct   /* one cell of the decoding table, indexed by the current state */
+{
+    U16  newState;   /* base of the next state; the bits read from the stream are added to it */
+    BYTE symbol;     /* symbol decoded from this state */
+    BYTE nbBits;     /* number of bits to read to reach the next state */
+} FSE_decode_t;   /* size == U32 */
+
+/* Specific corner case : RLE compression. Fills dst with originalSize copies of the single byte of cSrc; returns originalSize, or an error code when cSrcSize != 1. */
+size_t FSE_decompressRLE(void* dst, size_t originalSize,
+                   const void* cSrc, size_t cSrcSize)
+{
+    if (cSrcSize != 1) return (size_t)-FSE_ERROR_srcSize_wrong;   /* an RLE block is exactly one byte */
+    memset(dst, *(BYTE*)cSrc, originalSize);
+    return originalSize;
+}
+
+
+size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)   /* fake DTable for rle input : tableLog 0 and a single cell that always yields symbolValue. Returns 0, or an error code. */
+{
+    U32* const base32 = DTable;
+    FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);   /* cells start after the 1-U32 header */
+
+    /* Sanity check */
+    if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* DTable must be aligned on a 4-byte boundary */
+
+    base32[0] = 0;   /* header : tableLog */
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;   /* decoding consumes no bit */
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)   /* fake DTable for raw input : state s decodes symbol s, and every step reads nbBits bits. Returns 0, or an error code. */
+{
+    U32* const base32 = DTable;
+    FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);   /* cells start after the 1-U32 header */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC;             /* min size */
+    if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* DTable must be aligned on a 4-byte boundary */
+
+    /* Build Decoding Table */
+    base32[0] = nbBits;   /* header : tableLog */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;            /* next state is entirely made of the bits read */
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+
+/* FSE_initDStream
+ * Initialize a FSE_DStream_t for backward reading : the container is loaded from the END of srcBuffer.
+ * srcBuffer must point at the beginning of an FSE block.
+ * The function result is the size of the FSE_block (== srcSize).
+ * An error code is returned if srcSize is 0, or if the last byte is 0 (no stop bit).
+ */
+size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+    if (srcSize >=  sizeof(bitD_t))   /* enough input to fill a whole container */
+    {
+        U32 contain32;
+        bitD->start = (char*)srcBuffer;
+        bitD->ptr   = (char*)srcBuffer + srcSize - sizeof(bitD_t);   /* last full container of the block */
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        contain32 = ((BYTE*)srcBuffer)[srcSize-1];   /* last byte : holds the stop bit */
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);   /* skips the padding above the stop bit, and the stop bit itself */
+    }
+    else   /* short input : the container is assembled byte by byte */
+    {
+        U32 contain32;
+        bitD->start = (char*)srcBuffer;
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(BYTE*)(bitD->start);
+        switch(srcSize)   /* every case deliberately falls through to the next one */
+        {
+            case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
+            case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
+            case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
+            case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
+            case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
+            case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) <<  8;
+            default:;
+        }
+        contain32 = ((BYTE*)srcBuffer)[srcSize-1];   /* last byte : holds the stop bit */
+        if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC;   /* stop bit not present */
+        bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
+        bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;   /* the missing high bytes count as already consumed */
+    }
+
+    return srcSize;
+}
+
+
+/* FSE_readBits
+ * Read next n bits from the bitContainer, starting from its most significant unread bit.
+ * Safe for n == 0; use the fast variant *only* if n > 0.
+ * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
+ * return : value extracted.
+ */
+bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
+{
+    bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);   /* right shift split in two steps, so that nbBits==0 never shifts by the full container width */
+    bitD->bitsConsumed += nbBits;
+    return value;
+}
+
+bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits)   /* only if nbBits >= 1 */
+{
+    bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);   /* single right shift : nbBits==0 would shift by the full container width */
+    bitD->bitsConsumed += nbBits;
+    return value;
+}
+
+unsigned FSE_reloadDStream(FSE_DStream_t* bitD)   /* Refills the container, moving ptr backward by the whole bytes consumed. Returns 0 after a full refill; once ptr is at start : 1 = bits remain, 2 = exactly consumed, 3 = over-consumed; in the partial case, 1 if start was reached, else 0. */
+{
+    if (bitD->ptr >= bitD->start + sizeof(bitD_t))   /* a full container can still be read */
+    {
+        bitD->ptr -= bitD->bitsConsumed >> 3;
+        bitD->bitsConsumed &= 7;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);
+        return 0;
+    }
+    if (bitD->ptr == bitD->start)   /* nothing left to load : only report the status */
+    {
+        if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
+        if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
+        return 3;
+    }
+    {   /* close to the start : move back without going below it */
+        U32 nbBytes = bitD->bitsConsumed >> 3;
+        if (bitD->ptr - nbBytes < bitD->start)
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* note : necessarily ptr > start */
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = FSE_readLEST(bitD->ptr);   /* note : necessarily srcSize > sizeof(bitD) */
+        return (bitD->ptr == bitD->start);
+    }
+}
+
+
+void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)   /* reads the initial state from the stream and attaches the decoding cells of DTable */
+{
+    const U32* const base32 = DTable;
+    DStatePtr->state = FSE_readBits(bitD, base32[0]);   /* base32[0] is the header word (tableLog) : the state is read on that many bits */
+    FSE_reloadDStream(bitD);
+    DStatePtr->table = base32 + 1;   /* cells start right after the header word */
+}
+
+BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)   /* returns the symbol of the current state, then reads the bits leading to the next state (works with nbBits == 0) */
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* cell of the current state */
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    bitD_t lowBits = FSE_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)   /* same as FSE_decodeSymbol(), using FSE_readBitsFast() : only valid when every cell has nbBits >= 1 */
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];   /* cell of the current state */
+    const U32 nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    bitD_t lowBits = FSE_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/* FSE_endOfDStream
+   Tells if bitD has reached end of bitStream or not (1 when every bit has been consumed exactly) */
+
+unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
+{
+    return FSE_reloadDStream((FSE_DStream_t*)bitD)==2;   /* note : the cast drops const, and FSE_reloadDStream() may refill *bitD when ptr is not yet at start */
+}
+
+unsigned FSE_endOfDState(const FSE_DState_t* statePtr)   /* 1 when the state value is 0, which the decoder treats as the final state */
+{
+    return statePtr->state == 0;
+}
+
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const void* DTable, unsigned fast)   /* Decodes cSrc into dst with two interleaved states. `fast` selects the decodeSymbolFast variant. Returns the number of bytes written, or an error code. */
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;   /* the main loop may write up to 4 bytes per round */
+
+    FSE_DStream_t bitD;
+    FSE_DState_t state1, state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, DTable);   /* state1 is read first */
+    FSE_initDState(&state2, &bitD, DTable);
+
+
+    /* 2 symbols per loop */
+    while (!FSE_reloadDStream(&bitD) && (op<olimit))   /* runs as long as a full refill succeeds */
+    {
+        *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8)    /* This test must be static : container too small for 2 symbols without a refill */
+            FSE_reloadDStream(&bitD);
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+
+        if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8)    /* This test must be static : container large enough for 4 symbols per refill */
+        {
+            *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+            *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+        }
+    }
+
+    /* tail : one symbol at a time; stops on over-consumed stream, full dst, or clean end of stream */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+    }
+
+    /* end ? success requires the stream exactly consumed and both states at 0 */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const void* DTable, size_t fastMode)
+{
+    /* Both calls pass a literal `fast` flag, so that each inlined copy of the generic body can be specialized */
+    if (!fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1);
+}
+
+
+size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    FSE_decode_t DTable[FSE_MAX_TABLESIZE];
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+    size_t errorCode, fastMode;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(fastMode)) return fastMode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
+}
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
+
+/*
+  2nd part of the file
+  designed to be included
+  for type-specific functions (template equivalent in C)
+  Objective is to write such functions only once, for better maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names : concatenation goes through FSE_CAT so that arguments such as FSE_FUNCTION_EXTENSION are macro-expanded before being pasted */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
+{   /* Histogram of source[] into count[]. *maxSymbolValuePtr : in = largest accepted symbol (0 == default), out = largest symbol present. Returns the highest count, or an error code. `safe` makes every symbol range-checked. */
+    const FSE_FUNCTION_TYPE* ip = source;
+    const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned max=0;
+    int s;
+    /* Four independent tables, summed at the end : consecutive increments then hit different tables */
+    U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+    U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
+
+    /* safety checks */
+    if (!sourceSize)
+    {
+        memset(count, 0, (maxSymbolValue + 1) * sizeof(count[0]));   /* sized by the counter type, not by the symbol type */
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC;   /* maxSymbolValue too large : unsupported */
+    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;            /* 0 == default */
+
+    if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
+    {
+        /* check input values, to avoid count table overflow */
+        while ((size_t)(iend-ip) > 3)   /* at least 4 symbols left; written as a distance so that no pointer before the buffer is formed */
+        {
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
+            if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
+        }
+    }
+    else if (sourceSize >= 16)   /* byte symbols, unchecked : 32-bit reads. Shorter inputs go straight to the tail loop, so nothing is read past the end of source */
+    {
+        U32 cached = FSE_read32(ip); ip += 4;
+        while (ip < iend-15)
+        {
+            U32 c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = FSE_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;   /* the last value read into `cached` has not been counted yet */
+    }
+
+    /* finish last symbols */
+    while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
+
+    for (s=0; s<=(int)maxSymbolValue; s++)
+    {
+        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
+        if (count[s] > max) max = count[s];
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;   /* shrink to the largest symbol actually present */
+    *maxSymbolValuePtr = maxSymbolValue;
+    return (size_t)max;   /* no detour through int : a very large count must not turn into an error code */
+}
+
+/* hidden fast variant (unsafe) : calls the generic counter with safe == 0 */
+size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
+{
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
+}
+
+size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
+{
+    /* One-byte symbols with a limit of 255 or more can never be out of range :
+       the limit is then set to 255 and the unchecked counting path is selected.
+       Every other configuration goes through the range-checked path. */
+    unsigned const bytesCoverAll = (sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255);
+    if (bytesCoverAll) *maxSymbolValuePtr = 255;
+    return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, !bytesCoverAll);
+}
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }   /* stride used by FSE_buildCTable() to spread symbols over the table : 5/8 of tableSize, plus 3 */
+
+size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
+(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)   /* Builds the compression table (header, state table, symbol transforms) from normalized counters. Returns 0, or an error code. */
+{
+    const unsigned tableSize = 1 << tableLog;
+    const unsigned tableMask = tableSize - 1;
+    U16* tableU16 = ( (U16*) CTable) + 2;   /* state table, right after the two U16 header fields */
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );   /* after header (1 U32) and state table */
+    const unsigned step = FSE_tableStep(tableSize);
+    unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];   /* cumul[s] : next free slot, in the state table, of the section belonging to symbol s */
+    U32 position = 0;
+    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* symbol assigned to each table position. NOTE(review): tableLog is not range-checked here; presumably FSE_MAX_TABLESIZE == 1<<FSE_MAX_TABLELOG -- confirm callers never exceed it */
+    U32 highThreshold = tableSize-1;   /* positions above it are reserved for low-probability symbols */
+    unsigned symbol;
+    unsigned i;
+
+    /* safety checks */
+    if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC;   /* CTable must be aligned on a 4-byte boundary */
+
+    /* header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* For explanations on how to distribute symbol values over the table :
+    *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+    /* symbol start positions */
+    cumul[0] = 0;
+    for (i=1; i<=maxSymbolValue+1; i++)
+    {
+        if (normalizedCounter[i-1]==-1)   /* Low prob symbol */
+        {
+            cumul[i] = cumul[i-1] + 1;
+            tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);   /* placed directly at the top of the table */
+        }
+        else
+            cumul[i] = cumul[i-1] + normalizedCounter[i-1];
+    }
+    cumul[maxSymbolValue+1] = tableSize+1;
+
+    /* Spread symbols */
+    for (symbol=0; symbol<=maxSymbolValue; symbol++)
+    {
+        int nbOccurences;
+        for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)   /* never entered for counters of 0 or -1 */
+        {
+            tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* Lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* Must have gone through all positions */
+
+    /* Build table */
+    for (i=0; i<tableSize; i++)
+    {
+        FSE_FUNCTION_TYPE s = tableSymbol[i];
+        tableU16[cumul[s]++] = (U16) (tableSize+i);   /* Table U16 : sorted by symbol order; gives next state value */
+    }
+
+    /* Build Symbol Transformation Table */
+    {
+        unsigned s;
+        unsigned total = 0;   /* number of state-table slots used by the previous symbols */
+        for (s=0; s<=maxSymbolValue; s++)
+        {
+            switch (normalizedCounter[s])
+            {
+            case 0:   /* absent symbol : entry left untouched */
+                break;
+            case -1:
+            case 1:   /* a single slot */
+                symbolTT[s].minBitsOut = (BYTE)tableLog;
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                symbolTT[s].maxState = (U16)( (tableSize*2) - 1);   /* ensures state <= maxState */
+                break;
+            default :
+                symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
+                symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                total +=  normalizedCounter[s];
+                symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);   /* states above this value emit one more bit (see FSE_encodeByte) */
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
+
+void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
+{   /* returns a malloc'ed DTable sized for FSE_buildDTable() (NULL if allocation fails); release it with FSE_freeDTable() */
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;   /* clamp oversized requests */
+    return malloc( sizeof(U32) + (((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE)) );   /* U32 header cell (tableLog) + 2^tableLog decode cells : FSE_buildDTable() starts its cells at base32+1 */
+}
+
+void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
+{
+    free(DTable);   /* plain free() of the pointer handed in; a NULL DTable is therefore harmless */
+}
+
+/* FSE_buildDTable() : builds decoding table 'DTable' from 'normalizedCounter' (-1 == low-probability symbol). return : 1 if no count reaches 2^(tableLog-1), 0 otherwise, or an error code */
+size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
+(void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    U32* const base32 = DTable;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);   /* decode cells start right after the U32 header cell */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;   /* low-probability symbols are stacked downward from the last cell */
+    const S16 largeLimit= 1 << (tableLog-1);
+    U32 noLarge = 1;   /* cleared as soon as one count is >= largeLimit; this is the value returned on success */
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    base32[0] = tableLog;   /* header cell */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;   /* a low-probability symbol owns exactly one cell */
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];   /* first state value handed out for this symbol is its count */
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)   /* counts <= 0 (including -1) place nothing here */
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
+            U16 nextState = symbolNext[symbol]++;   /* successive cells of a symbol receive count, count+1, ... */
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    return noLarge;
+}
diff --git a/lib/fse.h b/lib/fse.h
new file mode 100755
index 0000000..1526f0f
--- /dev/null
+++ b/lib/fse.h
@@ -0,0 +1,380 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    // size_t, ptrdiff_t
+
+
+/******************************************
+*  FSE simple functions
+******************************************/
+size_t FSE_compress(void* dst, size_t maxDstSize,
+              const void* src, size_t srcSize);
+size_t FSE_decompress(void* dst, size_t maxDstSize,
+                const void* cSrc, size_t cSrcSize);
+/*
+FSE_compress():
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated, and sized to handle worst case situations.
+    Worst case size evaluation is provided by FSE_compressBound().
+    return : size of compressed data
+    Special values : if result == 0, data is uncompressible => Nothing is stored within dst !!
+                     if result == 1, data is one constant element x srcSize times. Use RLE compression.
+                     if FSE_isError(result), it's an error code.
+
+FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'maxDstSize'.
+    ** Important ** : This function doesn't decompress uncompressed nor RLE data !
+    return : size of regenerated data (<= maxDstSize)
+             or an error code, which can be tested using FSE_isError()
+*/
+
+
+size_t FSE_decompressRLE(void* dst, size_t originalSize,
+                   const void* cSrc, size_t cSrcSize);
+/*
+FSE_decompressRLE():
+    Decompress specific RLE corner case (equivalent to memset()).
+    cSrcSize must be == 1. originalSize must be exact.
+    return : size of regenerated data (==originalSize)
+             or an error code, which can be tested using FSE_isError()
+
+Note : there is no function provided for uncompressed data, as it's just a simple memcpy()
+*/
+
+
+/******************************************
+*  Tool functions
+******************************************/
+size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/******************************************
+*  FSE advanced functions
+******************************************/
+/*
+FSE_compress2():
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    return : size of compressed data
+             or -1 if there is an error
+*/
+size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/******************************************
+   FSE detailed API
+******************************************/
+/*
+FSE_compress() does the following:
+1. count symbol occurrence from src[] into table count[]
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using FSE_writeHeader()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table
+
+FSE_decompress() performs:
+1. read normalized counters with FSE_readHeader()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table
+
+The following API allows triggering specific sub-functions.
+*/
+
+/* *** COMPRESSION *** */
+
+size_t FSE_count(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
+
+unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t total, unsigned maxSymbolValue);
+
+size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog);
+size_t FSE_writeHeader (void* headerBuffer, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+void*  FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
+void   FSE_freeCTable (void* CTable);
+size_t FSE_buildCTable(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize, const void* src, size_t srcSize, const void* CTable);
+
+/*
+The first step is to count all symbols. FSE_count() provides one quick way to do this job.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have '*maxSymbolValuePtr+1' cells.
+'src' is a table of unsigned char of size 'srcSize'. All values within 'src' MUST be <= *maxSymbolValuePtr.
+*maxSymbolValuePtr will be updated with its real value (necessarily <= original value).
+FSE_count() will return the number of occurrences of the most frequent symbol.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any symbol whose frequency is >= 1.
+You can use input 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can optionally call FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error(typically, invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeHeader().
+'header' buffer must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'header'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()) (for example, buffer size too small).
+
+'normalizedCounter' can then be used to create the compression tables 'CTable'.
+The space required by 'CTable' must be already allocated. Its size is provided by FSE_sizeof_CTable().
+'CTable' must be aligned on 4-byte boundaries.
+You can then use FSE_buildCTable() to fill 'CTable'.
+In both cases, if there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'.
+The function returns the size of compressed data (without header), or -1 if failed.
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize);
+
+void*  FSE_createDTable(unsigned tableLog);
+void   FSE_freeDTable(void* DTable);
+size_t FSE_buildDTable (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const void* DTable, size_t fastMode);
+
+/*
+If the block is RLE compressed, or uncompressed, use the relevant specific functions.
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by reading a header with FSE_readHeader().
+'normalizedCounter' must be already allocated, and have at least '*maxSymbolValuePtr+1' cells of short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readHeader will provide 'tableLog' and 'maxSymbolValue' stored into the header.
+The result of FSE_readHeader() is the number of bytes read from 'header'.
+The following values have special meaning :
+return 2 : there is only a single symbol value. The value is provided into the second byte of header.
+return 1 : data is uncompressed
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to create the decompression tables 'DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'DTable' must be already allocated and properly aligned.
+One can create a DTable using FSE_createDTable().
+The function will return 1 if DTable is compatible with fastMode, 0 otherwise.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+'DTable' can then be used to decompress 'compressed', with FSE_decompress_usingDTable().
+Only trigger fastMode if it was authorized by result of FSE_buildDTable(), otherwise decompression will fail.
+cSrcSize must be correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+*/
+
+
+/******************************************
+*  FSE streaming compression API
+******************************************/
+typedef struct
+{
+    size_t bitContainer;
+    int    bitPos;
+    char*  startPtr;
+    char*  ptr;
+} FSE_CStream_t;
+
+typedef struct
+{
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+void   FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer);
+void   FSE_initCState(FSE_CState_t* CStatePtr, const void* CTable);
+
+void   FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned char symbol);
+void   FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
+void   FSE_flushBits(FSE_CStream_t* bitC);
+
+void   FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+size_t FSE_closeCStream(FSE_CStream_t* bitC);
+
+/*
+These functions are inner components of FSE_compress_usingCTable().
+They allow creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a lifo stack.
+
+You will need a few variables to track your CStream. They are :
+
+void* CTable;           // Provided by FSE_buildCTable()
+FSE_CStream_t bitC;     // bitStream tracking structure
+FSE_CState_t state;     // State tracking structure
+
+
+The first thing to do is to init the bitStream, and the state.
+    FSE_initCStream(&bitC, dstBuffer);
+    FSE_initCState(&state, CTable);
+
+You can then encode your input data, byte after byte.
+FSE_encodeByte() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitC, &state, symbol);
+
+At any time, you can add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    FSE_addBits(&bitC, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into a local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    FSE_flushBits(&bitC);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitC, &state);
+
+You must then close the bitStream if you opened it with FSE_initCStream().
+FSE_closeCStream(), as declared above, takes the bitStream only (it has no optionalId argument).
+The function returns the size in bytes of CStream.
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = FSE_closeCStream(&bitC);
+*/
+
+
+/******************************************
+*  FSE streaming decompression API
+******************************************/
+//typedef unsigned int bitD_t;
+typedef size_t bitD_t;
+
+typedef struct
+{
+    bitD_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+} FSE_DStream_t;
+
+typedef struct
+{
+    bitD_t      state;
+    const void* table;
+} FSE_DState_t;
+
+
+size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+void   FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable);
+
+unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
+bitD_t        FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
+unsigned int  FSE_reloadDStream(FSE_DStream_t* bitD);
+
+unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
+unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*
+Let's now decompose FSE_decompress_usingDTable() into its unitary elements.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+FSE_DStream_t DStream;    // Stream context
+FSE_DState_t DState;      // State context. Multiple ones are possible
+const void* DTable;       // Decoding table, provided by FSE_buildDTable()
+U32 tableLog;             // Provided by FSE_readHeader()
+
+The first thing to do is to init the bitStream.
+    errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s) (multiple ones are possible) :
+    FSE_initDState(&DState, &DStream, DTable);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a lifo stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you may have stored into the bitStream (in reverse order).
+Note : maximum allowed nbBits is 25
+    unsigned int bitField = FSE_readBits(&DStream, nbBits);
+
+All above operations only read from the local register (whose size is that of bitD_t, a size_t in this header).
+Reading data from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+FSE_reloadDStream() result tells if there is still some more data to read from DStream.
+0 : there is still some data left in the DStream.
+1 : DStream reached end of buffer, but is not yet fully extracted. It will not load data from memory any more.
+2 : DStream reached its exact end, corresponding in general to decompression completed.
+3 : DStream went too far. Decompression result is corrupted.
+
+When reaching end of buffer(1), progress slowly if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    FSE_reloadDStream(&DStream) >= 2
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    FSE_endOfDStream(&DStream);
+Check also the states. There might be some entropy left there, still able to decode some high probability symbol.
+    FSE_endOfDState(&DState);
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/fse_static.h b/lib/fse_static.h
new file mode 100755
index 0000000..7d400a5
--- /dev/null
+++ b/lib/fse_static.h
@@ -0,0 +1,108 @@
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Tool functions
+******************************************/
+#define FSE_MAX_HEADERSIZE 512   /* room reserved for the header in FSE_COMPRESSBOUND() */
+#define FSE_COMPRESSBOUND(size) ((size) + ((size)>>7) + FSE_MAX_HEADERSIZE)   /* Macro can be useful for static allocation; 'size' is evaluated twice, and is parenthesized so that any expression can be passed */
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate a CTable or a DTable as a table of U32, sized with the macros below (arguments are parenthesized, so any expression can be passed) */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   ((1<<(maxTableLog))+1)
+
+
+/******************************************
+*  FSE supported API for DLL
+******************************************/
+#include "fse.h"
+
+
+/******************************************
+*  Error Management
+******************************************/
+#define FSE_LIST_ERRORS(ITEM) \
+        ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
+        ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) \
+        ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
+        ITEM(FSE_ERROR_corruptionDetected) \
+        ITEM(FSE_ERROR_maxCode)
+
+#define FSE_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+size_t FSE_countFast(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
+/* same as FSE_count(), but doesn't check that all values within src are really <= *maxSymbolValuePtr */
+
+size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits);
+/* create a fake CTable, designed to not compress an input where each element uses nbBits */
+
+size_t FSE_buildCTable_rle (void* CTable, unsigned char symbolValue);
+/* create a fake CTable, designed to compress a single identical value */
+
+size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits);
+/* create a fake DTable, designed to read an uncompressed bitstream where each element uses nbBits */
+
+size_t FSE_buildDTable_rle (void* DTable, unsigned char symbolValue);
+/* create a fake DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE streaming API
+******************************************/
+bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
+
+unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
+/* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
new file mode 100644
index 0000000..28afc3a
--- /dev/null
+++ b/lib/libzstd.pc.in
@@ -0,0 +1,14 @@
+#   ZSTD - standard compression algorithm
+#   Copyright (C) 2014-2015, Yann Collet.
+#   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: zstd
+Description: lossless compression algorithm library
+URL: https://github.com/Cyan4973/zstd
+Version: @VERSION@
+Libs: -L@LIBDIR@ -lzstd
+Cflags: -I@INCLUDEDIR@
diff --git a/lib/zstd.c b/lib/zstd.c
new file mode 100644
index 0000000..1fc1dcc
--- /dev/null
+++ b/lib/zstd.c
@@ -0,0 +1,1719 @@
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* ZSTD_MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  This sizes the match-finder hash table : HASH_LOG is derived as (ZSTD_MEMORY_USAGE - 2),
+*  and the table holds (1 << HASH_LOG) U32 entries.
+*  Increasing memory usage improves compression ratio.
+*  Reduced memory usage can improve speed, due to cache effect. */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated detection of efficient unaligned memory access.
+ * Based on known hardware architectures.
+ * This list will be updated as feedback comes in.
+ * Defining CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS (e.g. on the compiler command line) forces the result to 1.
+ * NOTE(review): ZSTD_UNALIGNED_ACCESS is not referenced in the part of the file reviewed here - confirm it is used further down.
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+/* <immintrin.h> is an x86-only header : including it unconditionally breaks the build on every other target */
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+#include "zstd_static.h"
+#if defined(__clang__) || defined(__GNUC__)
+#  include "fse.c"        /* unfortunately, due to GCC/Clang inlining limitations, this include runs noticeably faster */
+#else
+#  include "fse_static.h"
+#endif
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+/* FORCE_INLINE : strongest "static inline" spelling available for the detected compiler.
+ * GCC_VERSION  : (major*100 + minor), only defined on the non-MSVC path; it is always tested together with defined(__GNUC__). */
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/********************************************************
+*  Basic Types
+*********************************************************/
+/* Fixed-width aliases used throughout the file.
+ * C99 and later : exact-width types from <stdint.h>.
+ * Otherwise     : plain integer types, which assumes char/short/int/long long are 8/16/32/64 bits wide on the target. */
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+/* Frame signature : ZSTD_compressBegin() writes it, big-endian, as the first 4 bytes of the output */
+static const U32 ZSTD_magicNumber = 0xFD2FB51C;
+
+/* Hash table geometry : (1 << HASH_LOG) entries, HASH_MASK selects a valid index */
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+/* Multiplier of the 32-bit hash variant (that variant of ZSTD_hashPtr is currently commented out) */
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+/* Postfix unit multipliers : "128 KB" expands to "128 *(1<<10)" */
+#define KB *(1<<10)
+#define MB *(1<<20)
+
+#define BLOCKSIZE (128 KB)                 /* a define, so that it can size static arrays */
+static const size_t g_maxBlockSize = 128 KB;   /* ((size_t)1 << 22) - 1; */
+static const U32 g_maxDistance = 512 KB;
+static const U32 g_searchStrength = 8;     /* ZSTD_compressBlock() skips ((ip-anchor) >> g_searchStrength) + 1 bytes after a failed match */
+
+#define WORKPLACESIZE (BLOCKSIZE*11/4)     /* size of the scratch buffer allocated by ZSTD_createCCtx() */
+#define MINMATCH 4
+/* Bit widths of the match length / literal length / offset codes, and the largest code of each kind */
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits )-1)
+#define MaxLL  ((1<<LLbits )-1)
+#define MaxOff ((1<<Offbits)-1)
+/* Maximum FSE table log requested for each symbol type */
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+
+#define LITERAL_NOENTROPY 63  /* literal sections of at most this many bytes are stored raw, without entropy coding */
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+/* Pointer-width probes : each returns 1 when a data pointer is exactly 4 (resp. 8) bytes wide, 0 otherwise.
+ * Both are compile-time constants, so branches testing them can be removed by the compiler. */
+static unsigned ZSTD_32bits(void)
+{
+    return (sizeof(void*) == 4) ? 1 : 0;
+}
+
+static unsigned ZSTD_64bits(void)
+{
+    return (sizeof(void*) == 8) ? 1 : 0;
+}
+
+/* Runtime endianness probe.
+ * return : 1 when the lowest-addressed byte of a 32-bit integer holds its least significant bits, 0 otherwise */
+static unsigned ZSTD_isLittleEndian(void)
+{
+    /* kept as an automatic variable on purpose : making it static was found detrimental to performance */
+    const union { U32 word; BYTE bytes[4]; } probe = { 1 };
+    const BYTE firstByte = probe.bytes[0];
+    return firstByte;
+}
+
+/* Reads a 32-bit big-endian value, one byte at a time (no alignment requirement on memPtr) */
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+    const BYTE* const bytes = (const BYTE*)memPtr;
+    U32 value = (U32)bytes[0] << 24;
+    value += (U32)bytes[1] << 16;
+    value += (U32)bytes[2] <<  8;
+    value += (U32)bytes[3];
+    return value;
+}
+
+/* Stores `value` as 4 bytes, most significant byte first (no alignment requirement on memPtr) */
+static void ZSTD_writeBE32(void* memPtr, U32 value)
+{
+    BYTE* const out = (BYTE*) memPtr;
+    int pos;
+    for (pos = 0; pos < 4; pos++)
+        out[pos] = (BYTE)(value >> (24 - 8*pos));
+}
+
+/* Native-endian loads from a possibly unaligned address.
+ * memcpy() replaces the previous "dereference a cast pointer" form, which was undefined behavior
+ * (misaligned access, strict-aliasing violation) and silently dropped the const qualifier.
+ * Compilers turn these fixed-size copies into a single load wherever the target allows it. */
+static U16    ZSTD_read16(const void* p) { U16 v; memcpy(&v, p, sizeof(v)); return v; }
+
+static U32    ZSTD_read32(const void* p) { U32 v; memcpy(&v, p, sizeof(v)); return v; }
+
+static size_t ZSTD_read_ARCH(const void* p) { size_t v; memcpy(&v, p, sizeof(v)); return v; }
+
+/* Fixed-size copies of 4 and 8 bytes; dst and src may be unaligned (memcpy semantics, so they must not overlap) */
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+/* Copies 8 bytes from s to d, then advances both pointers by 8 (d and s must be modifiable byte pointers) */
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* Copies `length` bytes from src to dst in 8-byte steps.
+ * The last step is not trimmed : up to 7 bytes beyond dst+length may be written (and as many read beyond src+length),
+ * so both buffers need that much slack. Nothing is copied when length is 0. */
+static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
+{
+    const BYTE* from = src;
+    BYTE* to = dst;
+    BYTE* const stop = to + length;
+
+    for ( ; to < stop; to += 8, from += 8)
+        ZSTD_copy8(to, from);
+}
+
+/* Writes `value` as a variable-length integer : 7 bits per byte, least significant group first,
+ * bit 7 set on every byte except the last one.
+ * return : nb of bytes written at ptr (at least 1) */
+static size_t ZSTD_writeProgressive(void* ptr, size_t value)
+{
+    BYTE* const first = ptr;
+    BYTE* out = first;
+
+    for (;;)
+    {
+        const BYTE low7 = (BYTE)(value & 127);
+        value >>= 7;
+        if (value == 0)
+        {
+            *out++ = low7;                 /* final byte : continuation bit clear */
+            break;
+        }
+        *out++ = (BYTE)(low7 + 128);       /* more groups follow */
+    }
+
+    return out - first;
+}
+
+
+/* Reads a variable-length integer written by ZSTD_writeProgressive()
+ * (7 bits per byte, least significant group first, bit 7 = "another byte follows").
+ * result : receives the decoded value
+ * return : nb of bytes consumed from ptr (at least 1) */
+static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
+{
+    const BYTE* const bStart = ptr;
+    const BYTE* byte = bStart;
+    size_t r = 0;
+    U32 shift = 0;
+
+    do
+    {
+        /* widen before shifting : the shift used to be done in int, losing (or overflowing into) the high bits */
+        r += (size_t)(*byte & 127) << shift;
+        shift += 7;
+        /* the shift bound never triggers on a well-formed value; it only prevents an undefined
+           over-wide shift when the input has more continuation bytes than a size_t can hold */
+    } while ((*byte++ & 128) && (shift < sizeof(size_t)*8));
+
+    *result = r;
+    return byte - bStart;
+}
+
+
+/**************************************
+*  Local structures
+***************************************/
+/* Block type, stored in the 2 top bits of the first block-header byte (see the "<<6" in the header builders) */
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+/* Compression context; this is what a ZSTD_cctx_t handle points to */
+typedef struct
+{
+    const BYTE* base;        /* reference address : hashTable entries are offsets from it (NULL after a reset) */
+    U32 current;
+    BYTE* workplace;         /* scratch buffer of WORKPLACESIZE bytes, owned by the context */
+#ifdef _INCLUDED_IMM
+    __m256i justToBeAligned;
+#endif
+    U32   hashTable[HASH_TABLESIZE];
+} refTables_t;
+
+
+/* Allocates a compression context together with its scratch buffer.
+ * return : the new context, to be released with ZSTD_freeCCtx(),
+ *          or NULL when either allocation fails (previously a failed malloc was dereferenced).
+ * Note : the tables are left uninitialized; ZSTD_compressBegin() resets them. */
+ZSTD_cctx_t ZSTD_createCCtx(void)
+{
+    refTables_t* srt = (refTables_t *) malloc( sizeof(refTables_t) );
+    if (srt == NULL) return (ZSTD_cctx_t)NULL;
+
+    srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
+    if (srt->workplace == NULL)
+    {
+        free(srt);   /* do not leak the half-built context */
+        return (ZSTD_cctx_t)NULL;
+    }
+
+    return (ZSTD_cctx_t)srt;
+}
+
+
+/* Returns a context to its "no history" state : every hash table entry is zeroed and the reference
+ * address is cleared, so the next ZSTD_compressContinue() call re-bases itself on its input. */
+void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
+{
+    refTables_t* const tables = ctx;
+
+    memset(tables->hashTable, 0, sizeof(tables->hashTable));
+    tables->base = NULL;
+}
+
+
+/* Releases a context created by ZSTD_createCCtx(), including its scratch buffer.
+ * A NULL context is accepted and ignored, like free(NULL) (it used to be dereferenced).
+ * return : 0 */
+size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
+{
+    refTables_t *srt = (refTables_t *) (ctx);
+    if (srt == NULL) return 0;
+    free(srt->workplace);
+    free(srt);
+    return 0;
+}
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* Tells if a return value is an error code.
+ * Errors are reported as (size_t)-ZSTD_ERROR_xxx, i.e. they occupy the very top of the size_t range. */
+unsigned ZSTD_isError(size_t code)
+{
+    const size_t errorThreshold = (size_t)(-ZSTD_ERROR_maxCode);
+    return (code > errorThreshold);
+}
+
+/* Table of error names, generated from ZSTD_LIST_ERRORS by stringifying each entry */
+#define ZSTD_GENERATE_STRING(STRING) #STRING,
+static const char* ZSTD_errorStrings[] = { ZSTD_LIST_ERRORS(ZSTD_GENERATE_STRING) };
+
+/* Provides the error code string (useful for debugging).
+ * A value that is not an error code yields a fixed placeholder string. */
+const char* ZSTD_getErrorName(size_t code)
+{
+    if (!ZSTD_isError(code)) return "Unspecified error code";
+    return ZSTD_errorStrings[-(int)(code)];   /* negating recovers the positive index into the table */
+}
+
+
+/**************************************
+*  Tool functions
+**************************************/
+/* Returns the library version this code was compiled as (ZSTD_VERSION_NUMBER) */
+unsigned ZSTD_versionNumber (void)
+{
+    return ZSTD_VERSION_NUMBER;
+}
+
+/* Returns the position (0-31) of the highest set bit of val.
+ * val must be non-zero : both intrinsic paths leave the result undefined for 0
+ * (the software fallback happens to return 0 in that case). */
+static unsigned ZSTD_highbit(U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    _BitScanReverse(&r, val);
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return 31 - __builtin_clz(val);
+#   else   /* Software version */
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    /* smear the highest set bit into every lower position, then look the pattern up through a De Bruijn multiplication */
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+/* ZSTD_NbCommonBytes :
+ * val is the XOR of two machine words loaded from memory (see ZSTD_count()), and is expected to be non-zero.
+ * return : how many bytes, counted from the lowest address, are equal in the two words.
+ * Little-endian : the first bytes in memory are the least significant ones, hence (trailing zero bits / 8).
+ * Big-endian    : the first bytes in memory are the most significant ones, hence (leading zero bits / 8).
+ * Defining LZ4_FORCE_SW_BITCOUNT disables the compiler intrinsics in favour of the portable fallbacks
+ * (the macro name presumably comes from LZ4, where this routine originates - TODO confirm / rename). */
+static unsigned ZSTD_NbCommonBytes (register size_t val)
+{
+    if (ZSTD_isLittleEndian())
+    {
+        if (ZSTD_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            /* (val & -val) isolates the lowest set bit; the De Bruijn multiplication maps it to its byte index */
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (ZSTD_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            /* binary search for the first non-zero byte, starting from the most significant end */
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz(val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+/* Counts how many consecutive bytes are identical at pIn and pMatch, never reading pIn at or beyond pInLimit.
+ * return : length of the common prefix, in bytes */
+static unsigned ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const wordLimit = pInLimit - (sizeof(size_t)-1);
+
+    /* fast path : compare one machine word at a time */
+    while (pIn < wordLimit)
+    {
+        const size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        if (diff)
+        {
+            /* first mismatch lies inside this word */
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (unsigned)(pIn - pStart);
+        }
+        pIn    += sizeof(size_t);
+        pMatch += sizeof(size_t);
+    }
+
+    /* tail : fewer than sizeof(size_t) bytes left, finish with 4 / 2 / 1 byte comparisons */
+    if (ZSTD_64bits() && (pIn<(pInLimit-3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn)))
+    {
+        pIn += 4;
+        pMatch += 4;
+    }
+    if ((pIn<(pInLimit-1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn)))
+    {
+        pIn += 2;
+        pMatch += 2;
+    }
+    if ((pIn<pInLimit) && (*pMatch == *pIn))
+        pIn++;
+
+    return (unsigned)(pIn - pStart);
+}
+
+
+/********************************************************
+*  Compression
+*********************************************************/
+/* Maximum compressed size : the FSE bound for srcSize, plus a fixed 12-byte margin */
+size_t ZSTD_compressBound(size_t srcSize)
+{
+    const size_t fseBound = FSE_compressBound(srcSize);
+    return fseBound + 12;
+}
+
+
+/* Emits an RLE block : a 3-byte header (block type in the 2 top bits, srcSize below) followed by
+ * the single byte value to repeat, taken from src[0].
+ * maxDstSize is not checked here : the caller has already verified dst is large enough
+ * (dstSize >= FSE_compressBound(srcSize), checked by ZSTD_compressLiterals()).
+ * return : nb of bytes written (ZSTD_blockHeaderSize+1) */
+static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const out = dst;
+    const BYTE repeated = *(BYTE*)src;
+
+    (void)maxDstSize;
+
+    /* header : big-endian size, block type folded into the first byte */
+    out[0] = (BYTE)((BYTE)(srcSize>>16) + (BYTE)(bt_rle<<6));
+    out[1] = (BYTE)(srcSize>>8);
+    out[2] = (BYTE)srcSize;
+
+    /* payload */
+    out[ZSTD_blockHeaderSize] = repeated;
+
+    return ZSTD_blockHeaderSize+1;
+}
+
+
+/* Emits a raw (stored) block : a 3-byte header (block type in the 2 top bits, srcSize below)
+ * followed by an unmodified copy of src.
+ * return : nb of bytes written (ZSTD_blockHeaderSize+srcSize),
+ *          or an error code when dst cannot hold them */
+static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const out = dst;
+    const size_t blockSize = srcSize + ZSTD_blockHeaderSize;
+
+    if (blockSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* payload first, header afterwards */
+    memcpy(out + ZSTD_blockHeaderSize, src, srcSize);
+    out[0] = (BYTE)((BYTE)(srcSize>>16) + (BYTE)(bt_raw<<6));   /* is a raw (uncompressed) block */
+    out[1] = (BYTE)(srcSize>>8);
+    out[2] = (BYTE)srcSize;
+
+    return blockSize;
+}
+
+
+/* ZSTD_compressLiterals_usingCTable :
+ * FSE-encodes src into dst with an already built CTable, alternating between two encoder states.
+ * return : the value of FSE_closeCStream(); the caller (ZSTD_compressLiterals) advances its output
+ *          pointer by it, i.e. uses it as a byte count.
+ *          NOTE(review): this comment previously said "size of CStream in bits" - confirm against FSE_closeCStream().
+ * dstSize is currently ignored : nothing here prevents writing beyond dst+dstSize. */
+static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
+                                          const void* src, size_t srcSize,
+                                          const void* CTable)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    FSE_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    // init
+    (void)dstSize;   // objective : ensure it fits into dstBuffer (Todo)
+    FSE_initCStream(&bitC, dst);
+    FSE_initCState(&CState1, CTable);
+    CState2 = CState1;
+
+    /* Note : at this stage, srcSize > LITERAL_NOENTROPY (checked by the caller of ZSTD_compressLiterals()) */
+    // join to mod 2 : encode one symbol alone when the count is odd, so that an even count remains
+    if (srcSize & 1)
+    {
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+        FSE_flushBits(&bitC);
+    }
+
+    // join to mod 4 : only when the main loop encodes 4 symbols per iteration (same static test as below)
+    if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2))   // test bit 1 (value 2)
+    {
+        FSE_encodeByte(&bitC, &CState2, *ip++);
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+        FSE_flushBits(&bitC);
+    }
+
+    // 2 or 4 encoding per loop, depending on how many symbols fit in the bit container between two flushes
+    while (ip<iend)
+    {
+        FSE_encodeByte(&bitC, &CState2, *ip++);
+
+        if (sizeof(size_t)*8 < LitFSELog*2+7 )   // this test must be static
+            FSE_flushBits(&bitC);
+
+        FSE_encodeByte(&bitC, &CState1, *ip++);
+
+        if (sizeof(size_t)*8 > LitFSELog*4+7 )   // this test must be static
+        {
+            FSE_encodeByte(&bitC, &CState2, *ip++);
+            FSE_encodeByte(&bitC, &CState1, *ip++);
+        }
+
+        FSE_flushBits(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return FSE_closeCStream(&bitC);
+}
+
+
+/* Minimum number of bytes compression must save for a result to be kept : 1 byte, plus 1 per 64 bytes of input */
+size_t ZSTD_minGain(size_t srcSize)
+{
+    return (srcSize / 64) + 1;
+}
+
+
+/* ZSTD_compressLiterals :
+ * Tries to FSE-compress src into dst as a "compressed" block :
+ * 3-byte block header, FSE table description, then the FSE bitstream.
+ * return : total nb of bytes written into dst (header included),
+ *          or 0 when the input is judged not compressible (the caller then stores it raw),
+ *          or 1 when the FSE_count() result equals srcSize (the caller then emits an RLE block),
+ *          or an error code (testable with ZSTD_isError()).
+ * Since 0 and 1 are reserved as signals, callers must test them before using the result as a size. */
+static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
+                                     const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* ip = istart;
+
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart + ZSTD_blockHeaderSize;   /* the header is filled in last, once the size is known */
+    BYTE* const oend = ostart + dstSize;
+
+    /* NOTE(review): maxSymbolValue starts at 256 while count[] and norm[] only have 256 entries (indices 0-255);
+       confirm that FSE_count() clamps it before touching count[maxSymbolValue] */
+    U32 maxSymbolValue = 256;
+    U32 tableLog = LitFSELog;
+    U32 count[256];
+    S16 norm[256];
+    U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
+    size_t errorCode;
+    const size_t minGain = ZSTD_minGain(srcSize);
+
+    // early out
+    if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    // Scan input and build symbol stats
+    errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    if (errorCode == srcSize) return 1;                      /* signal : use RLE */
+    if (errorCode < ((srcSize * 7) >> 10)) return 0;         /* signal : not worth entropy coding */
+
+    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
+    errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    // Write table description header
+    errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    op += errorCode;
+
+    // Compress
+    errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+    errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    op += errorCode;
+
+    // check compressibility : the result must save at least minGain bytes
+    if ( (size_t)(op-ostart) >= srcSize-minGain)
+        return 0;
+
+    // Build header : payload size on 3 bytes, big-endian, block type in the 2 top bits
+    {
+        size_t totalSize;
+        totalSize  = op - ostart - ZSTD_blockHeaderSize;
+        ostart[0]  = (BYTE)(totalSize>>16);
+        ostart[1]  = (BYTE)(totalSize>>8);
+        ostart[2]  = (BYTE)totalSize;
+        ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
+    }
+
+    return op-ostart;
+}
+
+
+/* ZSTD_compressEntropy :
+ * Final stage of block compression : entropy-codes the literals and the sequence streams
+ * gathered by ZSTD_compressBlock() into dst.
+ * Output, in write order :
+ *   1. literals section (raw, RLE or FSE-compressed; each carries its own 3-byte block header)
+ *   2. lastLLSize, as a variable-length integer (ZSTD_writeProgressive)
+ *   3. dumps length on 2 or 3 bytes - its first byte also receives the three table types - then the dumps bytes
+ *   4. one table description each for literal lengths, offset codes and match lengths
+ *      (1 byte for rle, nothing for raw, an FSE header for compressed)
+ *   5. the FSE bitstream, sequences being encoded from the last one to the first one
+ * The nb of sequences is (op_litLength - op_litLength_start); the match length and offset arrays hold as many entries.
+ * return : nb of bytes written into dst,
+ *          or 0 when the block is not compressible enough (fewer than ZSTD_minGain(srcSize) bytes saved),
+ *          or an error code coming from the literals stage.
+ * NOTE(review): past the literals stage, maxDstSize is no longer enforced and the results of
+ * FSE_normalizeCount / FSE_writeHeader / FSE_buildCTable are not tested for errors. */
+static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
+                        const BYTE* op_lit_start, const BYTE* op_lit,
+                        const BYTE* op_litLength_start, const BYTE* op_litLength,
+                        const BYTE* op_matchLength_start,
+                        const U32*  op_offset_start,
+                        const BYTE* op_dumps_start, const BYTE* op_dumps,
+                        size_t srcSize, size_t lastLLSize
+                        )
+{
+    FSE_CStream_t blockStream;
+    U32 count[256];
+    S16 norm[256];
+    size_t mostFrequent;
+    U32 max = 255;
+    U32 tableLog = 11;
+    const size_t nbSeq = op_litLength - op_litLength_start;
+    U32 CTable_LitLength  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
+    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
+    U32 LLtype, Offtype, MLtype;
+    BYTE* op;
+    const U32* op_offset = op_offset_start + nbSeq;
+    const BYTE* op_matchLength = op_matchLength_start + nbSeq;
+    BYTE offsetBits_start[BLOCKSIZE / 4];   /* one offset code per sequence; note : BLOCKSIZE/4 bytes of stack */
+    BYTE* offsetBitsPtr = offsetBits_start;
+    const size_t minGain = ZSTD_minGain(srcSize);
+    const size_t maxCSize = srcSize - minGain;
+    const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
+    const size_t maxLSize = maxCSize > minSeqSize ? maxCSize - minSeqSize : 0;
+    BYTE* seqHead;
+
+
+    /* init */
+    op = dst;
+
+    /* Encode literals */
+    {
+        size_t cSize;
+        size_t litSize = op_lit - op_lit_start;
+        if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
+        else
+        {
+            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
+            if (cSize == 1) cSize = ZSTD_compressRle (op, maxDstSize, op_lit_start, litSize);   /* 1 = "single repeated symbol" signal */
+            else if (cSize == 0)                                                                /* 0 = "not compressible" signal */
+            {
+                if (litSize >= maxLSize) return 0;   /* block not compressible enough */
+                cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
+            }
+        }
+        if (ZSTD_isError(cSize)) return cSize;
+        op += cSize;
+    }
+
+    /* Encode Sequences */
+
+    /* seqHeader */
+    op += ZSTD_writeProgressive(op, lastLLSize);
+    seqHead = op;   /* first byte of the dumps length : the table types are added into it further down */
+
+    /* dumps */
+    {
+        size_t dumpsLength = op_dumps- op_dumps_start;
+        if (dumpsLength < 512)
+        {
+            /* short form : first byte is 0 or 1 */
+            op[0] = (BYTE)(dumpsLength >> 8);
+            op[1] = (BYTE)(dumpsLength);
+            op += 2;
+        }
+        else
+        {
+            /* long form : first byte is the marker 2, followed by 16 bits of length */
+            op[0] = 2;
+            op[1] = (BYTE)(dumpsLength>>8);
+            op[2] = (BYTE)(dumpsLength);
+            op += 3;
+        }
+        memcpy(op, op_dumps_start, dumpsLength);
+        op += dumpsLength;
+    }
+
+    /* Encoding table of Literal Lengths */
+    max = MaxLL;
+    mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
+    if (mostFrequent == nbSeq)
+    {
+        /* every sequence uses the same code : store it once */
+        *op++ = *op_litLength_start;
+        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+        LLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
+    {
+        /* too few sequences, or distribution too flat : fixed-width codes, no table description */
+        FSE_buildCTable_raw(CTable_LitLength, LLbits);
+        LLtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
+        LLtype = bt_compressed;
+    }
+
+    /* Encoding table of Offsets */
+    {
+        /* create OffsetBits : the offset code is (position of the highest set bit + 1), or 0 for a null offset */
+        size_t i;
+        max = MaxOff;
+        for (i=0; i<nbSeq; i++)
+        {
+#if 1
+            /* NOTE(review): ZSTD_highbit() is also called for a null offset, for which its result is undefined;
+               that result is discarded by the next line, but the call itself remains questionable */
+            offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
+            if (op_offset_start[i]==0) offsetBits_start[i]=0;
+#else
+            U32 offset = op_offset_start[i];
+            U32 r;
+            r = ZSTD_highbit(offset) + 1;
+            if (offset==0) r = 0;
+            offsetBits_start[i] = (BYTE)r;
+#endif
+        }
+        offsetBitsPtr += nbSeq;
+        mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
+    }
+    if (mostFrequent == nbSeq)
+    {
+        *op++ = *offsetBits_start;
+        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+        Offtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
+        Offtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
+        Offtype = bt_compressed;
+    }
+
+    /* Encoding Table of MatchLengths */
+    max = MaxML;
+    mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
+    if (mostFrequent == nbSeq)
+    {
+        *op++ = *op_matchLength_start;
+        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+        MLtype = bt_rle;
+    }
+    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
+    {
+        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
+        MLtype = bt_raw;
+    }
+    else
+    {
+        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
+        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
+        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog);
+        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
+        MLtype = bt_compressed;
+    }
+
+    /* the three table types share the upper bits of the first dumps-length byte (whose own value is at most 2) */
+    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    // Encoding
+    {
+        FSE_CState_t stateMatchLength;
+        FSE_CState_t stateOffsetBits;
+        FSE_CState_t stateLitLength;
+
+        FSE_initCStream(&blockStream, op);
+        FSE_initCState(&stateMatchLength, CTable_MatchLength);
+        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
+        FSE_initCState(&stateLitLength, CTable_LitLength);
+
+        /* sequences are consumed backwards, from the last one to the first one;
+           the right-hand columns track the bit-container budget on 32-bit and 64-bit targets */
+        while (op_litLength > op_litLength_start)
+        {
+            BYTE matchLength = *(--op_matchLength);
+            U32  offset = *(--op_offset);
+            BYTE offCode = *(--offsetBitsPtr);                              /* 32b*/  /* 64b*/
+            U32 nbBits = (offCode-1) * (!!offCode);                         /* offCode-1 raw offset bits, none when offCode is 0 */
+            BYTE litLength = *(--op_litLength);                             /* (7)*/  /* (7)*/
+            FSE_encodeByte(&blockStream, &stateMatchLength, matchLength);   /* 17 */  /* 17 */
+            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
+            FSE_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
+            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
+            FSE_encodeByte(&blockStream, &stateOffsetBits, offCode);        /* 16 */  /* 51 */
+            FSE_encodeByte(&blockStream, &stateLitLength, litLength);       /* 26 */  /* 61 */
+            FSE_flushBits(&blockStream);                                    /*  7 */  /*  7 */
+        }
+
+        FSE_flushCState(&blockStream, &stateMatchLength);
+        FSE_flushCState(&blockStream, &stateOffsetBits);
+        FSE_flushCState(&blockStream, &stateLitLength);
+    }
+
+    op += FSE_closeCStream(&blockStream);
+
+    /* check compressibility */
+    if ((size_t)(op-dst) >= maxCSize) return 0;
+
+    return op - dst;
+}
+
+
+/* ZSTD_encode :
+ * Records one sequence (literals run + match) into the separate work streams.
+ *   op_lit    : receives the litLength literal bytes read from srcLit
+ *   op_ll     : receives the literal length code (1 byte)
+ *   op_offset : receives the offset (1 U32)
+ *   op_ml     : receives the match length code (1 byte)
+ *   op_dumps  : receives the overflow of lengths that do not fit their code :
+ *               a length >= MaxLL (resp. MaxML) is coded as MaxLL (resp. MaxML), and the dumps stream gets
+ *               either 1 byte (length - Max), or the marker 255 followed by 3 bytes of the full length.
+ * return : nb of bytes appended to the dumps stream (0 to 8); all other streams advance by exactly
+ *          one element, which the caller accounts for itself.
+ * Literals are copied in 8-byte steps : up to 7 bytes beyond litLength may be read and written.
+ * The 3-byte length is stored in native byte order, through a 4-byte write of which only 3 bytes are kept,
+ * so 4 writable bytes are required at op_dumps.
+ * NOTE(review): on a big-endian target the 3 bytes kept would be the high-order ones - confirm against the decoder. */
+static size_t ZSTD_encode(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
+                         size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
+{
+    const BYTE* const dumpStart = op_dumps;
+    const BYTE* const l_end = op_lit + litLength;
+
+
+    /* copy Literals */
+    while (op_lit<l_end) COPY8(op_lit, srcLit);
+
+    /* literal Length */
+    if (litLength >= MaxLL)
+    {
+        *op_ll = MaxLL;
+        if (litLength<255 + MaxLL)
+            *op_dumps++ = (BYTE)(litLength - MaxLL);
+        else
+        {
+            const U32 fullLength = (U32)litLength;
+            *op_dumps++ = 255;
+            /* ZSTD_copy4 instead of a store through a cast U32* : op_dumps has no alignment guarantee */
+            ZSTD_copy4(op_dumps, &fullLength); op_dumps += 3;   /* store direct result */
+        }
+    }
+    else *op_ll = (BYTE)litLength;
+
+    /*  match offset */
+    *op_offset = (U32)offset;
+
+    /* match Length */
+    if (matchLength >= MaxML)
+    {
+        *op_ml = MaxML;
+        if (matchLength<255 + MaxML)
+            *op_dumps++ = (BYTE)(matchLength - MaxML);
+        else
+        {
+            const U32 fullLength = (U32)matchLength;
+            *op_dumps++ = 255;
+            ZSTD_copy4(op_dumps, &fullLength); op_dumps += 3;   /* store direct result */
+        }
+    }
+    else *op_ml = (BYTE)matchLength;
+
+    return op_dumps - dumpStart;
+}
+
+
+static const U32 hashMask = (1<<HASH_LOG)-1;
+/* Multipliers for hashing the first 5, 6, 7 or 8 bytes of a sequence; only prime7bytes is used by the active variant */
+static const U64 prime5bytes =         889523592379ULL;
+static const U64 prime6bytes =      227718039650203ULL;
+static const U64 prime7bytes =    58295818150454627ULL;
+static const U64 prime8bytes = 14923729446516375013ULL;
+
+/* Alternative hash formulas that were experimented with are kept below, disabled, for reference */
+//static U32   ZSTD_hashPtr(const void* p) { return (U32) _bextr_u64(*(U64*)p * prime7bytes, (56-HASH_LOG), HASH_LOG); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) << 8 >> (64-HASH_LOG)); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & ((1<<HASH_LOG)-1); }
+//static U32   ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
+
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
+/* ZSTD_hashPtr :
+ * Hashes the 8 bytes located at p (which must all be readable) into a hash table index in [0, HASH_MASK].
+ * The bytes are fetched with memcpy() : p has no alignment guarantee, and dereferencing it as a U64*
+ * was undefined behavior (misaligned access, strict aliasing). */
+static U32   ZSTD_hashPtr(const void* p)
+{
+    U64 sequence;
+    memcpy(&sequence, p, sizeof(sequence));
+    return (U32)(( (sequence * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK);
+}
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
+//static U32   ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
+
+/* Registers position p in the hash table : the slot selected by the hash of the bytes at p
+ * receives the offset of p relative to start */
+static void  ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start)
+{
+    const U32 slot = ZSTD_hashPtr(p);
+    table[slot] = (U32)(p-start);
+}
+
+/* Looks up the match candidate for position p, then registers p in its place.
+ * return : the address previously recorded in the slot selected by the hash of p
+ *          (i.e. `start` itself when the slot still held 0) */
+static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start)
+{
+    U32* const slot = table + ZSTD_hashPtr(p);
+    const BYTE* const candidate = start + *slot;
+
+    *slot = (U32)(p - start);
+    return candidate;
+}
+
+/* Tells whether the 4 bytes at match and the 4 bytes at ip are identical (non-zero when they are) */
+static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
+{
+    return ZSTD_read32(match) == ZSTD_read32(ip);
+}
+
+
+/* ZSTD_compressBlock :
+ * Compresses one block : a single-probe hash table search splits src into sequences
+ * (literals run + match), which are stored into separate streams inside the context's workplace buffer,
+ * then handed over to ZSTD_compressEntropy().
+ * ctx    : a refTables_t; its hashTable holds positions as offsets from ctx->base
+ * return : the result of ZSTD_compressEntropy() : compressed size, 0 when not compressible enough, or an error code.
+ * Workplace layout, in order : literals | literal length codes | match length codes | offsets (U32) | dumps.
+ * NOTE(review): the offsets stream starts at a byte offset that depends on srcSize, so its U32 accesses
+ * may be misaligned; and a candidate is only validated by comparing 4 bytes, no distance limit is applied here. */
+static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    // Local Variables
+    refTables_t* srt = (refTables_t*) ctx;
+    U32*  HashTable = srt->hashTable;
+    BYTE* workplace = srt->workplace;
+    const BYTE* const base = srt->base;
+
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart + 1;
+    const BYTE* anchor = istart;                 /* start of the literals not yet emitted */
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 16;        /* the search stops 16 bytes before the end of input */
+
+    BYTE *op_l = workplace, *op_l_start = op_l;
+    BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
+    BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
+    U32  *op_offset = (U32*)(op_ml + (srcSize >> 2) + 4), *op_offset_start = op_offset;
+    BYTE *op_dumps = (BYTE*)(op_offset + (srcSize >> 2) + 4), *op_dumps_start = op_dumps;
+    size_t prevOffset=0, offset=0;
+    size_t lastLLSize;
+
+
+    /* Main Search Loop */
+    while (ip < ilimit)
+    {
+        const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);
+
+        /* no match : skip forward, the step growing with the distance since the last match */
+        if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
+
+        /* catch up : extend the match backwards over pending literals */
+        while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; }
+
+        {
+            size_t litLength = ip-anchor;
+            size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);   /* length beyond the first MINMATCH bytes */
+            size_t offsetCode;
+            if (litLength) prevOffset = offset;
+            offsetCode = ip-match;
+            if (offsetCode == prevOffset) offsetCode = 0;   /* same offset as the reference one : coded as 0 */
+            prevOffset = offset;
+            offset = ip-match;
+            op_dumps += ZSTD_encode(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
+            op_l += litLength;
+
+            /* Fill Table */
+            ZSTD_addPtr(HashTable, ip+1, base);
+            ip += matchLength + MINMATCH;
+            if (ip<=iend-8) ZSTD_addPtr(HashTable, ip-2, base);
+            anchor = ip;
+        }
+    }
+
+    /* Last Literals */
+    lastLLSize = iend - anchor;
+    memcpy(op_l, anchor, lastLLSize);
+    op_l += lastLLSize;
+
+    /* Final compression stage */
+    return ZSTD_compressEntropy(dst, maxDstSize,
+        op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
+        srcSize, lastLLSize);
+}
+
+
+/* ZSTD_limitCtx() :
+*   Raises every hash table entry lower than `limit` up to `limit`;
+*   entries already >= `limit` keep their value.
+*   The scalar loop is meant to be auto-vectorized by the compiler. */
+void ZSTD_limitCtx(void* ctx, const U32 limit)
+{
+    U32* const table = ((refTables_t*) ctx)->hashTable;
+    int pos;
+
+#ifdef _INCLUDED_IMM   /* <immintrin.h> */
+    /* AVX2 version : 8 entries per iteration */
+    const __m256i floor8 = _mm256_set1_epi32(limit);
+    for (pos=0; pos<HASH_TABLESIZE; pos+=8)
+    {
+        const __m256i loaded  = _mm256_loadu_si256((const __m256i*)(table+pos));
+        const __m256i clamped = _mm256_max_epu32(loaded, floor8);
+        _mm256_storeu_si256((__m256i*)(table+pos), clamped);
+    }
+#else
+    for (pos=0; pos<HASH_TABLESIZE; pos++)
+    {
+        if (table[pos] < limit) table[pos] = limit;
+    }
+#endif
+}
+
+
+/* ZSTD_compressBegin() :
+*   Starts a new frame : resets the compression context and writes
+*   the magic number (4 bytes, big-endian) at the beginning of `dst`.
+*   return : 4 (nb of bytes written),
+*            or -ZSTD_ERROR_maxDstSize_tooSmall if `dst` cannot hold 4 bytes */
+size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
+{
+    if (maxDstSize >= 4)
+    {
+        ZSTD_resetCCtx(ctx);                      /* Init */
+        ZSTD_writeBE32(dst, ZSTD_magicNumber);    /* Header */
+        return 4;
+    }
+
+    /* not enough room for the header */
+    return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+}
+
+
+/* ZSTD_compressContinue() :
+*   Compresses `srcSize` bytes from `src` into `dst`, as a succession of blocks
+*   covering at most BLOCKSIZE input bytes each, every block being preceded by its header.
+*   When `src` does not directly follow the data provided by the previous call,
+*   the context is reset and `src` becomes the new base.
+*   return : nb of bytes written into `dst`, or an error code (testable with ZSTD_isError()) */
+size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    refTables_t* const ctx = (refTables_t*) cctx;
+    const BYTE* ip = src;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+
+    /* Init : attach to src on first use, restart whenever input is not contiguous */
+    if (ctx->base==NULL)
+    {
+        ctx->base = src;
+        ctx->current = 0;
+    }
+    if (src != ctx->base + ctx->current)
+    {
+        ZSTD_resetCCtx(ctx);
+        ctx->base = src;
+        ctx->current = 0;
+    }
+    ctx->current += (U32)srcSize;
+
+    /* note : no hash table maintenance (ZSTD_limitCtx) is performed between blocks */
+    for ( ; srcSize != 0; )
+    {
+        size_t cSize;
+        size_t blockSize = srcSize;
+        if (blockSize > BLOCKSIZE) blockSize = BLOCKSIZE;
+
+        /* compress : payload goes after the room reserved for the block header */
+        if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+        cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
+
+        if (cSize != 0)
+        {
+            if (ZSTD_isError(cSize)) return cSize;
+            /* block header : block type in the 2 top bits of first byte, then compressed size */
+            op[0]  = (BYTE)(cSize>>16);
+            op[0] += (BYTE)(bt_compressed << 6);   /* is a compressed block */
+            op[1]  = (BYTE)(cSize>>8);
+            op[2]  = (BYTE)cSize;
+            cSize += 3;
+        }
+        else
+        {
+            /* 0 : block is stored uncompressed (header written by ZSTD_noCompressBlock) */
+            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);
+            if (ZSTD_isError(cSize)) return cSize;
+        }
+
+        op += cSize;
+        maxDstSize -= cSize;
+        ip += blockSize;
+        srcSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+
+/* ZSTD_compressEnd() :
+*   Closes the frame by writing a 3-byte "end" block header into `dst`.
+*   `ctx` is not used.
+*   return : 3 (nb of bytes written),
+*            or -ZSTD_ERROR_maxDstSize_tooSmall if `dst` is smaller than a block header */
+size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
+{
+    BYTE* const endMark = dst;
+
+    (void)ctx;   /* unused */
+
+    if (maxDstSize < ZSTD_blockHeaderSize)
+        return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+
+    /* End of frame : block type in the 2 top bits, size fields left at 0 */
+    endMark[0] = (BYTE)(bt_end << 6);
+    endMark[1] = 0;
+    endMark[2] = 0;
+
+    return 3;
+}
+
+
+/* ZSTD_compressCCtx() :
+*   Produces a complete frame into `dst` using context `ctx` :
+*   frame header, then compressed blocks, then end-of-frame mark.
+*   return : total nb of bytes written into `dst`,
+*            or the error code of the first stage which failed */
+static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    size_t written;
+
+    /* Header */
+    written = ZSTD_compressBegin(ctx, dst, maxDstSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+    maxDstSize -= written;
+
+    /* Compression */
+    written = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+    maxDstSize -= written;
+
+    /* Close frame */
+    written = ZSTD_compressEnd(ctx, op, maxDstSize);
+    if (ZSTD_isError(written)) return written;
+    op += written;
+
+    return (op - ostart);
+}
+
+
+/* ZSTD_compress() :
+*   One-step compression : creates a temporary compression context,
+*   compresses `srcSize` bytes from `src` into `dst` as a complete frame,
+*   then releases the context.
+*   return : nb of bytes written into `dst`,
+*            or an error code (testable with ZSTD_isError()),
+*            -ZSTD_ERROR_GENERIC when the context cannot be created */
+size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    size_t r;
+    void* const ctx = ZSTD_createCCtx();
+
+    /* NOTE(review): ZSTD_createCCtx() is defined outside this excerpt;
+       a NULL result is assumed to mean that the context could not be allocated */
+    if (ctx == NULL) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
+    ZSTD_freeCCtx(ctx);
+    return r;
+}
+
+
+/**************************************************************
+*   Decompression code
+**************************************************************/
+
+/* ZSTD_getcBlockSize() :
+*   Parses the 3-byte block header located at `src`.
+*   Fills `bpPtr` : blockType comes from the 2 top bits of the first byte;
+*   origSize receives the header size field for bt_rle blocks, 0 otherwise.
+*   return : 0 for bt_end, 1 for bt_rle, the header size field for other block types,
+*            or -ZSTD_ERROR_wrongSrcSize if fewer than 3 bytes are available */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
+{
+    const BYTE* const header = src;
+    U32 sizeField;
+
+    if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+
+    /* size field : 3 low bits of first byte, followed by the next 2 bytes */
+    sizeField = ((header[0] & 7)<<16) + (header[1]<<8) + header[2];
+
+    bpPtr->blockType = header[0] >> 6;
+    if (bpPtr->blockType == bt_rle)
+        bpPtr->origSize = sizeField;
+    else
+        bpPtr->origSize = 0;
+
+    if (bpPtr->blockType == bt_end) return 0;
+    if (bpPtr->blockType == bt_rle) return 1;
+    return sizeField;
+}
+
+
+/* ZSTD_copyUncompressedBlock() :
+*   Copies a raw (stored) block of `srcSize` bytes from `src` to `dst`.
+*   return : srcSize, or -ZSTD_ERROR_maxDstSize_tooSmall if it does not fit into `dst` */
+static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    if (srcSize <= maxDstSize)
+    {
+        memcpy(dst, src, srcSize);
+        return srcSize;
+    }
+
+    return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+}
+
+
+/* ZSTD_decompressLiterals_usingDTable_generic() :
+*   Decodes an FSE-compressed literals stream (`src`, `srcSize`) with decoding table `DTable`,
+*   using 2 decoding states which are consumed alternately.
+*   Symbols are written backwards, starting from the end of `dst` (dst + maxDstSize),
+*   so decoded literals end up packed against the end of the buffer.
+*   `fast` selects FSE_decodeSymbolFast() instead of FSE_decodeSymbol().
+*   return : nb of literals decoded (oend-op),
+*            or -ZSTD_ERROR_maxDstSize_tooSmall when `dst` is full while input is not finished,
+*            or -ZSTD_ERROR_GENERIC for any other failure.
+*   force inline : 'fast' really needs to be evaluated at compile time */
+FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
+                       void* const dst, size_t maxDstSize,
+                 const void* src, size_t srcSize,
+                 const void* DTable, U32 fast)
+{
+    BYTE* op = (BYTE*) dst;
+    BYTE* const olimit = op;               /* lowest writable address (start of dst) */
+    BYTE* const oend = op + maxDstSize;
+    FSE_DStream_t bitD;
+    FSE_DState_t state1, state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = FSE_initDStream(&bitD, src, srcSize);
+    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    FSE_initDState(&state1, &bitD, DTable);
+    FSE_initDState(&state2, &bitD, DTable);
+    op = oend;                             /* output is written from the end, going backwards */
+
+    // 2 symbols per loop
+    /* (4 symbols per loop when the second static test below is true);
+       runs while FSE_reloadDStream() returns 0 and more than 3 bytes of room remain */
+    while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
+    {
+        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if (LitFSELog*2+7 > sizeof(size_t)*8)    // This test must be static
+            FSE_reloadDStream(&bitD);
+
+        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+
+        if (LitFSELog*4+7 < sizeof(size_t)*8)    // This test must be static
+        {
+            *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+            *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+        }
+    }
+
+    /* tail : one symbol at a time, checking every stop condition before each write */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
+
+        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
+            break;
+
+        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
+    }
+
+    /* end ? success requires the bitstream and both states to be fully consumed */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
+        return oend-op;
+
+    if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-ZSTD_ERROR_GENERIC;
+}
+
+/* ZSTD_decompressLiterals_usingDTable() :
+*   Dispatcher : invokes the force-inlined generic decoder with a literal constant
+*   for `fast` (1 or 0), one call site per variant.
+*   return : same as ZSTD_decompressLiterals_usingDTable_generic() */
+static size_t ZSTD_decompressLiterals_usingDTable(
+                       void* const dst, size_t maxDstSize,
+                 const void* src, size_t srcSize,
+                 const void* DTable, U32 fast)
+{
+    return fast ?
+        ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1) :
+        ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
+}
+
+/* ZSTD_decompressLiterals() :
+*   Decodes a compressed literals sub-block : reads the FSE header found at `src`,
+*   builds the decoding table into `ctx`, then decodes the remaining input into `dst`.
+*   return : result of ZSTD_decompressLiterals_usingDTable(),
+*            -ZSTD_ERROR_wrongLBlockSize if srcSize < 2,
+*            -ZSTD_ERROR_GENERIC if the header or the table is invalid */
+static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
+                                const void* src, size_t srcSize)
+{
+    /* assumed : blockType == blockCompressed */
+    const BYTE* ip = src;
+    short norm[256];
+    U32 maxSymbolValue = 255;
+    U32 tableLog;
+    size_t headerSize;
+    size_t buildResult;
+
+    if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize;   /* too small input size */
+
+    /* normalized counters */
+    headerSize = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
+    if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    /* decoding table; a successful result is forwarded as the `fast` flag */
+    buildResult = FSE_buildDTable (ctx, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(buildResult)) return (size_t)-ZSTD_ERROR_GENERIC;
+
+    return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, ip + headerSize, srcSize - headerSize, ctx, (U32)buildResult);
+}
+
+
+/* ZSTD_decodeLiteralsBlock() :
+*   Decodes the literals sub-block located at the beginning of `src`.
+*   On success, `*litPtr` points to the first literal :
+*   - bt_raw        : directly inside `src` (no copy)
+*   - bt_rle        : inside `dst`, the run being generated against the end of the buffer
+*   - bt_compressed : inside `dst`, literals being decoded against the end of the buffer
+*   `ctx` is used as FSE decoding table for compressed literals.
+*   return : nb of bytes read from `src` (sub-block header included), or an error code :
+*            -ZSTD_ERROR_wrongLBlockSize     if the sub-block claims more bytes than `src` holds,
+*            -ZSTD_ERROR_maxDstSize_tooSmall if the literals do not fit into `dst`,
+*            -ZSTD_ERROR_GENERIC             for any other block type */
+size_t ZSTD_decodeLiteralsBlock(void* ctx,
+                                void* dst, size_t maxDstSize,
+                          const BYTE** litPtr,
+                          const void* src, size_t srcSize)
+{
+    const BYTE* const istart = src;
+    const BYTE* ip = istart;
+    BYTE* const ostart = dst;
+    BYTE* const oend = ostart + maxDstSize;
+    blockProperties_t litbp;
+
+    /* ZSTD_getcBlockSize() fails when srcSize < 3, hence the subtraction below cannot wrap
+       as long as ZSTD_blockHeaderSize is 3 (its definition is outside this excerpt) */
+    size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
+    if (ZSTD_isError(litcSize)) return litcSize;
+    if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
+    ip += ZSTD_blockHeaderSize;
+
+    switch(litbp.blockType)
+    {
+    case bt_raw: *litPtr = ip; ip+= litcSize; break;
+    case bt_rle:
+        {
+            size_t rleSize = litbp.origSize;
+            /* rleSize comes straight from the input : it must fit into dst,
+               otherwise memset() would write before the start of the buffer */
+            if (rleSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
+            memset(oend - rleSize, *ip, rleSize);
+            *litPtr = oend - rleSize;
+            ip++;
+            break;
+        }
+    case bt_compressed:
+        {
+            size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
+            if (ZSTD_isError(cSize)) return cSize;
+            *litPtr = oend - cSize;
+            ip += litcSize;
+            break;
+        }
+    default:
+        return (size_t)-ZSTD_ERROR_GENERIC;
+    }
+
+    return ip-istart;
+}
+
+
+/* ZSTD_decodeSeqHeaders() :
+*   Parses the sequences section header found at `src` :
+*   - size of the last literals run, stored into `*lastLLPtr`
+*   - table types for literal lengths, offsets and match lengths (2 bits each)
+*   - the "dumps" area, whose start is stored into `*dumpsPtr`
+*   then builds the 3 decoding tables (DTableLL, DTableOffb, DTableML),
+*   each one being either rle, raw, or described by an FSE header.
+*   return : nb of bytes read from `src`,
+*            or -ZSTD_ERROR_GENERIC if the section is truncated or a table is invalid */
+size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
+                               void* DTableLL, void* DTableML, void* DTableOffb,
+                         const void* src, size_t srcSize)
+{
+    const BYTE* const istart = src;
+    const BYTE* ip = istart;
+    const BYTE* const iend = istart + srcSize;
+    U32 LLtype, Offtype, MLtype;
+    U32 LLlog, Offlog, MLlog;
+    size_t dumpsLength;
+
+    /* SeqHead */
+    ip += ZSTD_readProgressive(lastLLPtr, ip);
+    LLtype  = *ip >> 6;
+    Offtype = (*ip >> 4) & 3;
+    MLtype  = (*ip >> 2) & 3;
+    if (*ip & 2)
+    {
+        /* long form : dumps length on 2 bytes */
+        dumpsLength  = ip[2];
+        dumpsLength += ip[1] << 8;
+        ip += 3;
+    }
+    else
+    {
+        /* short form : 1 bit from the flags byte + 1 byte */
+        dumpsLength  = ip[1];
+        dumpsLength += (ip[0] & 1) << 8;
+        ip += 2;
+    }
+    /* dumps area must be entirely contained within src */
+    if ((ip > iend) || (dumpsLength > (size_t)(iend - ip))) return (size_t)-ZSTD_ERROR_GENERIC;
+    *dumpsPtr = ip;
+    ip += dumpsLength;
+
+    /* sequences */
+    {
+        S16 norm[MaxML+1];    /* assumption : MaxML >= MaxLL and MaxOff */
+        size_t errorCode;
+
+        /* Build DTables */
+        switch(LLtype)
+        {
+        U32 max;
+        case bt_rle :
+            LLlog = 0;
+            if (ip >= iend) return (size_t)-ZSTD_ERROR_GENERIC;   /* missing rle symbol */
+            FSE_buildDTable_rle(DTableLL, *ip++); break;
+        case bt_raw :
+            LLlog = LLbits;
+            FSE_buildDTable_raw(DTableLL, LLbits); break;
+        default :
+            max = MaxLL;
+            errorCode = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            errorCode = FSE_buildDTable(DTableLL, norm, max, LLlog);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+
+        switch(Offtype)
+        {
+        U32 max;
+        case bt_rle :
+            Offlog = 0;
+            if (ip >= iend) return (size_t)-ZSTD_ERROR_GENERIC;   /* missing rle symbol */
+            FSE_buildDTable_rle(DTableOffb, *ip++); break;
+        case bt_raw :
+            Offlog = Offbits;
+            FSE_buildDTable_raw(DTableOffb, Offbits); break;
+        default :
+            max = MaxOff;
+            errorCode = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            errorCode = FSE_buildDTable(DTableOffb, norm, max, Offlog);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+
+        switch(MLtype)
+        {
+        U32 max;
+        case bt_rle :
+            MLlog = 0;
+            if (ip >= iend) return (size_t)-ZSTD_ERROR_GENERIC;   /* missing rle symbol */
+            FSE_buildDTable_rle(DTableML, *ip++); break;
+        case bt_raw :
+            MLlog = MLbits;
+            FSE_buildDTable_raw(DTableML, MLbits); break;
+        default :
+            max = MaxML;
+            errorCode = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+            ip += errorCode;
+            errorCode = FSE_buildDTable(DTableML, norm, max, MLlog);
+            if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+/* ZSTD_prefetch() :
+*   Touches the byte located at address `p` through a volatile read.
+*   The argument is parenthesized, so that an expression such as `ptr + n` is cast as a whole;
+*   the loaded value is explicitly discarded. */
+#define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)(p); (void)pByte; }
+
+/* ZSTD_decompressBlock() :
+*   Decodes one compressed block (`src`, `srcSize`) into `dst`.
+*   Steps : decode the literals sub-block, build the 3 sequence decoding tables,
+*   then execute sequences one by one (copy literals, then copy match),
+*   and finally append the last literals run.
+*   `ctx` provides the memory for decoding tables : it is first used for the literals table
+*   (by ZSTD_decodeLiteralsBlock), then split into the ML / LL / Offb tables.
+*   return : nb of bytes written into `dst`, or an error code.
+*   NOTE(review): no check against `oend` is visible here before literals are copied to `op`
+*   - confirm how oversized or corrupted sequences are rejected. */
+FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* ip = src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode;
+    size_t lastLLSize;
+    const BYTE* dumps;
+    const BYTE* litPtr;
+    const BYTE* litEnd;
+    /* adjustment tables applied to `match` when offset < 8 (source and destination overlap) */
+    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
+    /* ctx is carved into 3 consecutive tables : ML, then LL, then Offb */
+    void* DTableML = ctx;
+    void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
+    void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);
+
+    /* blockType == blockCompressed, srcSize is trusted */
+
+    /* literal sub-block */
+    errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Build Decoding Tables */
+    errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    /* end pos : literals either live inside dst (packed against oend),
+       or are still inside src (raw literals), in which case they end where the sequences section starts (ip) */
+    if ((litPtr>=ostart) && (litPtr<=oend))
+        litEnd = oend - lastLLSize;
+    else
+        litEnd = ip - lastLLSize;
+    ip += errorCode;
+
+    /* decompression */
+    {
+        FSE_DStream_t DStream;
+        FSE_DState_t stateLL, stateOffb, stateML;
+        size_t prevOffset = 0, offset = 0;
+        size_t qutt=0;   /* nb of literal bytes saved (then restored) around a match copy */
+
+        FSE_initDStream(&DStream, ip, iend-ip);
+        FSE_initDState(&stateLL, &DStream, DTableLL);
+        FSE_initDState(&stateOffb, &DStream, DTableOffb);
+        FSE_initDState(&stateML, &DStream, DTableML);
+
+        /* one sequence per iteration, as long as FSE_reloadDStream() returns less than 2 */
+        while (FSE_reloadDStream(&DStream)<2)
+        {
+            U32 nbBits, offsetCode;
+            const BYTE* match;
+            size_t litLength;
+            size_t matchLength;
+            size_t newOffset;
+
+_another_round:   /* also entered from the checks located after the loop */
+
+            /* Literals */
+            litLength = FSE_decodeSymbol(&stateLL, &DStream);
+            if (litLength) prevOffset = offset;
+            if (litLength == MaxLL)
+            {
+                /* escape : 1 extra byte from dumps; value 255 announces a 3-byte length */
+                BYTE add = *dumps++;
+                if (add < 255) litLength += add;
+                else
+                {
+                    /* NOTE(review): 4-byte load through a U32 cast, keeping the low 24 bits;
+                       looks like it depends on unaligned access and little-endian byte order - confirm */
+                    litLength = (*(U32*)dumps) & 0xFFFFFF;
+                    dumps += 3;
+                }
+            }
+            if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
+                memmove(op, litPtr, litLength);   /* overwrite risk */
+            else
+                ZSTD_wildcopy(op, litPtr, litLength);
+            op += litLength;
+            litPtr += litLength;
+
+            /* Offset : code 0 repeats prevOffset, otherwise offset = (1<<nbBits) + nbBits extra bits, with nbBits = code-1 */
+            offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
+            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
+            nbBits = offsetCode - 1;
+            if (offsetCode==0) nbBits = 0;   /* cmove */
+            newOffset = FSE_readBits(&DStream, nbBits);
+            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
+            newOffset += (size_t)1 << nbBits;
+            if (offsetCode==0) newOffset = prevOffset;
+            match = op - newOffset;
+            prevOffset = offset;
+            offset = newOffset;
+
+            /* MatchLength : same escape mechanism as literal length; MINMATCH is added afterwards */
+            matchLength = FSE_decodeSymbol(&stateML, &DStream);
+            if (matchLength == MaxML)
+            {
+                BYTE add = *dumps++;
+                if (add < 255) matchLength += add;
+                else
+                {
+                    matchLength = (*(U32*)dumps) & 0xFFFFFF;
+                    dumps += 3;
+                }
+            }
+            matchLength += MINMATCH;
+
+            /* copy Match */
+            {
+                BYTE* const endMatch = op + matchLength;
+                U64 saved[2];
+
+                /* pending literals start less than 12 bytes after the end of the match :
+                   save them, since they are restored once the match is copied */
+                if ((size_t)(litPtr - endMatch) < 12)
+                {
+                    qutt = endMatch + 12 - litPtr;
+                    if ((litPtr + qutt) > oend) qutt = oend-litPtr;
+                    memcpy(saved, litPtr, qutt);
+                }
+
+                if (offset < 8)
+                {
+                    /* close match : first 4 bytes copied one by one, then `match` is adjusted using the tables */
+                    const size_t dec64 = dec64table[offset];
+                    op[0] = match[0];
+                    op[1] = match[1];
+                    op[2] = match[2];
+                    op[3] = match[3];
+                    match += dec32table[offset];
+                    ZSTD_copy4(op+4, match);
+                    match -= dec64;
+                } else { ZSTD_copy8(op, match); }
+
+                if (endMatch > oend-12)
+                {
+                    /* match ends close to the end of dst : wildcopy up to oend-8 at most, then byte by byte */
+                    if (op < oend-16)
+                    {
+                        ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
+                        match += (oend-8) - op;
+                        op = oend-8;
+                    }
+                    while (op<endMatch) *op++ = *match++;
+                }
+                else
+                    ZSTD_wildcopy(op+8, match+8, matchLength-8);   /* works even if matchLength < 8 */
+
+                op = endMatch;
+
+                /* put back the literals saved before the copy */
+                if ((size_t)(litPtr - endMatch) < 12)
+                    memcpy((void*)litPtr, saved, qutt);
+            }
+        }
+
+        /* check if reached exact end */
+        if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC;   /* requested too much : data is corrupted */
+        if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round;   /* some ultra-compressible sequence remain ! */
+        if (litPtr != litEnd) goto _another_round;   /* literals not entirely spent */
+
+        /* last literal segment */
+        if (op != litPtr) memmove(op, litPtr, lastLLSize);
+        op += lastLLSize;
+    }
+
+    return op-ostart;
+}
+
+
+/* ZSTD_decompressDCtx() :
+*   Decodes a complete frame (`src`, `srcSize`) into `dst`, using `ctx` as memory for decoding tables.
+*   The frame must start with the magic number, and finish with an end-of-frame block
+*   located exactly at the end of `src`.
+*   Decoding stops on the end-of-frame block only : a data block of size 0 does not end the frame,
+*   and its decoding result is checked like any other block.
+*   return : nb of bytes written into `dst`, or an error code :
+*            -ZSTD_ERROR_wrongSrcSize     if `src` is truncated, or has bytes left after the end mark,
+*            -ZSTD_ERROR_wrongMagicNumber if the frame header is not recognized,
+*            -ZSTD_ERROR_GENERIC          for unsupported block types (rle is not yet handled),
+*            or the error code reported while decoding a block */
+static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32 magicNumber;
+    size_t errorCode=0;
+    blockProperties_t blockProperties;
+
+    /* Header */
+    if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+    magicNumber = ZSTD_readBE32(src);
+    if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
+    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
+
+    while (1)
+    {
+        size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
+        if (ZSTD_isError(blockSize))
+            return blockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSize -= ZSTD_blockHeaderSize;
+        if (ip+blockSize > iend)
+            return (size_t)-ZSTD_ERROR_wrongSrcSize;
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
+            break;
+        case bt_raw :
+            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
+        case bt_end :
+            /* end of frame : nothing is allowed after the end mark */
+            if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+        /* only the end mark terminates the frame (not any block of size 0) */
+        if (blockProperties.blockType == bt_end) break;
+
+        if (ZSTD_isError(errorCode)) return errorCode;
+        op += errorCode;
+        ip += blockSize;
+        remainingSize -= blockSize;
+    }
+
+    return op-ostart;
+}
+
+
+/* ZSTD_decompress() :
+*   One-step decompression of a complete frame (`src`, `srcSize`) into `dst`.
+*   Memory for the decoding tables is taken on the stack.
+*   return : same as ZSTD_decompressDCtx() */
+size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    /* room for the match length, literal length and offset decoding tables */
+    U32 dtableSpace[FSE_DTABLE_SIZE_U32(MLFSELog) + FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog)];
+
+    return ZSTD_decompressDCtx(dtableSpace, dst, maxDstSize, src, srcSize);
+}
+
+
+/******************************
+*  Streaming Decompression API
+******************************/
+
+/* dctx_t : state of a streaming decompression session.
+*  `ctx` must remain the first member : the dctx_t pointer itself is handed to
+*  ZSTD_decompressBlock() as its decoding tables memory (see ZSTD_decompressContinue()). */
+typedef struct
+{
+    U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];   /* memory for FSE decoding tables */
+    size_t expected;     /* exact srcSize required by the next call to ZSTD_decompressContinue(); 0 once the frame is finished */
+    blockType_t bType;   /* type of the block which will be provided by the next call */
+    U32 started;         /* 0 : next input starts with the frame header; 1 : frame header already consumed */
+} dctx_t;
+
+
+/* ZSTD_createDCtx() :
+*   Allocates and initializes a streaming decompression context.
+*   The first input expected by ZSTD_decompressContinue() is
+*   the frame header (4 bytes) followed by the first block header.
+*   return : the new context, to be released with ZSTD_freeDCtx(),
+*            or NULL if memory allocation fails */
+ZSTD_dctx_t ZSTD_createDCtx(void)
+{
+    dctx_t* dctx = malloc(sizeof(dctx_t));
+    if (dctx == NULL) return NULL;   /* allocation failed : nothing to initialize */
+    dctx->expected = 4 + ZSTD_blockHeaderSize;   // Frame Header + Block Header
+    dctx->started = 0;
+    return (ZSTD_dctx_t)dctx;
+}
+
+/* ZSTD_freeDCtx() :
+*   Releases a context created by ZSTD_createDCtx().
+*   return : always 0 */
+size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
+{
+    void* const toRelease = dctx;
+
+    free(toRelease);
+
+    return 0;
+}
+
+
+/* ZSTD_getNextcBlockSize() :
+*   return : the exact nb of input bytes (`srcSize`) that the next call
+*            to ZSTD_decompressContinue() must receive */
+size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
+{
+    const dctx_t* const state = (const dctx_t*)dctx;
+    return state->expected;
+}
+
+/* ZSTD_decompressContinue() :
+*   Streaming decompression, one step at a time.
+*   `srcSize` must be exactly the value reported by ZSTD_getNextcBlockSize().
+*   Each input is made of the content announced by the previous step
+*   (the frame header for the first call, a block content afterwards)
+*   followed by the header of the next block, which is parsed to prepare the next step.
+*   Once the end-of-frame header has been read, the expected size becomes 0
+*   and the context is ready to start a new frame.
+*   return : nb of bytes written into `dst` (0 for the frame header step), or an error code :
+*            -ZSTD_ERROR_wrongSrcSize     if srcSize is not the expected size,
+*                                         or is too small to contain a block header (frame already finished),
+*            -ZSTD_ERROR_wrongMagicNumber if the frame header is not recognized,
+*            -ZSTD_ERROR_GENERIC          for unsupported block types (rle is not yet handled),
+*            or the error code reported while decoding the block */
+size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t* ctx = (dctx_t*)dctx;
+    size_t cSize;
+    size_t rSize;
+
+    // Sanity check
+    if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+    /* every valid input ends with a block header;
+       this also rejects a call made after end of frame (expected == 0), which would otherwise wrap cSize */
+    if (srcSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
+    cSize = srcSize - ZSTD_blockHeaderSize;   /* content size, next block header excluded */
+
+    // Decompress
+    if (!ctx->started)
+    {
+        // Just check correct magic header
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
+        rSize = 0;
+    }
+    else
+    {
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            /* ctx starts with the decoding tables memory, hence it can be passed as is */
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
+            break;
+        case bt_rle :
+            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
+        case bt_end :
+            rSize = 0;
+            break;
+        default:
+            return (size_t)-ZSTD_ERROR_GENERIC;
+        }
+    }
+
+    // Prepare next block
+    {
+        const BYTE* header = src;
+        blockProperties_t bp;
+        size_t blockSize;
+        header += cSize;   /* next block header is located right after current content */
+        blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
+        if (ZSTD_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->started = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize + ZSTD_blockHeaderSize;
+            ctx->bType = bp.blockType;
+            ctx->started = 1;
+        }
+    }
+
+    return rSize;
+}
+
+
diff --git a/lib/zstd.h b/lib/zstd.h
new file mode 100644
index 0000000..47ce21f
--- /dev/null
+++ b/lib/zstd.h
@@ -0,0 +1,93 @@
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**************************************
+*  Includes
+**************************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Version
+**************************************/
+/* Library version, expressed as 3 components.
+   ZSTD_VERSION_NUMBER packs them into a single value : MAJOR*10000 + MINOR*100 + RELEASE.
+   NOTE(review): ZSTD_versionNumber() is implemented elsewhere; presumably it returns ZSTD_VERSION_NUMBER - confirm */
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
+#define ZSTD_VERSION_MINOR    0    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+unsigned ZSTD_versionNumber (void);
+
+
+/**************************************
+*  Simple one-step functions
+**************************************/
+size_t ZSTD_compress(   void* dst, size_t maxDstSize,
+                  const void* src, size_t srcSize);
+
+size_t ZSTD_decompress( void* dst, size_t maxOriginalSize,
+                  const void* src, size_t compressedSize);
+
+/*
+ZSTD_compress() :
+    Compresses 'srcSize' bytes from buffer 'src' into buffer 'dst', of maximum size 'maxDstSize'.
+    Destination buffer should be sized to handle worst-case situations (input data not compressible).
+    Worst case size evaluation is provided by function ZSTD_compressBound().
+    return : the number of bytes written into buffer 'dst'
+             or an error code if it fails (which can be tested using ZSTD_isError())
+
+ZSTD_decompress() :
+    compressedSize : is obviously the source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTD_isError())
+*/
+
+
+/**************************************
+*  Tool functions
+**************************************/
+size_t      ZSTD_compressBound(size_t srcSize);   /* maximum compressed size */
+
+/* Error Management */
+unsigned    ZSTD_isError(size_t code);         /* tells if a return value is an error code */
+const char* ZSTD_getErrorName(size_t code);    /* provides error code string (useful for debugging) */
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
new file mode 100755
index 0000000..952a006
--- /dev/null
+++ b/lib/zstd_static.h
@@ -0,0 +1,80 @@
+/*

+    zstd - standard compression library

+    Header File for static linking only

+    Copyright (C) 2014-2015, Yann Collet.

+

+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

+

+    Redistribution and use in source and binary forms, with or without

+    modification, are permitted provided that the following conditions are

+    met:

+    * Redistributions of source code must retain the above copyright

+    notice, this list of conditions and the following disclaimer.

+    * Redistributions in binary form must reproduce the above

+    copyright notice, this list of conditions and the following disclaimer

+    in the documentation and/or other materials provided with the

+    distribution.

+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+

+    You can contact the author at :

+    - zstd source repository : https://github.com/Cyan4973/zstd

+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c

+*/

+#pragma once

+

+#if defined (__cplusplus)

+extern "C" {

+#endif

+

+/**************************************

+*  Includes

+**************************************/

+#include "zstd.h"

+

+

+/**************************************

+*  Streaming functions

+**************************************/

+typedef void* ZSTD_cctx_t;

+ZSTD_cctx_t ZSTD_createCCtx(void);

+size_t      ZSTD_freeCCtx(ZSTD_cctx_t cctx);

+

+size_t ZSTD_compressBegin(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);

+size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);

+size_t ZSTD_compressEnd(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize);

+

+typedef void* ZSTD_dctx_t;

+ZSTD_dctx_t ZSTD_createDCtx(void);

+size_t      ZSTD_freeDCtx(ZSTD_dctx_t dctx);

+

+size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx);

+size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);

+

+

+/**************************************

+*  Error management

+**************************************/

+/* ZSTD_LIST_ERRORS() : single list of all error names; ITEM is applied to each name, in order. */

+/* ZSTD_errorCodes is generated from this list, hence enumerators are numbered in listing order, */

+/* starting with ZSTD_OK_NoError = 0. Library functions report an error by returning (size_t)-code. */

+#define ZSTD_LIST_ERRORS(ITEM) \

+        ITEM(ZSTD_OK_NoError) ITEM(ZSTD_ERROR_GENERIC) \

+        ITEM(ZSTD_ERROR_wrongMagicNumber) \

+        ITEM(ZSTD_ERROR_wrongSrcSize) ITEM(ZSTD_ERROR_maxDstSize_tooSmall) \

+        ITEM(ZSTD_ERROR_wrongLBlockSize) \

+        ITEM(ZSTD_ERROR_maxCode)

+

+#define ZSTD_GENERATE_ENUM(ENUM) ENUM,

+typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes;   /* exposed list of errors; static linking only */

+

+

+#if defined (__cplusplus)

+}

+#endif
\ No newline at end of file
diff --git a/programs/COPYING b/programs/COPYING
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/programs/COPYING
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/programs/Makefile b/programs/Makefile
new file mode 100644
index 0000000..8ab2706
--- /dev/null
+++ b/programs/Makefile
@@ -0,0 +1,151 @@
+# ##########################################################################
+# ZSTD programs - Makefile
+# Copyright (C) Yann Collet 2015
+#
+# GPL v2 License
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# You can contact the author at :
+#  - ZSTD source repository : https://github.com/Cyan4973/zstd
+#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+# ##########################################################################
+# zstd : Command Line Utility, supporting gzip-like arguments
+# datagen : Synthetic and parameterizable data generator, for tests
+# fuzzer  : Test tool, to check zstd integrity on target platform
+# fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
+# fullbench  : Precisely measure speed for each zstd inner function
+# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
+# ##########################################################################
+
+RELEASE?= r0
+# ZSTDDIR locates the library sources; FLAGS derives its include path from it
+ZSTDDIR=../lib
+DESTDIR?=
+PREFIX ?= /usr
+CFLAGS ?= -O3
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -DZSTD_VERSION=\"$(RELEASE)\"
+FLAGS   = -I$(ZSTDDIR) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+BINDIR=$(PREFIX)/bin
+MANDIR=$(PREFIX)/share/man/man1
+
+TEST_FILES = COPYING
+TEST_TARGETS=test-native
+
+
+# Windows (OS starts with "Windows") : .exe suffix, output discarded to nul ; elsewhere : no suffix, /dev/null
+ifneq (,$(filter Windows%,$(OS)))
+EXT =.exe
+VOID = nul
+else
+EXT =
+VOID = /dev/null
+endif
+
+# Command-style targets are declared phony so a stray file of the same name cannot mask them
+.PHONY: default all clean install uninstall test test32 test-all test-zstd test-zstd32 test-fullbench test-fullbench32 test-fuzzer test-fuzzer32 test-mem
+default: zstd
+all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32
+
+# Program sources. Each *32 variant is built from the same files as its native twin.
+zstd zstd32: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c
+
+fullbench fullbench32: $(ZSTDDIR)/zstd.c fullbench.c
+
+fuzzer fuzzer32: $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
+
+datagen: datagen.c
+
+
+# 32-bit builds : only the *32 programs receive -m32.
+# ARCH32 expands to nothing for every other target.
+zstd32 fullbench32 fuzzer32: ARCH32 = -m32
+
+
+# Shared recipe : compile and link all prerequisites of the target in one step.
+# $^ lists the sources declared above for that target;
+# the output file is the target name followed by $(EXT).
+zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 datagen:
+	$(CC) $(ARCH32) $(FLAGS) $^ -o $@$(EXT)
+# clean : delete the programs built here plus stray core / object / tmp files
+clean:
+	@rm -f core *.o tmp
+	@rm -f zstd$(EXT) zstd32$(EXT)
+	@rm -f fullbench$(EXT) fullbench32$(EXT)
+	@rm -f fuzzer$(EXT) fuzzer32$(EXT)
+	@rm -f datagen$(EXT)
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+# install : put the zstd binary and its man pages under $(DESTDIR)$(PREFIX); zstdcat is a symlink to zstd
+install: zstd
+	@echo Installing binaries
+	@install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
+	@install -m 755 zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat
+	@echo Installing man pages
+	@install -m 644 zstd.1 $(DESTDIR)$(MANDIR)/zstd.1
+	@install -m 644 zstdcat.1 $(DESTDIR)$(MANDIR)/zstdcat.1
+	@echo zstd installation completed
+# uninstall : remove the files placed by `install`; rm -f keeps this working when some are already gone
+uninstall:
+	rm -f $(DESTDIR)$(BINDIR)/zstdcat
+	rm -f $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	rm -f $(DESTDIR)$(MANDIR)/zstd.1
+	rm -f $(DESTDIR)$(MANDIR)/zstdcat.1
+	@echo zstd programs successfully uninstalled
+# Aggregate test targets : native suite, 32-bit suite, and both together
+test: test-zstd test-fullbench test-fuzzer test-mem
+
+test32: test-zstd32 test-fullbench32 test-fuzzer32
+
+test-all: test test32
+# Round-trip smoke tests : datagen output is compressed, then decompressed, and the result discarded
+test-zstd: zstd datagen
+	./datagen         | ./zstd -v    | ./zstd -d > $(VOID)
+	./datagen -g256MB | ./zstd -v    | ./zstd -d > $(VOID)
+	./datagen -g6GB   | ./zstd -vq   | ./zstd -d > $(VOID)
+
+test-zstd32: zstd32 datagen
+	./datagen         | ./zstd32 -v  | ./zstd32 -d > $(VOID)
+	./datagen -g256MB | ./zstd32 -v  | ./zstd32 -d > $(VOID)
+	./datagen -g6GB   | ./zstd32 -vq | ./zstd32 -d > $(VOID)
+# Run the fullbench (with -i1) and fuzzer programs, in their native and 32-bit builds
+test-fullbench: fullbench
+	./fullbench -i1
+
+test-fullbench32: fullbench32
+	./fullbench32 -i1
+
+test-fuzzer: fuzzer
+	./fuzzer
+
+test-fuzzer32: fuzzer32
+	./fuzzer32
+# test-mem : run the programs under valgrind; --error-exitcode makes detected errors and leaks fail the target
+test-mem: zstd datagen fuzzer fullbench
+	./datagen -g16KB > tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp /dev/null
+	./datagen -g128MB > tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp /dev/null
+	rm tmp
+	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i128 -t1
+	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
+
+endif
diff --git a/programs/bench.c b/programs/bench.c
new file mode 100755
index 0000000..85f4993
--- /dev/null
+++ b/programs/bench.c
@@ -0,0 +1,494 @@
+/*
+    bench.c - Demo module to benchmark open-source compression algorithms
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/***************************************
+*  Compiler Options
+***************************************/
+/* Disable some Visual warning messages */
+#define _CRT_SECURE_NO_WARNINGS                  /* fopen */
+
+// Unix Large Files support (>4GB)
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   // Sun Solaris 32-bits requires specific definitions
+#  define _LARGEFILE_SOURCE
+#elif ! defined(__LP64__)                        // No point defining Large file for 64 bit
+#  define _LARGEFILE64_SOURCE
+#endif
+
+// S_ISREG & gettimeofday() are not supported by MSVC
+#if defined(_MSC_VER) || defined(_WIN32)
+#  define BMK_LEGACY_TIMER 1
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>      /* malloc, free */
+#include <string.h>      /* memset */
+#include <stdio.h>       // fprintf, fopen, ftello64
+#include <sys/types.h>   // stat64
+#include <sys/stat.h>    // stat64
+
+// Use ftime() if gettimeofday() is not available on your target
+#if defined(BMK_LEGACY_TIMER)
+#  include <sys/timeb.h>   // timeb, ftime
+#else
+#  include <sys/time.h>    // gettimeofday
+#endif
+
+#include "zstd.h"
+#include "xxhash.h"
+
+
+/**************************************
+*  Compiler specifics
+**************************************/
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/**************************************
+* Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define NBLOOPS    3      /* initial value of nbIterations */
+#define TIMELOOP   2500   /* milliseconds : each timed loop keeps running until this span has elapsed */
+/* Size suffixes, written after a number : "4 MB" expands to "4 *(1 <<20)" */
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAX_MEM             (2 GB - 64 MB)
+#define DEFAULT_CHUNKSIZE   (4 MB)
+
+static U32 g_compressibilityDefault = 50;
+static U32 prime1 = 2654435761U;   /* multiplier applied by BMK_rand() */
+static U32 prime2 = 2246822519U;   /* increment added by BMK_rand() */
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+
+
+/**************************************
+*  Benchmark Parameters
+**************************************/
+static int nbIterations = NBLOOPS;
+/* BMK_SetNbIterations() : selects how many benchmark passes are run, and reports the new count on stderr */
+void BMK_SetNbIterations(int nbLoops)
+{
+    nbIterations = nbLoops;
+    DISPLAY("- %i iterations -\n", nbLoops);
+}
+
+
+/*********************************************************
+*  Private functions
+*********************************************************/
+
+#if defined(BMK_LEGACY_TIMER)
+
+/* BMK_GetMilliStart() : millisecond timestamp, legacy ftime() flavour.
+ * Rolls over every ~ 12.1 days (0x100000/24/60/60);
+ * BMK_GetMilliSpan() corrects for that rollover. */
+static int BMK_GetMilliStart(void)
+{
+  struct timeb now;
+
+  ftime( &now );
+  /* seconds are masked to 20 bits, so the millisecond total fits a 32-bit int */
+  return (int) (now.millitm + (now.time & 0xfffff) * 1000);
+}
+
+#else
+
+/* BMK_GetMilliStart() : millisecond timestamp, gettimeofday() flavour.
+ * Same 20-bit masking of the seconds; BMK_GetMilliSpan() corrects for rollover. */
+static int BMK_GetMilliStart(void)
+{
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  /* microseconds are truncated to whole milliseconds */
+  return (int) (now.tv_usec/1000 + (now.tv_sec & 0xfffff) * 1000);
+}
+
+#endif
+
+
+/* BMK_GetMilliSpan() : milliseconds elapsed since nTimeStart, a value obtained from BMK_GetMilliStart() */
+static int BMK_GetMilliSpan( int nTimeStart )
+{
+  int const elapsed = BMK_GetMilliStart() - nTimeStart;
+  /* a negative difference means the counter rolled over : add one full period */
+  return (elapsed < 0) ? elapsed + 0x100000 * 1000 : elapsed;
+}
+
+
+
+/*********************************************************
+*  Data generator
+*********************************************************/
+/* 32-bit rotate left, hopefully compiled into a ROL instruction; nbBits is assumed to stay within 1..31 */
+static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 >> (32 - nbBits)) | (val32 << nbBits)); }
+
+static U32 BMK_rand(U32* src)
+{
+    /* advance the generator state : multiply, add, then rotate left by 13 */
+    U32 const mixed = (*src * prime1) + prime2;
+    U32 const next  = BMK_rotl32(mixed, 13);
+    *src = next;
+    /* the 9 lowest bits are dropped from the returned value; the stored state keeps them */
+    return next >> 9;
+}
+
+
+#define BMK_RAND15BITS  ( BMK_rand(&seed) & 0x7FFF)
+#define BMK_RANDLENGTH  ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)
+#define BMK_RANDCHAR    (BYTE)((BMK_rand(&seed) & 63) + '0')
+/* BMK_datagen() : fills `buffer` with `bufferSize` pseudo-random bytes, deterministic for a given seed and proba.
+ * `proba` (0. to 1.) is the share of steps that copy earlier output instead of emitting random characters. */
+static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
+{
+    BYTE* BBuffer = (BYTE*)buffer;
+    size_t pos = 0;   /* size_t : an `unsigned` cursor would wrap on buffers of 4 GB or more */
+    U32 P32 = (U32)(32768 * proba);
+    if (bufferSize == 0) return;   /* empty buffer : the unconditional first store would overflow it */
+    BBuffer[pos++] = BMK_RANDCHAR;   /* First Byte */
+
+    while (pos < bufferSize)
+    {
+        /* Select : Literal (noise) or copy (offset up to 32 KB back) */
+        if (BMK_RAND15BITS < P32)
+        {
+            /* Match */
+            size_t match, end;
+            unsigned length = BMK_RANDLENGTH + 4;
+            unsigned offset = BMK_RAND15BITS + 1;
+            if (offset > pos) offset = (unsigned)pos;   /* cannot reach before the start of the buffer */
+            match = pos - offset;
+            end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BBuffer[match++];
+        }
+        else
+        {
+            /* Literal */
+            unsigned length = BMK_RANDLENGTH;
+            size_t end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
+        }
+    }
+}
+
+
+/*********************************************************
+*  Bench functions
+*********************************************************/
+
+/*
+ * BMK_benchMem :
+ * Times ZSTD_compress() and ZSTD_decompress() on the in-memory buffer `srcBuffer`
+ * (`srcSize` bytes) and displays size, ratio and speeds through DISPLAY().
+ *
+ * srcBuffer : data to benchmark (only read by this function)
+ * srcSize   : number of bytes in srcBuffer
+ * fileName  : label used in the displayed lines only
+ * cLevel    : currently unused (explicitly voided below)
+ *
+ * Runs up to `nbIterations` rounds. In each round, compression then decompression
+ * are repeated for at least TIMELOOP milliseconds and the best average time per call
+ * seen so far is kept. After each round the regenerated data is checked against the
+ * source with XXH64; a mismatch stops the benchmark and reports the first differing byte.
+ *
+ * @return : 12 when the work buffers cannot be allocated, 0 otherwise
+ *           (a checksum mismatch is displayed but still returns 0).
+ */
+static int BMK_benchMem(void* srcBuffer, size_t srcSize, char* fileName, int cLevel)
+{
+    size_t maxCompressedSize = ZSTD_compressBound(srcSize);
+    void* compressedBuffer = malloc(maxCompressedSize);
+    void* resultBuffer = malloc(srcSize);
+    U64 crcOrig;
+
+    /* Init */
+    (void)cLevel;
+
+    /* Memory allocation & restrictions */
+    if (!compressedBuffer || !resultBuffer)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(compressedBuffer);
+        free(resultBuffer);
+        return 12;
+    }
+
+    /* Calculating input Checksum */
+    crcOrig = XXH64(srcBuffer, srcSize, 0);
+
+    /* Warming up memory : write to the whole destination buffer once before timing starts */
+    BMK_datagen(compressedBuffer, maxCompressedSize, 0.10, 1);   /* warming up memory */
+
+    /* Bench */
+    {
+        int loopNb;
+        size_t cSize = 0;
+        double fastestC = 100000000., fastestD = 100000000.;   /* best time per call, in ms; starts at a huge sentinel */
+        double ratio = 0.;
+        U64 crcCheck = 0;
+
+        DISPLAY("\r%79s\r", "");   /* blank the current console line */
+        for (loopNb = 1; loopNb <= nbIterations; loopNb++)
+        {
+            int nbLoops;
+            int milliTime;
+
+            /* Compression */
+            DISPLAY("%1i-%-14.14s : %9u ->\r", loopNb, fileName, (U32)srcSize);
+            memset(compressedBuffer, 0xE5, maxCompressedSize);   /* overwrite the previous round's output */
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);   /* spin until the timer value changes, so timing starts on a fresh tick */
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
+            {
+                cSize = ZSTD_compress(compressedBuffer, maxCompressedSize, srcBuffer, srcSize);
+                nbLoops++;
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);   /* from here on, milliTime holds the elapsed time */
+
+            /* keep the best average : milliTime/nbLoops < fastestC, written without the division */
+            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
+            ratio = (double)cSize / (double)srcSize*100.;
+            /* speed : bytes per millisecond, divided by 1000, displayed as MB/s */
+            DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.);
+
+#if 1
+            /* Decompression */
+            memset(resultBuffer, 0xD6, srcSize);   /* overwrite the previous round's output */
+
+            nbLoops = 0;
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliStart() == milliTime);   /* same tick alignment as for compression */
+            milliTime = BMK_GetMilliStart();
+            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
+            {
+                /* NOTE(review): the return value is ignored here; a decoding failure is only caught by the checksum below */
+                ZSTD_decompress(resultBuffer, srcSize, compressedBuffer, cSize);
+                nbLoops++;
+            }
+            milliTime = BMK_GetMilliSpan(milliTime);
+
+            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
+            DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+#endif
+
+            /* CRC Checking : the regenerated data must hash to the same value as the source */
+            crcCheck = XXH64(resultBuffer, srcSize, 0);
+            if (crcOrig!=crcCheck)
+            {
+                unsigned i = 0;
+                DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", fileName, (unsigned)crcOrig, (unsigned)crcCheck);
+                /* locate and report the first differing byte */
+                while (i<srcSize)
+                {
+                    if (((BYTE*)srcBuffer)[i] != ((BYTE*)resultBuffer)[i])
+                    {
+                        printf("\nDecoding error at pos %u   \n", i);
+                        break;
+                    }
+                    i++;
+                }
+                break;   /* stop iterating after a mismatch */
+            }
+        }
+
+        /* Final summary line, displayed only when the last check succeeded */
+        if (crcOrig == crcCheck)
+        {
+            if (ratio<100.)
+                DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+            else
+                DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
+        }
+    }
+
+    /* End cleaning */
+    free(compressedBuffer);
+    free(resultBuffer);
+    return 0;
+}
+
+
+/*
+ * BMK_GetFileSize :
+ * Returns the size in bytes of the regular file `infilename`.
+ * Returns 0 when the file cannot be stat'ed or is not a regular file.
+ */
+static U64 BMK_GetFileSize(char* infilename)
+{
+#if defined(_MSC_VER)
+    struct _stat64 fileInfo;
+    const int statError = _stat64(infilename, &fileInfo);
+#else
+    struct stat fileInfo;
+    const int statError = stat(infilename, &fileInfo);
+#endif
+
+    if (statError != 0) return 0;                  /* cannot query the file */
+    if (!S_ISREG(fileInfo.st_mode)) return 0;      /* not a regular file */
+    return (U64)fileInfo.st_size;
+}
+
+/*
+ * BMK_findMaxMem :
+ * Probes how much memory can actually be allocated, starting from `requiredMem`
+ * rounded up to a multiple of 64 MB plus a margin of 2 steps, capped at MAX_MEM,
+ * and going down by steps of 64 MB until a malloc() succeeds.
+ *
+ * @return : the size that could be allocated minus one step (safety margin),
+ *           or 0 when not even one step beyond the margin is available.
+ */
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+    size_t step = 64 MB;
+    BYTE* testmem = NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);   /* round up to the next multiple of 64 MB */
+    requiredMem += 2 * step;
+    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
+
+    while (!testmem)
+    {
+        /* Nothing left to try : stop here instead of letting the unsigned counter
+           reach 0 (malloc(0) may succeed) or wrap around to a huge value. */
+        if (requiredMem <= step) return 0;
+        requiredMem -= step;
+        testmem = (BYTE*)malloc((size_t)requiredMem);
+    }
+
+    free(testmem);
+    /* keep one step of margin; never let the subtraction underflow */
+    return (requiredMem > step) ? (size_t)(requiredMem - step) : 0;
+}
+
+/*
+ * BMK_benchOneFile :
+ * Loads `inFileName` in memory (or only its beginning when memory is too short)
+ * and benchmarks it with BMK_benchMem().
+ *
+ * @return : 11 if the file cannot be opened, 12 on allocation failure,
+ *           13 on read error, otherwise the result of BMK_benchMem().
+ */
+static int BMK_benchOneFile(char* inFileName, int cLevel)
+{
+    FILE*  srcFile;
+    U64    fileSize;
+    size_t loadSize;
+    size_t nbRead;
+    void*  content;
+    int    benchResult;
+
+    /* The file must be readable */
+    srcFile = fopen(inFileName, "rb");
+    if (!srcFile)
+    {
+        DISPLAY("Pb opening %s\n", inFileName);
+        return 11;
+    }
+
+    /* Decide how much of the file can be loaded :
+       a third of the memory found available for 3x the file size, capped at the file size */
+    fileSize = BMK_GetFileSize(inFileName);
+    loadSize = BMK_findMaxMem(fileSize * 3) / 3;
+    if ((U64)loadSize > fileSize) loadSize = (size_t)fileSize;
+    if (loadSize < fileSize)
+        DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(loadSize >> 20));
+
+    /* Allocate the source buffer */
+    content = malloc(loadSize);
+    if (content == NULL)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        fclose(srcFile);
+        return 12;
+    }
+
+    /* Load the file content; the file handle is no longer needed afterwards */
+    DISPLAY("Loading %s...       \r", inFileName);
+    nbRead = fread(content, 1, loadSize, srcFile);
+    fclose(srcFile);
+
+    if (nbRead == loadSize)
+    {
+        /* Bench, then release the buffer */
+        benchResult = BMK_benchMem(content, loadSize, inFileName, cLevel);
+        free(content);
+        DISPLAY("\n");
+        return benchResult;
+    }
+
+    /* Short read */
+    DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+    free(content);
+    return 13;
+}
+
+
+/*
+ * BMK_syntheticTest :
+ * Benchmarks a 10,000,000-byte sample produced by BMK_datagen()
+ * with the requested `compressibility` (a fraction; displayed as a percentage).
+ *
+ * @return : 12 on allocation failure, otherwise the result of BMK_benchMem().
+ */
+static int BMK_syntheticTest(int cLevel, double compressibility)
+{
+    const size_t sampleSize = 10000000;
+    char label[20] = {0};
+    int benchResult;
+    void* const sample = malloc(sampleSize);
+
+    if (sample == NULL)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        return 12;
+    }
+
+    /* Generate the sample */
+    BMK_datagen(sample, sampleSize, compressibility, 0);
+
+    /* Build the display label, then bench */
+#ifdef _MSC_VER
+    sprintf_s(label, sizeof(label), "Synthetic %2u%%", (unsigned)(compressibility*100));
+#else
+    snprintf (label, sizeof(label), "Synthetic %2u%%", (unsigned)(compressibility*100));
+#endif
+    benchResult = BMK_benchMem(sample, sampleSize, label, cLevel);
+
+    /* Release */
+    free(sample);
+    DISPLAY("\n");
+    return benchResult;
+}
+
+
+/*
+ * BMK_bench :
+ * Benchmark entry point.
+ *
+ * fileNamesTable : names of the files to benchmark
+ * nbFiles        : number of entries in fileNamesTable;
+ *                  0 selects a synthetic sample of default compressibility instead
+ * cLevel         : forwarded to the bench functions
+ *
+ * Every file is benchmarked even if a previous one failed.
+ *
+ * @return : 0 if all benchmarks completed, otherwise the status code
+ *           of the first benchmark that failed.
+ */
+int BMK_bench(char** fileNamesTable, unsigned nbFiles, unsigned cLevel)
+{
+    double compressibility = (double)g_compressibilityDefault / 100;
+    int result = 0;
+
+    if (nbFiles == 0)
+    {
+        result = BMK_syntheticTest((int)cLevel, compressibility);
+    }
+    else
+    {
+        /* Loop for each file */
+        unsigned fileIdx;
+        for (fileIdx = 0; fileIdx < nbFiles; fileIdx++)
+        {
+            int fileResult = BMK_benchOneFile(fileNamesTable[fileIdx], (int)cLevel);
+            /* remember the first failure, but keep benchmarking the remaining files */
+            if ((fileResult != 0) && (result == 0)) result = fileResult;
+        }
+    }
+    return result;
+}
+
diff --git a/programs/bench.h b/programs/bench.h
new file mode 100755
index 0000000..e07f37f
--- /dev/null
+++ b/programs/bench.h
@@ -0,0 +1,42 @@
+/*
+    bench.h - Demo program to benchmark open-source compression algorithm
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - ZSTD source repository : https://github.com/Cyan4973/zstd
+    - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* Main function
+ * BMK_bench :
+ *   Benchmarks each of the `nbFiles` files named in `fileNamesTable`.
+ *   When nbFiles == 0, a synthetic sample is benchmarked instead.
+ *   `cLevel` is forwarded to the bench routines.
+ *   Returns an integer status code (see bench.c). */
+int BMK_bench(char** fileNamesTable, unsigned nbFiles, unsigned cLevel);
+
+/* Set Parameters
+ * BMK_SetNbIterations :
+ *   presumably selects how many benchmark rounds are run per sample
+ *   (the definition lives in bench.c - confirm there). */
+void BMK_SetNbIterations(int nbLoops);
+
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/datagen.c b/programs/datagen.c
new file mode 100644
index 0000000..4956499
--- /dev/null
+++ b/programs/datagen.c
@@ -0,0 +1,320 @@
+/*
+    datagen.c - compressible data generator test tool
+    Copyright (C) Yann Collet 2012-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+   - ZSTD source repository : https://github.com/Cyan4973/zstd
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Remove Visual warning messages
+**************************************/
+#define _CRT_SECURE_NO_WARNINGS   /* fgets */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>    /* malloc */
+#include <stdio.h>     /* fgets, sscanf */
+#include <string.h>    /* strcmp */
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+/* Version string displayed at verbose level; can be overridden at compile time */
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION "r0"
+#endif
+
+/* Postfix size multipliers : `64 KB` expands to `64 *(1 <<10)` */
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+/* Default values of the command line parameters (-g, -s, -p) */
+#define CDG_SIZE_DEFAULT (64 KB)
+#define CDG_SEED_DEFAULT 0
+#define CDG_COMPRESSIBILITY_DEFAULT 50
+/* Multiplier and increment used by CDG_rand() */
+#define PRIME1   2654435761U
+#define PRIME2   2246822519U
+
+
+/**************************************
+*  Macros
+**************************************/
+/* DISPLAY : all messages go to stderr; stdout carries the generated data */
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+/* DISPLAYLEVEL : message displayed only when displayLevel >= l.
+   Note : expands to a bare `if` statement, so it must not be followed by an `else`. */
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+
+
+/**************************************
+*  Local Parameters
+**************************************/
+static unsigned no_prompt = 0;      /* set by --no-prompt */
+static char*    programName;        /* argv[0], shown in the usage text */
+static unsigned displayLevel = 2;   /* verbosity threshold for DISPLAYLEVEL; -v raises it to 4 */
+
+
+/*********************************************************
+*  Local Functions
+*********************************************************/
+/* CDG_rotl32 : rotate the 32-bit value x left by r bits (0 < r < 32).
+   Arguments are fully parenthesized so that expressions can be passed safely. */
+#define CDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+/*
+ * CDG_rand :
+ * Pseudo-random generator. Updates the 32-bit state `*src`
+ * (multiply by PRIME1, add PRIME2, rotate left by 13) and returns the new state.
+ * The sequence depends only on the initial value of `*src`.
+ */
+static unsigned int CDG_rand(U32* src)
+{
+    U32 rand32 = *src;
+    rand32 *= PRIME1;
+    rand32 += PRIME2;
+    rand32  = CDG_rotl32(rand32, 13);
+    *src = rand32;
+    return rand32;
+}
+
+
+#define LTSIZE 8192              /* number of entries in the literal table (power of 2) */
+#define LTMASK (LTSIZE-1)        /* mask turning a random value into a table index */
+static const char firstChar = '(';
+static const char lastChar = '}';
+/*
+ * CDG_createLiteralDistrib :
+ * Builds a table of LTSIZE characters used to draw literals with a skewed distribution.
+ * Starting with '0', each successive character fills a share `ld` of the entries
+ * still free (plus one), so earlier characters are more frequent.
+ * After lastChar, characters wrap around to firstChar.
+ *
+ * ld      : fraction of the remaining entries given to each successive character
+ * @return : the table, allocated with malloc(); the caller must free() it.
+ *           The program exits with an error message if the allocation fails.
+ */
+static void* CDG_createLiteralDistrib(double ld)
+{
+    char* lt = malloc(LTSIZE);
+    U32 i = 0;
+    char character = '0';
+
+    if (lt == NULL)
+    {
+        /* nothing can be generated without the table */
+        DISPLAY("Error : not enough memory to create the literal table\n");
+        exit(1);
+    }
+
+    while (i<LTSIZE)
+    {
+        U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1;
+        U32 end;
+        if (weight + i > LTSIZE) weight = LTSIZE-i;   /* never write past the table */
+        end = i + weight;
+        while (i < end) lt[i++] = character;
+        character++;
+        if (character > lastChar) character = firstChar;
+    }
+    return lt;
+}
+
+/*
+ * CDG_genChar :
+ * Draws one character from the literal table `ltctx`,
+ * at an index selected by the next value of the `seed` generator.
+ */
+static char CDG_genChar(U32* seed, const void* ltctx)
+{
+    const U32 slot = CDG_rand(seed) & LTMASK;
+    return ((const BYTE*)ltctx)[slot];
+}
+
+/* Helper macros; both read the local variable `seed` of the calling function */
+#define CDG_RAND15BITS  ((CDG_rand(seed) >> 3) & 32767)
+#define CDG_RANDLENGTH  ( ((CDG_rand(seed) >> 7) & 7) ? (CDG_rand(seed) & 15) : (CDG_rand(seed) & 511) + 15)
+#define CDG_DICTSIZE    (32 KB)
+/*
+ * CDG_generate :
+ * Writes `size` bytes of pseudo-random, compressible data to stdout.
+ *
+ * size       : number of bytes to generate
+ * seed       : state of the random generator (updated)
+ * matchProba : probability (0.0 - 1.0) that the next sequence is a copy of earlier
+ *              data rather than a run of fresh literals
+ *
+ * Data is produced by blocks of up to 128 KB. Each block is preceded in memory by a
+ * 32 KB prefix (the end of the previous block), so copies can reach back up to 32 KB,
+ * including across block boundaries.
+ */
+static void CDG_generate(U64 size, U32* seed, double matchProba)
+{
+    BYTE fullbuff[CDG_DICTSIZE + 128 KB + 1];   /* prefix + one block + 1 terminating byte */
+    BYTE* buff = fullbuff + CDG_DICTSIZE;       /* start of the block area */
+    U64 total=0;
+    U32 P32 = (U32)(32768 * matchProba);        /* match probability scaled to 15 bits */
+    U32 pos=1;
+    U32 genBlockSize = 128 KB;
+    double literalDistrib = 0.13;
+    void* ldctx = CDG_createLiteralDistrib(literalDistrib);
+
+    /* Build initial prefix */
+    fullbuff[0] = CDG_genChar(seed, ldctx);
+    while (pos<32 KB)
+    {
+        /* Select : Literal (char) or Match (within 32K) */
+        if (CDG_RAND15BITS < P32)
+        {
+            /* Copy (within 32K), never reaching before the start of the buffer */
+            U32 d;
+            int ref;
+            int length = CDG_RANDLENGTH + 4;
+            U32 offset = CDG_RAND15BITS + 1;
+            if (offset > pos) offset = pos;
+            ref = pos - offset;
+            d = pos + length;
+            while (pos < d) fullbuff[pos++] = fullbuff[ref++];
+        }
+        else
+        {
+            /* Literal (noise) */
+            U32 d = pos + CDG_RANDLENGTH;
+            while (pos < d) fullbuff[pos++] = CDG_genChar(seed, ldctx);
+        }
+    }
+
+    /* Generate compressible data */
+    pos = 0;
+    while (total < size)
+    {
+        if (size-total < 128 KB) genBlockSize = (U32)(size-total);   /* last, shorter block */
+        total += genBlockSize;
+        buff[genBlockSize] = 0;
+        pos = 0;
+        while (pos<genBlockSize)
+        {
+            /* Select : Literal (char) or Match (within 32K) */
+            if (CDG_RAND15BITS < P32)
+            {
+                /* Copy (within 32K) : `ref` may be negative, in which case it points into the prefix */
+                int ref;
+                U32 d;
+                int length = CDG_RANDLENGTH + 4;
+                U32 offset = CDG_RAND15BITS + 1;
+                if (pos + length > genBlockSize ) length = genBlockSize - pos;
+                ref = pos - offset;
+                d = pos + length;
+                while (pos < d) buff[pos++] = buff[ref++];
+            }
+            else
+            {
+                /* Literal (noise) */
+                U32 d;
+                int length = CDG_RANDLENGTH;
+                if (pos + length > genBlockSize) length = genBlockSize - pos;
+                d = pos + length;
+                while (pos < d) buff[pos++] = CDG_genChar(seed, ldctx);
+            }
+        }
+
+        /* output datagen : by chunks of 512 characters, then the remainder one by one */
+        pos=0;
+        for (;pos+512<=genBlockSize;pos+=512)
+            printf("%512.512s", buff+pos);
+        for (;pos<genBlockSize;pos++) printf("%c", buff[pos]);
+        /* Regenerate prefix : the last 32 KB of the block area become the new prefix */
+        memcpy(fullbuff, buff + 96 KB, 32 KB);
+    }
+
+    /* release the literal table created above */
+    free(ldctx);
+}
+
+
+/*********************************************************
+*  Command line
+*********************************************************/
+/*
+ * CDG_usage :
+ * Displays the command line help on stderr.
+ * @return : always 0
+ */
+static int CDG_usage(void)
+{
+    /* Title and synopsis */
+    DISPLAY( "Compressible data generator\n"
+             "Usage :\n"
+             "      %s [size] [args]\n"
+             "\n",
+             programName);
+
+    /* Options, with their default values */
+    DISPLAY( "Arguments :\n"
+             " -g#    : generate # data (default:%i)\n"
+             " -s#    : Select seed (default:%i)\n"
+             " -p#    : Select compressibility in %% (default:%i%%)\n"
+             " -h     : display help and exit\n",
+             CDG_SIZE_DEFAULT, CDG_SEED_DEFAULT, CDG_COMPRESSIBILITY_DEFAULT);
+
+    return 0;
+}
+
+
+/*
+ * main :
+ * Parses the command line, then writes the generated data to stdout.
+ *
+ * Options (short options can be aggregated, e.g. -g10Ms5) :
+ *   -g# : size to generate, optional suffix K, M or G, optionally followed by B
+ *   -s# : seed
+ *   -p# : compressibility in percent, clamped to [0, 100]
+ *   -v  : verbose
+ *   -h  : display help and exit
+ *   --no-prompt : accepted
+ * Arguments that do not start with '-' are ignored.
+ *
+ * @return : 0 on success (including -h), 1 on unknown option.
+ */
+int main(int argc, char** argv)
+{
+    int argNb;
+    int proba = CDG_COMPRESSIBILITY_DEFAULT;
+    U64 size = CDG_SIZE_DEFAULT;
+    U32 seed = CDG_SEED_DEFAULT;
+
+    /* Check command line */
+    programName = argv[0];
+    for(argNb=1; argNb<argc; argNb++)
+    {
+        char* argument = argv[argNb];
+
+        if(!argument) continue;   /* Protection if argument empty */
+
+        /* Handle commands. Aggregated commands are allowed */
+        if (*argument=='-')
+        {
+            if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
+
+            argument++;
+            while (*argument!=0)
+            {
+                switch(*argument)
+                {
+                case 'h':
+                    return CDG_usage();
+                case 'g':
+                    /* decimal value, followed by an optional unit */
+                    argument++;
+                    size=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        size *= 10;
+                        size += *argument - '0';
+                        argument++;
+                    }
+                    if (*argument=='K') { size <<= 10; argument++; }
+                    if (*argument=='M') { size <<= 20; argument++; }
+                    if (*argument=='G') { size <<= 30; argument++; }
+                    if (*argument=='B') { argument++; }
+                    break;
+                case 's':
+                    argument++;
+                    seed=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        seed *= 10;
+                        seed += *argument - '0';
+                        argument++;
+                    }
+                    break;
+                case 'p':
+                    argument++;
+                    proba=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        proba *= 10;
+                        proba += *argument - '0';
+                        argument++;
+                    }
+                    if (proba<0) proba=0;
+                    if (proba>100) proba=100;
+                    break;
+                case 'v':
+                    displayLevel = 4;
+                    argument++;
+                    break;
+                default:
+                    /* Unknown option : every path through this switch must either advance
+                       `argument` or leave the loop, otherwise the enclosing while never ends. */
+                    DISPLAY("Unknown option : -%c\n", *argument);
+                    CDG_usage();
+                    return 1;
+                }
+            }
+
+        }
+    }
+
+    DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
+    DISPLAYLEVEL(3, "Seed = %u \n", seed);
+    if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);
+
+    CDG_generate(size, &seed, ((double)proba) / 100);
+
+    return 0;
+}
diff --git a/programs/fileio.c b/programs/fileio.c
new file mode 100644
index 0000000..83216e8
--- /dev/null
+++ b/programs/fileio.c
@@ -0,0 +1,389 @@
+/*
+  fileio.c - File i/o handler
+  Copyright (C) Yann Collet 2013-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd source repository : https://github.com/Cyan4973/zstd
+  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is stand-alone program.
+  It is not part of ZSTD compression library, it is a user program of ZSTD library.
+  The license of ZSTD library is BSD.
+  The license of this file is GPLv2.
+*/
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* Disable some Visual warning messages */
+#ifdef _MSC_VER
+#  define _CRT_SECURE_NO_WARNINGS
+#  define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#define _FILE_OFFSET_BITS 64   /* Large file support on 32-bits unix */
+#define _POSIX_SOURCE 1        /* enable fileno() within <stdio.h> on unix */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdio.h>    /* fprintf, fopen, fread, _fileno, stdin, stdout */
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* strcmp, strlen */
+#include <time.h>     /* clock */
+#include "fileio.h"
+#include "zstd_static.h"
+
+
+/**************************************
+*  OS-specific Includes
+**************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>    /* _O_BINARY */
+#  include <io.h>       /* _setmode, _isatty */
+#  ifdef __MINGW32__
+   int _fileno(FILE *stream);   /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
+#  endif
+#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+#  include <unistd.h>   /* isatty */
+#  define SET_BINARY_MODE(file)
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#endif
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _6BITS 0x3F
+#define _8BITS 0xFF
+
+#define BIT6  0x40
+#define BIT7  0x80
+
+static const unsigned FIO_magicNumber = 0x183E2308;
+static const unsigned FIO_maxBlockSizeID = 0xB;   /* => 2MB block */
+static const unsigned FIO_blockHeaderSize = 3;
+
+#define FIO_FRAMEHEADERSIZE 5        /* as a define, because needed to allocated table on stack */
+#define FSE_CHECKSUM_SEED        0
+
+#define CACHELINE 64
+
+
+/**************************************
+*  Complex types
+**************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_crc } bType_t;
+
+
+/**************************************
+*  Macros
+**************************************/
+/* DISPLAY : all messages are written to stderr */
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+/* DISPLAYLEVEL : message displayed only when g_displayLevel >= l.
+   Note : expands to a bare `if` statement, so it must not be followed by an `else`. */
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
+
+/* DISPLAYUPDATE : progress message, displayed when g_displayLevel >= l, and only if more than
+   `refreshRate` milliseconds elapsed since the previous update (always displayed at level 4+).
+   NOTE(review): the flush targets stdout while DISPLAY writes to stderr - confirm this is intended. */
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            { g_time = clock(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const unsigned refreshRate = 150;   /* minimum delay between two progress updates, in milliseconds */
+static clock_t g_time = 0;                 /* clock() value recorded at the last progress update */
+
+
+/**************************************
+*  Local Parameters
+**************************************/
+/* g_overwrite : when non-zero, an existing destination file is replaced without asking */
+static U32 g_overwrite = 0;
+
+/* FIO_overwriteMode : allow existing destination files to be overwritten silently */
+void FIO_overwriteMode(void)
+{
+    g_overwrite = 1;
+}
+
+/* FIO_setNotificationLevel : select the verbosity of displayed messages */
+void FIO_setNotificationLevel(unsigned level)
+{
+    g_displayLevel = level;
+}
+
+
+/**************************************
+*  Exceptions
+**************************************/
+/* DEBUG : set to non-zero to also display the source location of each error */
+#define DEBUG 0
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+/* EXM_THROW : fatal error.
+   Displays "Error <error> : <message>" (at display level 1 and above),
+   then terminates the program with exit code `error`.
+   Expands to a braced block : no resource is released before exiting. */
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/**************************************
+*  Functions
+**************************************/
+/*
+ * FIO_GetMilliSpan :
+ * Returns the time elapsed since `nPrevious` (a value previously returned by clock()),
+ * converted from clock ticks to milliseconds.
+ */
+static unsigned FIO_GetMilliSpan(clock_t nPrevious)
+{
+    const clock_t elapsedTicks = clock() - nPrevious;
+    return (unsigned)((elapsedTicks * 1000) / CLOCKS_PER_SEC);
+}
+
+
+/*
+ * FIO_getFileHandles :
+ * Opens the source and destination streams.
+ *
+ * pfinput, pfoutput : receive the opened streams
+ * input_filename    : file to read, or stdinmark to read from stdin
+ * output_filename   : file to write, or stdoutmark to write to stdout
+ *
+ * The source is opened and validated first, so that a missing source never
+ * truncates (nor prompts about) an existing destination file.
+ * If the destination already exists and overwrite mode is off, the user is asked
+ * for confirmation; when interaction is not possible (display level <= 1)
+ * the operation is aborted.
+ *
+ * Does not return on failure : exits through EXM_THROW
+ * (11 : destination exists, 12 : cannot open source, 13 : cannot open destination).
+ */
+static void FIO_getFileHandles(FILE** pfinput, FILE** pfoutput, const char* input_filename, const char* output_filename)
+{
+    /* Source */
+    if (!strcmp (input_filename, stdinmark))
+    {
+        DISPLAYLEVEL(4,"Using stdin for input\n");
+        *pfinput = stdin;
+        SET_BINARY_MODE(stdin);
+    }
+    else
+    {
+        *pfinput = fopen(input_filename, "rb");
+    }
+    /* must be checked before the destination is created or truncated */
+    if ( *pfinput==0 ) EXM_THROW(12, "Pb opening %s", input_filename);
+
+    /* Destination */
+    if (!strcmp (output_filename, stdoutmark))
+    {
+        DISPLAYLEVEL(4,"Using stdout for output\n");
+        *pfoutput = stdout;
+        SET_BINARY_MODE(stdout);
+    }
+    else
+    {
+        /* Check if destination file already exists (the null device is never checked) */
+        *pfoutput=0;
+        if (strcmp(output_filename,nulmark)) *pfoutput = fopen( output_filename, "rb" );
+        if (*pfoutput!=0)
+        {
+            fclose(*pfoutput);
+            if (!g_overwrite)
+            {
+                char ch;
+                if (g_displayLevel <= 1)   /* No interaction possible */
+                    EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
+                DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename);
+                DISPLAYLEVEL(2, "Overwrite ? (Y/N) : ");
+                ch = (char)getchar();
+                if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename);
+            }
+        }
+        *pfoutput = fopen( output_filename, "wb" );
+    }
+
+    if ( *pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename);
+}
+
+
+/*
+ * FIO_compressFilename :
+ * Compresses `input_filename` into `output_filename` as a single frame
+ * (frame header, compressed blocks, frame end).
+ *
+ * The source is read by blocks of 128 KB into a 4-block buffer used in rotation,
+ * so each block handed to ZSTD_compressContinue() is stored right after the previous
+ * one until the buffer wraps around.
+ *
+ * @return : size of the compressed file, in bytes.
+ * Does not return on failure : exits through EXM_THROW (codes 21 to 28).
+ */
+unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename)
+{
+    U64 filesize = 0;
+    U64 compressedfilesize = 0;
+    BYTE* inBuff;
+    BYTE* inSlot;
+    BYTE* inEnd;
+    BYTE* outBuff;
+    size_t blockSize = 128 KB;
+    size_t inBuffSize = 4 * blockSize;
+    size_t outBuffSize = ZSTD_compressBound(blockSize);
+    FILE* finput;
+    FILE* foutput;
+    size_t sizeCheck, cSize;
+    ZSTD_cctx_t ctx;
+
+
+    /* Init */
+    FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
+    ctx = ZSTD_createCCtx();
+
+    /* Allocate Memory */
+    inBuff  = malloc(inBuffSize);
+    outBuff = malloc(outBuffSize);
+    if (!inBuff || !outBuff) EXM_THROW(21, "Allocation error : not enough memory");
+    inSlot = inBuff;
+    inEnd = inBuff + inBuffSize;
+
+    /* Write Frame Header */
+    cSize = ZSTD_compressBegin(ctx, outBuff, outBuffSize);
+    if (ZSTD_isError(cSize)) EXM_THROW(22, "Compression error : cannot create frame header");
+
+    sizeCheck = fwrite(outBuff, 1, cSize, foutput);
+    if (sizeCheck!=cSize) EXM_THROW(23, "Write error : cannot write header");
+    compressedfilesize += cSize;
+
+    /* Main compression loop */
+    while (1)
+    {
+        size_t inSize;
+
+        /* Fill input Buffer : wrap around when a full block no longer fits */
+        if (inSlot + blockSize > inEnd) inSlot = inBuff;
+        inSize = fread(inSlot, (size_t)1, blockSize, finput);
+        if (inSize==0) break;
+        filesize += inSize;
+        DISPLAYUPDATE(2, "\rRead : %u MB   ", (U32)(filesize>>20));
+
+        /* Compress Block */
+        cSize = ZSTD_compressContinue(ctx, outBuff, outBuffSize, inSlot, inSize);
+        if (ZSTD_isError(cSize))
+            EXM_THROW(24, "Compression error : %s ", ZSTD_getErrorName(cSize));
+
+        /* Write cBlock */
+        sizeCheck = fwrite(outBuff, 1, cSize, foutput);
+        if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block");
+        compressedfilesize += cSize;
+        inSlot += inSize;
+
+        DISPLAYUPDATE(2, "\rRead : %u MB  ==> %.2f%%   ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
+    }
+
+    /* End of Frame */
+    cSize = ZSTD_compressEnd(ctx, outBuff, outBuffSize);
+    if (ZSTD_isError(cSize)) EXM_THROW(26, "Compression error : cannot create frame end");
+
+    sizeCheck = fwrite(outBuff, 1, cSize, foutput);
+    if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end");
+    compressedfilesize += cSize;
+
+    /* Status : the ratio is reported as 0 for an empty source, to avoid dividing by zero */
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
+        (unsigned long long) filesize, (unsigned long long) compressedfilesize,
+        filesize ? (double)compressedfilesize/filesize*100 : 0.);
+
+    /* clean */
+    free(inBuff);
+    free(outBuff);
+    fclose(finput);
+    /* buffered data is flushed on close : a failure here means the destination is incomplete */
+    if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename);
+    ZSTD_freeCCtx(ctx);
+
+    return compressedfilesize;
+}
+
+
+/* MAXHEADERSIZE : size of the stack buffer receiving the frame header (parenthesized for safe use in expressions) */
+#define MAXHEADERSIZE (FIO_FRAMEHEADERSIZE+3)
+/*
+ * FIO_decompressFilename :
+ * Decompresses `input_filename` into `output_filename`.
+ *
+ * The frame header is read and decoded first. Then, for each block, the decoder
+ * tells how many bytes it needs next (ZSTD_getNextcBlockSize()); they are read,
+ * decoded into a 4-block output buffer used in rotation, and written to the destination.
+ * A requested size of 0 ends the loop.
+ *
+ * @return : size of the regenerated file, in bytes.
+ * Does not return on failure : exits through EXM_THROW (codes 30 to 38).
+ */
+unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename)
+{
+    FILE* finput, *foutput;
+    BYTE* inBuff;
+    size_t inBuffSize;
+    BYTE* outBuff, *op, *oend;
+    size_t outBuffSize;
+    U32   blockSize = 128 KB;
+    U32   wNbBlocks = 4;
+    U64   filesize = 0;
+    BYTE  header[MAXHEADERSIZE];   /* byte buffer (not an array of pointers) */
+    ZSTD_cctx_t dctx;   /* NOTE(review): declared with the compression context type but created by ZSTD_createDCtx() - confirm */
+    size_t toRead;
+    size_t sizeCheck;
+
+
+    /* Init */
+    FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
+    dctx = ZSTD_createDCtx();
+    toRead = ZSTD_getNextcBlockSize(dctx);
+    if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header");
+
+    /* check header */
+    sizeCheck = fread(header, (size_t)1, toRead, finput);
+    if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
+    sizeCheck = ZSTD_decompressContinue(dctx, NULL, 0, header, toRead);   /* Decode frame header */
+    if (ZSTD_isError(sizeCheck)) EXM_THROW(32, "Error decoding header");
+
+    /* Here later : blockSize determination */
+
+    /* Allocate Memory */
+    inBuffSize = blockSize + FIO_blockHeaderSize;
+    inBuff  = malloc(inBuffSize);
+    outBuffSize = wNbBlocks * blockSize;
+    outBuff = malloc(outBuffSize);
+    op = outBuff;
+    oend = outBuff + outBuffSize;
+    if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory");
+
+    /* Main decompression Loop */
+    toRead = ZSTD_getNextcBlockSize(dctx);
+    while (toRead)
+    {
+        size_t readSize, decodedSize;
+
+        /* The requested size comes from the input data : never read more than the buffer can hold */
+        if (toRead > inBuffSize) EXM_THROW(36, "Decoding error : block too large");
+
+        /* Fill input buffer */
+        readSize = fread(inBuff, 1, toRead, finput);
+        if (readSize != toRead)
+            EXM_THROW(34, "Read error");
+
+        /* Decode block */
+        decodedSize = ZSTD_decompressContinue(dctx, op, oend-op, inBuff, readSize);
+        /* an error code must not be used as a size by the write below */
+        if (ZSTD_isError(decodedSize)) EXM_THROW(37, "Decoding error : input corrupted");
+
+        /* Write block */
+        sizeCheck = fwrite(op, 1, decodedSize, foutput);
+        if (sizeCheck != decodedSize) EXM_THROW(35, "Write error : unable to write data block to destination file");
+        filesize += decodedSize;
+
+        /* prepare for next Block : wrap around at the end of the output buffer */
+        op += decodedSize;
+        if (op==oend) op = outBuff;
+        toRead = ZSTD_getNextcBlockSize(dctx);
+        DISPLAYUPDATE(2, "\rDecoded : %u MB...     ", (U32)(filesize>>20) );
+    }
+
+    DISPLAYLEVEL(2, "\r%79s\r", "");
+    DISPLAYLEVEL(2,"Decoded %llu bytes   \n", (long long unsigned)filesize);
+
+    /* clean */
+    free(inBuff);
+    free(outBuff);
+    fclose(finput);
+    /* buffered data is flushed on close : a failure here means the destination is incomplete */
+    if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename);
+    ZSTD_freeDCtx(dctx);
+
+    return filesize;
+}
+
diff --git a/programs/fileio.h b/programs/fileio.h
new file mode 100755
index 0000000..a449f6f
--- /dev/null
+++ b/programs/fileio.h
@@ -0,0 +1,69 @@
+/*

+  fileio.h - file i/o handler

+  Copyright (C) Yann Collet 2013-2015

+

+  GPL v2 License

+

+  This program is free software; you can redistribute it and/or modify

+  it under the terms of the GNU General Public License as published by

+  the Free Software Foundation; either version 2 of the License, or

+  (at your option) any later version.

+

+  This program is distributed in the hope that it will be useful,

+  but WITHOUT ANY WARRANTY; without even the implied warranty of

+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+  GNU General Public License for more details.

+

+  You should have received a copy of the GNU General Public License along

+  with this program; if not, write to the Free Software Foundation, Inc.,

+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

+

+  You can contact the author at :

+  - ZSTD source repository : https://github.com/Cyan4973/zstd

+  - Public forum : https://groups.google.com/forum/#!forum/lz4c

+*/

+#pragma once

+

+#if defined (__cplusplus)

+extern "C" {

+#endif

+

+

+/**************************************

+*  Special i/o constants

+**************************************/

+#define nullString "null"

+#define stdinmark "-"

+#define stdoutmark "-"

+#ifdef _WIN32

+#  define nulmark "nul"

+#else

+#  define nulmark "/dev/null"

+#endif

+

+

+/**************************************

+*  Parameters

+**************************************/

+void FIO_overwriteMode(void);

+void FIO_setNotificationLevel(unsigned level);

+

+

+/**************************************

+*  Stream/File functions

+**************************************/

+unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename);

+unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename);

+/*

+FIO_compressFilename :

+    result : size of compressed file

+

+FIO_decompressFilename :

+    result : size of regenerated file

+*/

+

+

+

+#if defined (__cplusplus)

+}

+#endif
\ No newline at end of file
diff --git a/programs/fullbench.c b/programs/fullbench.c
new file mode 100644
index 0000000..b13ea60
--- /dev/null
+++ b/programs/fullbench.c
@@ -0,0 +1,682 @@
+/*
+    fullbench.c - Detailed bench program for zstd
+    Copyright (C) Yann Collet 2014-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* Disable some Visual warning messages */
+#define _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_DEPRECATE     /* VS2005 */
+
+/* Unix Large Files support (>4GB) */
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  define _LARGEFILE_SOURCE
+#  define _FILE_OFFSET_BITS 64
+#elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  define _LARGEFILE64_SOURCE
+#endif
+
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if defined(_MSC_VER) || defined(_WIN32)
+#  define BMK_LEGACY_TIMER 1
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>       /* malloc */
+#include <stdio.h>        /* fprintf, fopen, ftello64 */
+#include <sys/types.h>    /* stat64 */
+#include <sys/stat.h>     /* stat64 */
+#include <string.h>       /* strcmp */
+
+/* Use ftime() if gettimeofday() is not available on your target */
+#if defined(BMK_LEGACY_TIMER)
+#  include <sys/timeb.h>  /* timeb, ftime */
+#else
+#  include <sys/time.h>   /* gettimeofday */
+#endif
+
+#include "zstd.h"
+#include "fse_static.h"
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+/* S_ISREG & gettimeofday() are not supported by MSVC */
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#define PROGRAM_DESCRIPTION "zStandard speed analyzer"
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION, (int)(sizeof(void*)*8), AUTHOR, __DATE__
+
+
+#define KB *(1<<10)
+#define MB *(1<<20)
+
+#define NBLOOPS    6
+#define TIMELOOP   2500
+
+#define KNUTH      2654435761U
+#define MAX_MEM    (1984 MB)
+#define DEFAULT_CHUNKSIZE   (4<<20)
+
+static double g_compressibilityDefault = 0.50;
+static const U32 prime1 = 2654435761U;
+static const U32 prime2 = 2246822519U;
+static const size_t sampleSize = 10000000;
+
+
+/**************************************
+*  Macros
+**************************************/
+#define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
+
+
+/**************************************
+*  Benchmark Parameters
+**************************************/
+static int nbIterations = NBLOOPS;
+
+void BMK_SetNbIterations(int nbLoops)
+{   /* Stores the requested number of timing passes and echoes it on the display stream */
+    nbIterations = nbLoops;
+    DISPLAY("- %i iterations -\n", nbLoops);
+}
+
+
+/*********************************************************
+*  Private functions
+*********************************************************/
+
+#if defined(BMK_LEGACY_TIMER)
+
+static int BMK_GetMilliStart(void)
+{
+  /* Millisecond timestamp built from legacy ftime().
+  *  Only the low 20 bits of the seconds counter are kept, so the value
+  *  rolls over every ~ 12.1 days (0x100000/24/60/60) ;
+  *  BMK_GetMilliSpan() compensates for that rollover. */
+  struct timeb now;
+
+  ftime( &now );
+  return (int) (now.millitm + (now.time & 0xfffff) * 1000);
+}
+
+#else
+
+static int BMK_GetMilliStart(void)
+{
+  /* Millisecond timestamp built from gettimeofday().
+  *  Seconds are masked to 20 bits, hence the value wraps around ;
+  *  BMK_GetMilliSpan() compensates for that rollover. */
+  struct timeval now;
+
+  gettimeofday(&now, NULL);
+  return (int) (now.tv_usec/1000 + (now.tv_sec & 0xfffff) * 1000);
+}
+
+#endif
+
+
+static int BMK_GetMilliSpan( int nTimeStart )
+{
+  /* Elapsed milliseconds since nTimeStart ; a negative difference means the
+  *  20-bit seconds counter wrapped, so one full period is added back */
+  int const elapsed = BMK_GetMilliStart() - nTimeStart;
+  return (elapsed < 0) ? elapsed + 0x100000 * 1000 : elapsed;
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{   /* Probes malloc() downward in 64 MB steps ; returns the size that could be
+     * allocated minus one step, or 0 when no probe succeeds */
+    size_t const step = 64 MB;
+    BYTE* testmem=NULL;
+
+    requiredMem = (((requiredMem >> 26) + 1) << 26);   /* 64 MB multiple strictly above the request */
+    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
+    requiredMem += 2*step;
+    while (!testmem && (requiredMem > step))   /* lower bound : never probe 0 bytes, never underflow */
+    {
+        requiredMem -= step;
+        testmem = (BYTE*) malloc ((size_t)requiredMem);
+    }
+    if (!testmem) return 0;   /* every probe failed */
+    free (testmem);
+    return (size_t) (requiredMem - step);
+}
+
+
+static U64 BMK_GetFileSize(char* infilename)
+{   /* Size in bytes of a regular file ; 0 if the stat call fails or the path is not a regular file */
+#if defined(_MSC_VER)
+    struct _stat64 fileInfo;
+    int const statError = _stat64(infilename, &fileInfo);
+#else
+    struct stat fileInfo;
+    int const statError = stat(infilename, &fileInfo);
+#endif
+    if (statError != 0) return 0;               /* file could not be queried */
+    if (!S_ISREG(fileInfo.st_mode)) return 0;   /* not a regular file */
+    return (U64)fileInfo.st_size;
+}
+
+
+static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { unsigned const hi = val32 << nbBits; unsigned const lo = val32 >> (32 - nbBits); return (hi | lo); }   /* left rotation ; the only visible caller passes nbBits==13 */
+
+static U32 BMK_rand(U32* src)
+{
+    /* One generator step : multiply the state by prime1, add prime2, rotate left by 13.
+     * The new state is written back to *src ; the returned value drops its 9 low bits. */
+    U32 const mixed   = (U32)(*src * prime1) + prime2;
+    U32 const rotated = BMK_rotl32(mixed, 13);
+    *src = rotated;
+    return rotated >> 9;
+}
+
+#define BMK_RAND15BITS  ( BMK_rand(&seed) & 0x7FFF)   /* random value in [0, 32767] ; advances the local `seed` */
+#define BMK_RANDLENGTH  ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)   /* [0,14] when the first draw has a low bit set, else [15,524] */
+#define BMK_RANDCHAR    (BYTE)((BMK_rand(&seed) & 63) + '0')   /* one of the 64 byte values starting at '0' */
+static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
+{   /* Fills buffer with a mix of random literals and copies of earlier content ; proba scales how often a copy is chosen */
+    BYTE* BBuffer = (BYTE*)buffer;
+    unsigned pos = 0;
+    U32 P32 = (U32)(32768 * proba);   /* proba rescaled to the 15-bit range of BMK_RAND15BITS */
+
+    /* First Byte : written unconditionally - NOTE(review): assumes bufferSize >= 1, confirm with callers */
+    BBuffer[pos++] = BMK_RANDCHAR;
+
+    while (pos < bufferSize)
+    {
+        /* Select : Literal (noise) or copy (offset up to 32768 bytes back) */
+        if (BMK_RAND15BITS < P32)
+        {
+            /* Match : length in [4,528], clipped to the end of the buffer */
+            size_t match, end;
+            unsigned length = BMK_RANDLENGTH + 4;
+            unsigned offset = BMK_RAND15BITS + 1;
+            if (offset > pos) offset = pos;   /* cannot reach before the start of the buffer */
+            match = pos - offset;
+            end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BBuffer[match++];   /* byte-wise forward copy : source may overlap destination */
+        }
+        else
+        {
+            /* Literal : run of random characters, clipped to the end of the buffer */
+            size_t end;
+            unsigned length = BMK_RANDLENGTH;
+            end = pos + length;
+            if (end > bufferSize) end = bufferSize;
+            while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
+        }
+    }
+}
+
+
+/*********************************************************
+*  Benchmark wrappers
+*********************************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+typedef struct
+{
+    blockType_t blockType;
+    U32 unusedBits;
+    U32 origSize;
+} blockProperties_t;
+
+static size_t g_cSize = 0;
+
+extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
+extern size_t ZSTD_decodeLiteralsBlock(void* ctx, void* dst, size_t maxDstSize, const BYTE** litPtr, const void* src, size_t srcSize);
+extern size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr, void* DTableLL, void* DTableML, void* DTableOffb, const void* src, size_t srcSize);
+
+
+size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* benchFunction adapter : compresses src into dst ; buff2 is not used */
+    size_t const cSize = ZSTD_compress(dst, dstSize, src, srcSize);
+    (void)buff2; return cSize;
+}
+
+size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* benchFunction adapter : decodes the g_cSize bytes held in buff2 into dst ; src/srcSize are not used */
+    size_t const dSize = ZSTD_decompress(dst, dstSize, buff2, g_cSize);
+    (void)src; (void)srcSize; return dSize;
+}
+
+size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* benchFunction adapter : returns (end of dst) - litPtr as set by the decoder, or the decoder's error code */
+    U32 ctx[1<<12];
+    const BYTE* ll = NULL;   /* only meaningful once the decoder succeeded */
+    size_t const errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, dstSize, &ll, buff2, g_cSize);
+    (void)src; (void)srcSize;
+    return ZSTD_isError(errorCode) ? errorCode : (size_t)((const BYTE*)dst + dstSize - ll);   /* never read ll after a failure */
+}
+
+size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* benchFunction adapter : only buff2 (g_cSize bytes) is consumed ; outputs land in scratch locals */
+    U32 tableLL[1<<10], tableML[1<<11], tableOffb[1<<9];
+    const BYTE* dumpsPtr;
+    size_t lastLL;
+    (void)dst; (void)dstSize; (void)src; (void)srcSize;
+    return ZSTD_decodeSeqHeaders(&lastLL, &dumpsPtr, tableLL, tableML, tableOffb, buff2, g_cSize);
+}
+
+size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* Micro-benchmark : running sum of the first srcSize bytes of buff2, reset to 0 at each zero byte */
+    U32 i;
+    size_t total = 0;
+    BYTE* data = buff2;
+
+    (void)dst; (void)dstSize; (void)src;   /* only buff2 and srcSize are used */
+    for (i=0; i < srcSize; i++)
+    {
+        U32 b = data[i];
+        total += b;
+        if (b==0) total = 0;   // 825   (NOTE(review): figures presumably are measured speeds of each variant - unit not stated)
+        //if (!b) total = 0;     // 825
+        //total = b ? total : 0; // 622
+        //total *= !!b;          // 465
+        //total &= -!b;          // 622
+    }
+    return total;   /* sum of the bytes following the last zero byte */
+}
+
+size_t local_decodeLiteralsForward(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize)
+{   /* benchFunction adapter : runs FSE_decompress on the g_cSize bytes held in buff2 ; src/srcSize are not used */
+    size_t const dSize = FSE_decompress(dst, dstSize, buff2, g_cSize);
+    (void)src; (void)srcSize; return dSize;
+}
+
+
+
+/*********************************************************
+*  Bench functions
+*********************************************************/
+size_t benchMem(void* src, size_t srcSize, U32 benchNb)
+{   /* Times the function selected by benchNb on src ; returns 0 (also for an unknown benchNb) or 12 on allocation failure */
+    BYTE*  dstBuff;        /* destination buffer handed to the benched function */
+    size_t dstBuffSize;    /* size of dstBuff and of buff2 */
+    BYTE*  buff2;          /* input prepared in advance for the decoding benches */
+    int loopNb;
+    const char* benchName;
+    size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
+    double bestTime = 100000000.;   /* best average time per call seen so far, in ms */
+    size_t errorCode = 0;           /* last value returned by the benched function */
+
+    // Declaration : map benchNb to a wrapper and a display name
+    switch(benchNb)
+    {
+    case 1:
+        benchFunction = local_ZSTD_compress; benchName = "ZSTD_compress";
+        break;
+    case 11:
+        benchFunction = local_ZSTD_decompress; benchName = "ZSTD_decompress";
+        break;
+    case 31:
+        benchFunction = local_ZSTD_decodeLiteralsBlock; benchName = "ZSTD_decodeLiteralsBlock";
+        break;
+    case 32:
+        benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
+        break;
+    case 101:
+        benchFunction = local_conditionalNull; benchName = "conditionalNull";
+        break;
+    case 102:
+        benchFunction = local_decodeLiteralsForward; benchName = "ZSTD_decodeLiteralsForward";
+        break;
+    default :
+        return 0;   /* unknown id : nothing to bench, not an error */
+    }
+
+    /* Allocation : both buffers get srcSize + 512 bytes */
+    dstBuffSize = srcSize + 512;
+    dstBuff = malloc(dstBuffSize);
+    buff2 = malloc(dstBuffSize);
+    if ((!dstBuff) || (!buff2))
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        free(dstBuff); free(buff2);
+        return 12;
+    }
+
+    /* Preparation : fill buff2 / g_cSize with the input expected by the selected wrapper */
+    switch(benchNb)
+    {
+    case 11:
+        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize);   /* whole compressed frame goes to buff2 */
+        break;
+    case 31:  // ZSTD_decodeLiteralsBlock
+        {
+            blockProperties_t bp;
+            ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
+            g_cSize = ZSTD_getcBlockSize(dstBuff+7, dstBuffSize, &bp) + 3;   /* +7 : presumably magic number (4) + first block header (3), as spelled out in case 32 */
+            memcpy(buff2, dstBuff+7, g_cSize);
+            //srcSize = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);   // real speed
+            srcSize = srcSize > 128 KB ? 128 KB : srcSize;   // relative to block
+            break;
+        }
+    case 32:   // ZSTD_decodeSeqHeaders
+        {
+            blockProperties_t bp;
+            const BYTE* ip = dstBuff;
+            const BYTE* iend;
+            size_t blockSize;
+            ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
+            ip += 4;   // Jump magic Number
+            blockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   // Get first block compressed size
+            iend = ip + 3 + blockSize;   // Get end of first block
+            ip += 3;   // jump first block header
+            ip += ZSTD_getcBlockSize(ip, iend - ip, &bp) + 3;   // jump literal sub block and its header
+            g_cSize = iend-ip;
+            memcpy(buff2, ip, g_cSize);   // copy rest of block (starting with SeqHeader)
+            srcSize = srcSize > 128 KB ? 128 KB : srcSize;   // speed relative to block
+            break;
+        }
+
+    /* test functions */
+
+    case 101:   // conditionalNull : buff2 receives srcSize pseudo-random values in [0,15]
+        {
+            size_t i;
+            U32 seed = (U32)srcSize;
+            for (i=0; i<srcSize; i++)
+                buff2[i] = (BYTE)(BMK_rand(&seed) & 15);
+            break;
+        }
+    case 102:   // decodeLiteralsForward : same source offset as case 31, but the copy starts 3 bytes further (dstBuff+10)
+        {
+            blockProperties_t bp;
+            ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
+            g_cSize = ZSTD_getcBlockSize(dstBuff+7, dstBuffSize, &bp);
+            memcpy(buff2, dstBuff+10, g_cSize);
+            //srcSize = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);   // real speed
+            srcSize = srcSize > 128 KB ? 128 KB : srcSize;   // relative to block
+            break;
+        }
+    default : ;   /* ids 1 and others need no preparation */
+    }
+
+    for (loopNb = 1; loopNb <= nbIterations; loopNb++)
+    {
+        double averageTime;
+        int milliTime;
+        U32 nbRounds=0;
+
+        DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
+        { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; }     /* warming up memory */
+
+        milliTime = BMK_GetMilliStart();
+        while(BMK_GetMilliStart() == milliTime);   /* busy-wait until the millisecond counter changes */
+        milliTime = BMK_GetMilliStart();
+        while(BMK_GetMilliSpan(milliTime) < TIMELOOP)   /* call the function repeatedly for TIMELOOP ms */
+        {
+            errorCode = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
+            if (ZSTD_isError(errorCode)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(errorCode)); exit(1); }
+            nbRounds++;
+        }
+        milliTime = BMK_GetMilliSpan(milliTime);   /* from here on : elapsed time, no longer a timestamp */
+
+        averageTime = (double)milliTime / nbRounds;   /* ms per call */
+        if (averageTime < bestTime) bestTime = averageTime;
+        DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);   /* bytes per ms / 1000 */
+    }
+
+    DISPLAY("%2u- %-30.30s : %7.1f MB/s  (%9u)\n", benchNb, benchName, (double)srcSize / bestTime / 1000., (U32)errorCode);   /* final line : shows benchNb instead of the pass number */
+
+    free(dstBuff);
+    free(buff2);
+    return 0;
+}
+
+
+int benchSample(U32 benchNb)
+{
+    /* Benchmarks a synthetic sample of sampleSize bytes.
+     * benchNb : function to bench ; 0 means every id in [0,99].
+     * @return : 0, or 12 when the sample buffer cannot be allocated */
+    const char* const sampleName = "Sample50";
+    size_t const sampleBytes = sampleSize;
+    char* const sample = (char*) malloc(sampleBytes);
+    if (sample == NULL)
+    {
+        DISPLAY("\nError: not enough memory!\n");
+        return 12;
+    }
+    /* generate content */
+    BMK_datagen(sample, sampleBytes, g_compressibilityDefault, 0);
+
+    /* clear the status line, print the title, then run the selection */
+    DISPLAY("\r%79s\r", "");
+    DISPLAY(" %s : \n", sampleName);
+    if (benchNb == 0)
+    {
+        U32 id;
+        for (id=0; id<100; id++) benchMem(sample, sampleBytes, id);
+    }
+    else benchMem(sample, sampleBytes, benchNb);
+    free(sample);
+    return 0;
+}
+
+
+int benchFiles(char** fileNamesTable, int nbFiles, U32 benchNb)
+{
+    /* Benchmarks every listed file in turn ; benchNb==0 means all ids in [0,99], for each file.
+     * @return : 0, or 11 (cannot open a file), 12 (out of memory), 13 (read error) */
+    int fileIdx=0;
+
+    /* Loop for each file */
+    while (fileIdx<nbFiles)
+    {
+        FILE* inFile;
+        char* inFileName;
+        U64   inFileSize;
+        size_t benchedSize;
+        size_t readSize;
+        char* origBuff;
+
+        /* Check file existence */
+        inFileName = fileNamesTable[fileIdx++];
+        inFile = fopen( inFileName, "rb" );
+        if (inFile==NULL)
+        {
+            DISPLAY( "Pb opening %s\n", inFileName);
+            return 11;
+        }
+
+        /* Memory allocation & restrictions : load at most a third of what malloc can provide */
+        inFileSize = BMK_GetFileSize(inFileName);
+        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+        if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+        if (benchedSize < inFileSize)
+            DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
+
+        /* Alloc */
+        origBuff = (char*) malloc((size_t)benchedSize);
+        if(!origBuff)
+        {
+            DISPLAY("\nError: not enough memory!\n");
+            fclose(inFile);
+            return 12;
+        }
+
+        /* Fill input buffer */
+        DISPLAY("Loading %s...       \r", inFileName);
+        readSize = fread(origBuff, 1, benchedSize, inFile);
+        fclose(inFile);
+        if(readSize != benchedSize)
+        {
+            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+            free(origBuff);
+            return 13;
+        }
+
+        /* bench : a local counter keeps benchNb intact, so "all functions" still applies to the next files */
+        DISPLAY("\r%79s\r", "");
+        DISPLAY(" %s : \n", inFileName);
+        if (benchNb)
+            benchMem(origBuff, benchedSize, benchNb);
+        else
+            { U32 id; for (id=0; id<100; id++) benchMem(origBuff, benchedSize, id); }
+        free(origBuff);   /* release this file's buffer before loading the next one */
+    }
+
+    return 0;
+}
+
+
+int usage(char* exename)
+{   /* Prints the basic command-line help through DISPLAY (stderr) ; exename is shown as the program name ; always returns 0 */
+    DISPLAY( "Usage :\n");
+    DISPLAY( "      %s [arg] file1 file2 ... fileX\n", exename);
+    DISPLAY( "Arguments :\n");
+    DISPLAY( " -H/-h  : Help (this text + advanced options)\n");
+    return 0;
+}
+
+int usage_advanced(void)
+{   /* Prints the advanced options (-b#, -i#) through DISPLAY (stderr) ; always returns 0 */
+    DISPLAY( "\nAdvanced options :\n");
+    DISPLAY( " -b#    : test only function # \n");
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
+    return 0;
+}
+
+int badusage(char* exename)
+{
+    /* Reports a command-line error, then prints the basic help ; the result is usage()'s, i.e. 0 */
+    DISPLAY("Wrong parameters\n");
+    return usage(exename);
+}
+
+int main(int argc, char** argv)
+{   /* Entry point : parses options, then benches either the given files or, when none is given, a synthetic sample */
+    int i,
+        filenamesStart=0,   /* index in argv of the first non-option argument ; 0 means none was given */
+        result;
+    char* exename=argv[0];
+    char* input_filename=0;
+    U32 benchNb = 0, main_pause = 0;   /* benchNb==0 : run every function */
+
+    // Welcome message
+    DISPLAY(WELCOME_MESSAGE);
+
+    if (argc<1) { badusage(exename); return 1; }
+
+    for(i=1; i<argc; i++)
+    {
+        char* argument = argv[i];
+
+        if(!argument) continue;   // Protection if argument empty
+
+        // Decode command (note : aggregated commands are allowed)
+        if (argument[0]=='-')
+        {
+            while (argument[1]!=0)   /* each pass consumes one option letter, plus its digits if any */
+            {
+                argument ++;
+
+                switch(argument[0])
+                {
+                    // Display help on usage
+                case 'h' :
+                case 'H': usage(exename); usage_advanced(); return 0;
+
+                    // Pause at the end (hidden option)
+                case 'p': main_pause = 1; break;
+
+                    // Select specific bench algorithm only
+                case 'b':
+                    benchNb = 0;
+                    while ((argument[1]>= '0') && (argument[1]<= '9'))   /* accumulate the decimal number following 'b' */
+                    {
+                        benchNb *= 10;
+                        benchNb += argument[1] - '0';
+                        argument++;
+                    }
+                    break;
+
+                    // Modify Nb Iterations (single digit 1-9 ; anything else after 'i' leaves the setting unchanged)
+                case 'i':
+                    if ((argument[1] >='1') && (argument[1] <='9'))
+                    {
+                        int iters = argument[1] - '0';
+                        BMK_SetNbIterations(iters);
+                        argument++;
+                    }
+                    break;
+
+                    // Unknown command
+                default : badusage(exename); return 1;
+                }
+            }
+            continue;
+        }
+
+        // first provided filename is input
+        if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
+    }
+
+    if (filenamesStart==0)
+        result = benchSample(benchNb);
+    else result = benchFiles(argv+filenamesStart, argc-filenamesStart, benchNb);   /* NOTE(review): every argument after the first file name is passed as a file name, including ones starting with '-' */
+
+    if (main_pause) { printf("press enter...\n"); getchar(); }
+
+    return result;
+}
+
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
new file mode 100644
index 0000000..91218b9
--- /dev/null
+++ b/programs/fuzzer.c
@@ -0,0 +1,494 @@
+/*
+    Fuzzer test tool for zstd
+    Copyright (C) Yann Collet 2014-2015
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - ZSTD source repository : https://github.com/Cyan4973/zstd
+    - ZSTD public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+*  Compiler specific
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define _CRT_SECURE_NO_WARNINGS     /* fgets */
+#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#  pragma GCC diagnostic ignored "-Wmissing-field-initializers"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#endif
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdlib.h>      /* free */
+#include <stdio.h>       /* fgets, sscanf */
+#include <sys/timeb.h>   /* timeb */
+#include <string.h>      /* strcmp */
+#include "zstd_static.h"
+#include "xxhash.h"      /* XXH64 */
+
+
+/**************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+*  Constants
+**************************************/
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION ""
+#endif
+
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+static const U32 nbTestsDefault = 32 KB;
+#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
+#define FUZ_COMPRESSIBILITY_DEFAULT 50
+static const U32 prime1 = 2654435761U;
+static const U32 prime2 = 2246822519U;
+
+
+
+/**************************************
+*  Display Macros
+**************************************/
+#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static U32 g_displayLevel = 2;
+
+#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
+            if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \
+            { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
+            if (g_displayLevel>=4) fflush(stdout); } }
+static const U32 g_refreshRate = 150;
+static U32 g_time = 0;
+
+
+/*********************************************************
+*  Fuzzer functions
+*********************************************************/
+static U32 FUZ_GetMilliStart(void)
+{
+    /* Millisecond timestamp from ftime() ; seconds are masked to 20 bits, so the value wraps around */
+    struct timeb now;
+
+    ftime( &now );
+    return (U32) (((now.time & 0xFFFFF) * 1000) +  now.millitm);
+}
+
+
+static U32 FUZ_GetMilliSpan(U32 nTimeStart)
+{
+    /* Elapsed milliseconds since nTimeStart ; when the clock reading is below the
+     * start value the timer wrapped, so one full period is added back */
+    U32 const now = FUZ_GetMilliStart();
+    U32 const wrapFix = (nTimeStart > now) ? (U32)(0x100000 * 1000) : 0;
+    return (now - nTimeStart) + wrapFix;
+}
+
+
+#  define FUZ_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))   /* 32-bit left rotation ; arguments parenthesised so expressions are safe */
+unsigned int FUZ_rand(unsigned int* src)
+{
+    /* One generator step : state = rotl32(state * prime1 + prime2, 13), stored back in *src.
+     * The value handed to the caller is the new state shifted right by 5 bits. */
+    U32 const mixed = (U32)(*src * prime1) + prime2;
+    U32 const next  = FUZ_rotl32(mixed, 13);
+    *src = next;
+    return next >> 5;
+}
+
+
+#define FUZ_RAND15BITS  (FUZ_rand(seed) & 0x7FFF)   /* random value in [0, 32767] ; advances *seed */
+#define FUZ_RANDLENGTH  ( (FUZ_rand(seed) & 3) ? (FUZ_rand(seed) % 15) : (FUZ_rand(seed) % 510) + 15)   /* [0,14] when the first draw has a low bit set, else [15,524] */
+static void FUZ_generateSynthetic(void* buffer, size_t bufferSize, double proba, U32* seed)
+{   /* Fills buffer with random literals and copies of earlier content ; proba scales how often a copy is chosen ; *seed is advanced */
+    BYTE* BBuffer = (BYTE*)buffer;
+    unsigned pos = 0;
+    U32 P32 = (U32)(32768 * proba);   /* proba rescaled to the 15-bit range of FUZ_RAND15BITS */
+
+    // First Byte : written unconditionally - NOTE(review): assumes bufferSize >= 1, confirm with callers
+    BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');
+
+    while (pos < bufferSize)
+    {
+        // Select : Literal (noise) or copy (offset up to 32768 bytes back)
+        if (FUZ_RAND15BITS < P32)
+        {
+            // Copy : length in [4,528], clipped to the end of the buffer
+            size_t match, end;
+            size_t length = FUZ_RANDLENGTH + 4;
+            size_t offset = FUZ_RAND15BITS + 1;
+            if (offset > pos) offset = pos;   /* cannot reach before the start of the buffer */
+            if (pos + length > bufferSize) length = bufferSize - pos;
+            match = pos - offset;
+            end = pos + length;
+            while (pos < end) BBuffer[pos++] = BBuffer[match++];   /* byte-wise forward copy : source may overlap destination */
+        }
+        else
+        {
+            // Literal (noise) : run of characters taken from the 64 byte values starting at '0'
+            size_t end;
+            size_t length = FUZ_RANDLENGTH;
+            if (pos + length > bufferSize) length = bufferSize - pos;
+            end = pos + length;
+            while (pos < end) BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');
+        }
+    }
+}
+
+
+/*
+static unsigned FUZ_highbit(U32 v32)
+{
+    unsigned nbBits = 0;
+    if (v32==0) return 0;
+    while (v32)
+    {
+        v32 >>= 1;
+        nbBits ++;
+    }
+    return nbBits;
+}
+*/
+
+
+static int basicUnitTests(U32 seed, double compressibility)
+{   /* Fixed round-trip and error-path checks on a synthetic buffer ; returns 0 when all pass, 1 otherwise */
+    int testResult = 0;
+    void* CNBuffer;
+    void* compressedBuffer;
+    void* decodedBuffer;
+    U32 randState = seed;
+    size_t result, cSize;
+    U32 testNb=0;
+
+    // Create compressible test buffer
+    CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
+    compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH));
+    decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer)   /* never hand a NULL buffer to the tests below */
+    { DISPLAY("Not enough memory, unit tests cancelled \n"); testResult = 1; goto _end; }
+    FUZ_generateSynthetic(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState);
+
+    // Basic tests
+    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH);
+    if (ZSTD_isError(result)) goto _output_error;
+    cSize = result;
+    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
+
+    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize);
+    if (ZSTD_isError(result)) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    {
+        size_t i;
+        DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
+        for (i=0; i<COMPRESSIBLE_NOISE_LENGTH; i++)   /* byte-by-byte comparison with the original */
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
+    DISPLAYLEVEL(4, "test%3i : decompress with 1 missing byte : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize-1);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    DISPLAYLEVEL(4, "test%3i : decompress with 1 too much byte : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, compressedBuffer, cSize+1);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    /* Decompression defense tests */
+    DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++);
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 3);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != (size_t)-ZSTD_ERROR_wrongSrcSize) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+    DISPLAYLEVEL(4, "test%3i : Check magic Number : ", testNb++);
+    ((char*)(CNBuffer))[0] = 1;   /* corrupt the first byte so the 4-byte input cannot carry the magic number */
+    result = ZSTD_decompress(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, CNBuffer, 4);
+    if (!ZSTD_isError(result)) goto _output_error;
+    if (result != (size_t)-ZSTD_ERROR_wrongMagicNumber) goto _output_error;
+    DISPLAYLEVEL(4, "OK \n");
+
+_end:
+    free(CNBuffer);
+    free(compressedBuffer);
+    free(decodedBuffer);
+    return testResult;
+
+_output_error:
+    testResult = 1;
+    DISPLAY("Error detected in Unit tests ! \n");
+    goto _end;
+}
+
+
+/* findDiff() :
+ * returns the position of the first byte at which buf1 and buf2 differ, or max when their first max bytes match */
+static size_t findDiff(const void* buf1, const void* buf2, size_t max)
+{
+    const BYTE* const left  = (const BYTE*)buf1;
+    const BYTE* const right = (const BYTE*)buf2;
+    size_t pos = 0;
+    while ((pos < max) && (left[pos] == right[pos]))
+        pos++;
+    return pos;
+}
+/* CHECK() : if cond is true, displays "Error => ", the message built from the remaining arguments, then seed and testNb (both read from the enclosing scope), and jumps to the enclosing function's _output_error label */
+#   define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
+                            DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
+
+static const U32 maxSrcLog = 23;      /* fuzzerTests() allocates a source buffer of (1<<maxSrcLog) bytes */
+static const U32 maxSampleLog = 22;   /* sample sizes are drawn with log2 < maxSampleLog, hence always < (1<<maxSampleLog) bytes */
+
+/* fuzzerTests() : runs tests [startTest, nbTests) ; each one compresses a pseudo-random slice of a synthetic
+ * buffer, decompresses it, and compares the XXH64 checksum of the result with the one of the original slice.
+ * seed drives every random choice ; compressibility is currently unused. Returns 0 on success, 1 on first failure. */
+int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
+{
+    BYTE *srcBuffer, *cBuffer, *dstBuffer;
+    size_t srcBufferSize = (size_t)1<<maxSrcLog;
+    size_t dstBufferSize = ((size_t)1<<maxSampleLog) + 32;   /* largest sample + largest dSupSize (32) */
+    size_t cBufferSize   = ZSTD_compressBound((size_t)1<<maxSampleLog);
+    U32 result = 0, testNb = 0;
+    U32 coreSeed = seed, lseed = 0;
+    (void)compressibility;   /* not used yet */
+
+    /* allocation */
+    srcBuffer = malloc (srcBufferSize);
+    dstBuffer = malloc (dstBufferSize);
+    cBuffer   = malloc (cBufferSize);
+    CHECK (!srcBuffer || !dstBuffer || !cBuffer, "Not enough memory, fuzzer tests cancelled");
+
+    /* Create initial sample */
+    FUZ_generateSynthetic(srcBuffer, srcBufferSize, 0.50, &coreSeed);
+
+    /* catch up testNb : every test consumes exactly one FUZ_rand(&coreSeed), so skipped tests must do the same */
+    for (testNb=0; testNb < startTest; testNb++)
+        FUZ_rand(&coreSeed);
+
+    /* test loop */
+    for (testNb=startTest; testNb < nbTests; testNb++)
+    {
+        size_t sampleSize, sampleStart;
+        size_t cSize, dSize, dSupSize;
+        U32 sampleSizeLog;
+        U64 crcOrig, crcDest;
+
+        /* init : lseed is a per-test seed derived from coreSeed ; sampleSize ends up in [2^log, 2^(log+1)) */
+        DISPLAYUPDATE(2, "\r%6u/%6u   ", testNb, nbTests);
+        FUZ_rand(&coreSeed);
+        lseed = coreSeed ^ prime1;
+        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+        sampleSize = (size_t)1<<sampleSizeLog;
+        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+        crcOrig = XXH64(srcBuffer + sampleStart, sampleSize, 0);
+
+        /* compression tests*/
+        cSize = ZSTD_compress(cBuffer, cBufferSize, srcBuffer + sampleStart, sampleSize);
+        CHECK(ZSTD_isError(cSize), "ZSTD_compress failed");
+
+        /* decompression tests : announced capacity is either exact or 1 to 32 bytes larger (always within dstBuffer) */
+        dSupSize = (FUZ_rand(&lseed) & 1) ? 0 : (FUZ_rand(&lseed) & 31) + 1;
+        dSize = ZSTD_decompress(dstBuffer, sampleSize + dSupSize, cBuffer, cSize);
+        CHECK(dSize != sampleSize, "ZSTD_decompress failed (%s)", ZSTD_getErrorName(dSize));
+        crcDest = XXH64(dstBuffer, sampleSize, 0);
+        CHECK(crcOrig != crcDest, "dstBuffer corrupted (pos %u / %u)", (U32)findDiff(srcBuffer+sampleStart, dstBuffer, sampleSize), (U32)sampleSize);
+    }
+    DISPLAY("\rAll fuzzer tests completed   \n");
+
+_cleanup:
+    free(srcBuffer);
+    free(cBuffer);
+    free(dstBuffer);
+    return result;
+
+_output_error:
+    result = 1;
+    goto _cleanup;
+}
+
+
+/*********************************************************
+*  Command line
+*********************************************************/
+/* FUZ_usage() : displays the command-line help (options as parsed by main()). Always returns 0. */
+int FUZ_usage(char* programName)
+{
+    DISPLAY( "Usage :\n      %s [args]\n\nArguments :\n", programName);
+    DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
+    DISPLAY( " -s#    : Select seed (default:prompt user)\n");
+    DISPLAY( " -t#    : Select starting test number (default:0)\n");
+    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
+    DISPLAY( " -p     : pause at the end\n");
+    DISPLAY( " -v     : verbose\n");
+    DISPLAY( " -q     : quieter (decrease display level)\n");
+    DISPLAY( " -h     : display help and exit\n");
+    return 0;
+}
+
+
+/* main() : parses the command line, then runs basicUnitTests() (only when the starting test is 0) and, if they passed, fuzzerTests().
+ * Returns the result reported by the tests, or FUZ_usage()'s return value when help is requested or an option is unknown. */
+int main(int argc, char** argv)
+{
+    U32 seed=0;
+    int seedset=0;   /* becomes 1 when -s# is given ; otherwise the seed comes from FUZ_GetMilliStart() */
+    int argNb;
+    int nbTests = nbTestsDefault;
+    int testNb = 0;   /* first fuzzer test to run (-t#) */
+    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
+    int result=0;
+    U32 mainPause = 0;
+    char* programName;
+
+    /* Check command line */
+    programName = argv[0];
+    for(argNb=1; argNb<argc; argNb++)
+    {
+        char* argument = argv[argNb];
+        if(!argument) continue;   /* Protection if argument empty */
+        /* Handle commands. Aggregated commands are allowed */
+        if (argument[0]=='-')   /* arguments not starting with '-' are silently ignored */
+        {
+            argument++;
+
+            while (*argument!=0)
+            {
+                switch(*argument)
+                {
+                case 'h':
+                    return FUZ_usage(programName);
+                case 'v':   /* verbose : display level forced to 4 */
+                    argument++;
+                    g_displayLevel=4;
+                    break;
+                case 'q':   /* quieter : display level decreased by one, may be repeated */
+                    argument++;
+                    g_displayLevel--;
+                    break;
+                case 'p': /* pause at the end */
+                    argument++;
+                    mainPause = 1;
+                    break;
+
+                case 'i':   /* number of tests : decimal digits following the letter */
+                    argument++;
+                    nbTests=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        nbTests *= 10;
+                        nbTests += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 's':   /* seed : decimal digits following the letter */
+                    argument++;
+                    seed=0;
+                    seedset=1;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        seed *= 10;
+                        seed += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 't':   /* starting test number : decimal digits following the letter */
+                    argument++;
+                    testNb=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        testNb *= 10;
+                        testNb += *argument - '0';
+                        argument++;
+                    }
+                    break;
+
+                case 'P':   /* compressibility % */
+                    argument++;
+                    proba=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        proba *= 10;
+                        proba += *argument - '0';
+                        argument++;
+                    }
+                    if (proba<0) proba=0;
+                    if (proba>100) proba=100;   /* clamp to a valid percentage */
+                    break;
+
+                default:   /* unknown option : show help and exit */
+                    return FUZ_usage(programName);
+                }
+            }
+        }
+    }
+
+    /* Get Seed */
+    DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION);
+
+    if (!seedset) seed = FUZ_GetMilliStart() % 10000;
+    DISPLAY("Seed = %u\n", seed);
+    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
+
+    if (nbTests<=0) nbTests=1;   /* always run at least one fuzzer test */
+
+    if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
+    if (!result)   /* fuzzer tests are skipped when the unit tests reported an error */
+        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
+    if (mainPause)
+    {
+        DISPLAY("Press Enter \n");
+        getchar();
+    }
+    return result;
+}
diff --git a/programs/xxhash.c b/programs/xxhash.c
new file mode 100644
index 0000000..e221626
--- /dev/null
+++ b/programs/xxhash.c
@@ -0,0 +1,928 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2014, Yann Collet.
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : http://code.google.com/p/xxhash/
+- public discussion board : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+//**************************************
+// Tuning parameters
+//**************************************
+// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+// For other CPUs, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
+// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
+// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USE_UNALIGNED_ACCESS 1
+#endif
+
+// XXH_ACCEPT_NULL_INPUT_POINTER :
+// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+// This option has a very small performance cost (only measurable on small inputs).
+// By default, this option is disabled. To enable it, uncomment below define :
+// #define XXH_ACCEPT_NULL_INPUT_POINTER 1
+
+// XXH_FORCE_NATIVE_FORMAT :
+// By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+// Results are therefore identical for little-endian and big-endian CPU.
+// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+// Should endian-independence be of no importance for your application, you may set the #define below to 1.
+// It will improve speed for Big-endian CPU.
+// This option has no impact on Little_Endian CPU.
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+//**************************************
+// Compiler Specific Options
+//**************************************
+// Disable some Visual warning messages
+#ifdef _MSC_VER  // Visual Studio
+#  pragma warning(disable : 4127)      // disable: C4127: conditional expression is constant
+#endif
+
+#ifdef _MSC_VER    // Visual Studio
+#  define FORCE_INLINE static __forceinline
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+//**************************************
+// Includes & Memory related functions
+//**************************************
+#include "xxhash.h"
+// Modify the local functions below should you wish to use some other memory routines
+// for malloc(), free()
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+// for memcpy()
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+
+//**************************************
+// Basic Types
+//**************************************
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
+# include <stdint.h>
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char      BYTE;
+typedef unsigned short     U16;
+typedef unsigned int       U32;
+typedef   signed int       S32;
+typedef unsigned long long U64;
+#endif
+
+#if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
+#  define _PACKED __attribute__ ((packed))
+#else
+#  define _PACKED
+#endif
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  ifdef __IBMC__
+#    pragma pack(1)
+#  else
+#    pragma pack(push, 1)
+#  endif
+#endif
+
+typedef struct _U32_S
+{
+    U32 v;
+} _PACKED U32_S;
+typedef struct _U64_S
+{
+    U64 v;
+} _PACKED U64_S;
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  pragma pack(pop)
+#endif
+
+#define A32(x) (((U32_S *)(x))->v)
+#define A64(x) (((U64_S *)(x))->v)
+
+
+//***************************************
+// Compiler-specific Functions and Macros
+//***************************************
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+// Left rotations of 32/64-bit values. Note : although _rotl exists for minGW (GCC under windows), performance seems poor
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else   // portable form : x and r are parenthesized so that expressions can be passed ; x is evaluated twice, r must be in 1..width-1
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+#if defined(_MSC_VER)     // Visual Studio
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403   // GCC_VERSION is major*100+minor : GCC 4.3 or later
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else   // portable fallback : byte order reversed with shifts and masks
+static inline U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static inline U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+//**************************************
+// Constants
+//**************************************
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+//**************************************
+// Architecture Macros
+//**************************************
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+#ifndef XXH_CPU_LITTLE_ENDIAN   // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
+static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
+#endif
+
+
+//**************************************
+// Macros
+//**************************************
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    // use only *after* variable declarations
+
+
+//****************************
+// Memory reads
+//****************************
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+// Reads 32 bits at ptr (through the A32 accessor when align==XXH_unaligned) ; the value is byte-swapped unless endian==XXH_littleEndian
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    const U32 raw = (align==XXH_unaligned) ? A32(ptr) : *(U32*)ptr;
+    if (endian==XXH_littleEndian) return raw;
+    return XXH_swap32(raw);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{   // same as XXH_readLE32_align(), always using the unaligned access path
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+// Reads 64 bits at ptr (through the A64 accessor when align==XXH_unaligned) ; the value is byte-swapped unless endian==XXH_littleEndian
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    const U64 raw = (align==XXH_unaligned) ? A64(ptr) : *(U64*)ptr;
+    if (endian==XXH_littleEndian) return raw;
+    return XXH_swap64(raw);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{   // same as XXH_readLE64_align(), always using the unaligned access path
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+//****************************
+// Simple Hash Functions
+//****************************
+// XXH32_endian_align() : one-shot 32-bit hash of len bytes at input. Inputs of 16+ bytes are first consumed in 16-byte
+// stripes by four accumulators ; remaining 4-byte words, then remaining bytes, are folded in before the final mixing steps.
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)   // never #undef'd : XXH64_endian_align also uses it
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;   // with len==0 and p==bEnd, none of the loops below reads memory
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;   // last position from which a full 16-byte stripe can be read
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;   // short input : accumulators are not used
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)   // remaining whole 32-bit words
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)   // remaining single bytes
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;   // final mixing : xor-shift / multiply rounds
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+// XXH32() : one-shot 32-bit hash of len bytes at input with the given seed ; dispatches on byte order and, when unaligned access is not enabled, on input alignment.
+unsigned int XXH32 (const void* input, size_t len, unsigned seed)
+{
+#if 0
+    // Simple version, good for code maintenance, but unfortunately slow for small inputs
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3) == 0)   // Input is aligned, let's leverage the speed advantage
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   // no byte swap needed (or native format requested)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+// XXH64_endian_align() : one-shot 64-bit hash of len bytes at input. Inputs of 32+ bytes are first consumed in 32-byte stripes by
+// four accumulators ; remaining 8-byte words, one optional 4-byte word, then single bytes are folded in before the final mixing steps.
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;   // with len==0 and p==bEnd, none of the loops below reads memory
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;   // last position from which a full 32-byte stripe can be read
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;   // each accumulator is scrambled once more, then merged into h64
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;   // short input : accumulators are not used
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)   // remaining whole 64-bit words
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)   // fewer than 8 bytes are left here, so at most one 32-bit word
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;   // XXH_get32bits is the macro #define'd inside XXH32_endian_align
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)   // remaining single bytes
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;   // final mixing : xor-shift / multiply rounds
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+// XXH64() : one-shot 64-bit hash of len bytes at input with the given seed ; dispatches on byte order and, when unaligned access is not enabled, on input alignment.
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    // Simple version, good for code maintenance, but unfortunately slow for small inputs
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 7)==0)   // Input is aligned, let's leverage the speed advantage
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)   // no byte swap needed (or native format requested)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/****************************************************
+ *  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct   // internal layout behind the opaque XXH32_state_t
+{
+    U64 total_len;  // number of bytes received by XXH32_update since the last reset
+    U32 seed;       // seed given to XXH32_reset
+    U32 v1;         // v1..v4 : the four stripe accumulators
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;    // number of bytes currently waiting in mem32
+} XXH_istate32_t;
+
+typedef struct   // internal layout behind the opaque XXH64_state_t
+{
+    U64 total_len;  // number of bytes received by XXH64_update since the last reset
+    U64 seed;       // seed given to XXH64_reset
+    U64 v1;         // v1..v4 : the four stripe accumulators
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;    // number of bytes currently waiting in mem64
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{   // allocates a state with XXH_malloc ; its content is not initialized here (XXH32_reset does that)
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   // A compilation error here means XXH32_state_t is not large enough
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));   // result of malloc() : may be NULL
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{   // statePtr is handed to free() through XXH_free
+    XXH_free(statePtr);
+    return XXH_OK;   // always reports success
+}
+
+XXH64_state_t* XXH64_createState(void)
+{   // allocates a state with XXH_malloc ; its content is not initialized here (XXH64_reset does that)
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   // A compilation error here means XXH64_state_t is not large enough
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));   // result of malloc() : may be NULL
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{   // statePtr is handed to free() through XXH_free
+    XXH_free(statePtr);
+    return XXH_OK;   // always reports success
+}
+
+
+/*** Hash feed ***/
+
+// XXH32_reset() : (re)starts a hash computation with the given seed : no byte consumed, nothing buffered. Always returns XXH_OK.
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
+{
+    XXH_istate32_t* const st = (XXH_istate32_t*) state_in;
+    st->total_len = 0;  st->memsize = 0;
+    st->seed = seed;
+    st->v1 = seed + PRIME32_1 + PRIME32_2;
+    st->v2 = seed + PRIME32_2;
+    st->v3 = seed;
+    st->v4 = seed - PRIME32_1;
+    return XXH_OK;
+}
+
+// XXH64_reset() : (re)starts a hash computation with the given seed : no byte consumed, nothing buffered. Always returns XXH_OK.
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* const st = (XXH_istate64_t*) state_in;
+    st->total_len = 0;  st->memsize = 0;
+    st->seed = seed;
+    st->v1 = seed + PRIME64_1 + PRIME64_2;
+    st->v2 = seed + PRIME64_2;
+    st->v3 = seed;
+    st->v4 = seed - PRIME64_1;
+    return XXH_OK;
+}
+
+
+// XXH32_update_endian() : consumes len bytes of input. Complete 16-byte stripes are folded into v1..v4 ;
+// a trailing partial stripe (< 16 bytes) is kept in state->mem32 for the next call. Returns XXH_OK
+// (or XXH_ERROR for a NULL input when XXH_ACCEPT_NULL_INPUT_POINTER is defined).
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   // fill in tmp buffer
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   // some data left from previous update
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);   // complete the pending stripe
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if ((size_t)(bEnd - p) >= 16)   // compare sizes : bEnd-16 is only formed once at least 16 bytes are known to remain
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)   // fewer than 16 bytes left : keep them for the next update or the digest
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (U32)(bEnd-p);
+    }
+    return XXH_OK;
+}
+
+// XXH32_update() : feeds len bytes of input into the state, using the reader that matches the detected byte order.
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    const int bigEndianCPU = ((XXH_endianess)XXH_CPU_LITTLE_ENDIAN != XXH_littleEndian);
+
+    if (bigEndianCPU && !XXH_FORCE_NATIVE_FORMAT)   // the only configuration in which reads are byte-swapped
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+    return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+}
+
+
+
+// XXH32_digest_endian() : computes the hash from the accumulators and the (< 16) bytes still buffered in mem32.
+// The state is accessed through const pointers only : it is never modified here.
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* const state = (const XXH_istate32_t*) state_in;
+    const BYTE* p = (const BYTE*)state->mem32;
+    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)   // at least one full stripe went through the accumulators
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)   // buffered whole 32-bit words
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)   // buffered single bytes
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;   // final mixing : xor-shift / multiply rounds
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+    return h32;
+}
+
+
+// XXH32_digest() : returns the 32-bit hash of everything fed so far, using the reader that matches the detected byte order.
+U32 XXH32_digest (const XXH32_state_t* state_in)
+{
+    const int bigEndianCPU = ((XXH_endianess)XXH_CPU_LITTLE_ENDIAN != XXH_littleEndian);
+
+    if (bigEndianCPU && !XXH_FORCE_NATIVE_FORMAT)   // the only configuration in which reads are byte-swapped
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+    return XXH32_digest_endian(state_in, XXH_littleEndian);
+}
+
+
+// XXH64_update_endian() : consumes len bytes of input. Complete 32-byte stripes are folded into v1..v4 ;
+// a trailing partial stripe (< 32 bytes) is kept in state->mem64 for the next call. Returns XXH_OK
+// (or XXH_ERROR for a NULL input when XXH_ACCEPT_NULL_INPUT_POINTER is defined).
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   // fill in tmp buffer
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   // some data left from previous update
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);   // complete the pending stripe
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if ((size_t)(bEnd - p) >= 32)   // compare sizes : p+32 is never formed when fewer than 32 bytes remain
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)   // fewer than 32 bytes left : keep them for the next update or the digest
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (U32)(bEnd-p);
+    }
+    return XXH_OK;
+}
+
+// XXH64_update() : feeds len bytes of input into the state, using the reader that matches the detected byte order.
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    const int bigEndianCPU = ((XXH_endianess)XXH_CPU_LITTLE_ENDIAN != XXH_littleEndian);
+
+    if (bigEndianCPU && !XXH_FORCE_NATIVE_FORMAT)   // the only configuration in which reads are byte-swapped
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+    return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+}
+
+
+
+// XXH64_digest_endian() : computes the hash from the accumulators and the (< 32) bytes still buffered in mem64.
+// The state is accessed through const pointers only : it is never modified here.
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t* const state = (const XXH_istate64_t*) state_in;
+    const BYTE* p = (const BYTE*)state->mem64;
+    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)   // at least one full stripe went through the accumulators
+    {
+        U64 v1 = state->v1;   // local copies : the merge below modifies them
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+    while (p+8<=bEnd)   // buffered whole 64-bit words
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)   // fewer than 8 bytes are left here, so at most one 32-bit word
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)   // buffered single bytes
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;   // final mixing : xor-shift / multiply rounds
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+// XXH64_digest() : returns the 64-bit hash of everything fed so far, using the reader that matches the detected byte order.
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    const int bigEndianCPU = ((XXH_endianess)XXH_CPU_LITTLE_ENDIAN != XXH_littleEndian);
+
+    if (bigEndianCPU && !XXH_FORCE_NATIVE_FORMAT)   // the only configuration in which reads are byte-swapped
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+    return XXH64_digest_endian(state_in, XXH_littleEndian);
+}
+
+
diff --git a/programs/xxhash.h b/programs/xxhash.h
new file mode 100755
index 0000000..55b4501
--- /dev/null
+++ b/programs/xxhash.h
@@ -0,0 +1,156 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2014, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : http://code.google.com/p/xxhash/
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+   Includes
+*****************************/
+#include <stddef.h>   /* size_t */
+
+
+/*****************************
+   Type
+*****************************/
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+
+/*****************************
+   Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of the sequence of "length" bytes stored at memory address "input".
+*/
+
+
+
+/*****************************
+   Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/zstd.1 b/programs/zstd.1
new file mode 100644
index 0000000..ce36027
--- /dev/null
+++ b/programs/zstd.1
@@ -0,0 +1,69 @@
+\"
+\" zstd.1: This is a manual page for 'zstd' program. This file is part of the
+\" zstd <https://github.com/Cyan4973/zstd> project.
+\"
+
+\" No hyphenation
+.hy 0
+.nr HY 0
+
+.TH zstd "1" "2015-01-22" "zstd" "User Commands"
+.SH NAME
+\fBzstd\fR - standard compression algorithm
+
+.SH SYNOPSIS
+.TP 5
+\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] [OUTPUT-FILE]
+
+.SH DESCRIPTION
+.PP
+\fBzstd\fR is a fast lossless compression algorithm
+with highly configurable compression strength and memory usage.
+It is based on the \fBLZ77\fR family, coupled with FSE entropy stage.
+zstd offers compression speed of 200 MB/s per core.
+It also features a fast decoder, with speed > 500 MB/s per core.
+\fBzstd\fR supports the following options :
+
+.SH OPTIONS
+.TP
+.B \-1
+ fast compression (default behavior; the switch itself is not accepted yet)
+.TP
+.B \-d
+ decompression
+.TP
+.B \-f
+ overwrite output without prompting
+.TP
+.B \-h/\-H
+ display help/long help and exit
+.TP
+.B \-V
+ display Version number and exit
+.TP
+.B \-v
+ verbose mode
+.TP
+.B \-q
+ suppress warnings; specify twice to suppress errors too
+.TP
+.B \-c
+ force write to standard output, even if it is the console
+.TP
+.B \-t
+ test compressed file integrity (not implemented yet)
+.TP
+.B \-z
+ force compression (not implemented yet)
+.TP
+.B \-b
+ benchmark file(s)
+.TP
+.B \-i#
+ iteration loops [1-9](default : 3), benchmark mode only
+
+.SH BUGS
+Report bugs at: https://github.com/Cyan4973/zstd
+
+.SH AUTHOR
+Yann Collet
diff --git a/programs/zstdcat.1 b/programs/zstdcat.1
new file mode 100644
index 0000000..036ac07
--- /dev/null
+++ b/programs/zstdcat.1
@@ -0,0 +1,32 @@
+\"
+\" zstdcat.1: This is a manual page for 'zstdcat' program. This file is part of
+\" the zstd <https://github.com/Cyan4973/zstd/> project.
+\"
+
+\" No hyphenation
+.hy 0
+.nr HY 0
+
+.TH zstdcat "1" "2014-06-20" "zstdcat" "User Commands"
+.SH NAME
+\fBzstdcat\fR - Utility based on zstd
+
+.SH SYNOPSIS
+.TP 5
+\fBzstdcat\fR [\fBOPTIONS\fR] [-|INPUT-FILE]
+
+.SH DESCRIPTION
+.PP
+\fBzstdcat\fR is a utility based on \fBzstd\fR, a fast lossless compression algorithm.
+
+\fBzstdcat\fR decompresses the input file or stream, redirecting its output to the console.
+It is equivalent to \fBzstd -cd\fR.
+
+Available options are the same as \fBzstd\fR ones (man zstd).
+
+
+.SH BUGS
+Report bugs at: https://github.com/Cyan4973/zstd/
+
+.SH AUTHOR
+Yann Collet
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
new file mode 100644
index 0000000..9c47845
--- /dev/null
+++ b/programs/zstdcli.c
@@ -0,0 +1,322 @@
+/*
+  zstdcli - Command Line Interface (cli) for zstd
+  Copyright (C) Yann Collet 2014-2015
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd source repository : https://github.com/Cyan4973/zstd
+  - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/*
+  Note : this is a user program.
+  It is not part of zstd compression library.
+  The license of this compression CLI program is GPLv2.
+  The license of zstd library is BSD.
+*/
+
+
+/**************************************
+*  Compiler Options
+**************************************/
+#define _CRT_SECURE_NO_WARNINGS  /* Visual : removes warning from strcpy */
+#define _POSIX_SOURCE 1          /* triggers fileno() within <stdio.h> on unix */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include <stdio.h>    /* fprintf, getchar */
+#include <stdlib.h>   /* exit, calloc, free */
+#include <string.h>   /* strcmp, strlen */
+#include "bench.h"    /* BMK_benchFiles, BMK_SetNbIterations */
+#include "fileio.h"
+
+
+/**************************************
+*  OS-specific Includes
+**************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>    /* _O_BINARY */
+#  include <io.h>       /* _setmode, _isatty */
+#  ifdef __MINGW32__
+   int _fileno(FILE *stream);   /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
+#  endif
+#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)   /* sets _O_BINARY mode on the stream's descriptor */
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))          /* non-zero when the stream is a terminal */
+#else
+#  include <unistd.h>   /* isatty */
+#  define SET_BINARY_MODE(file)                                      /* expands to nothing on this branch */
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))            /* non-zero when the stream is a terminal */
+#endif
+
+
+//****************************
+// Constants
+//****************************
+#define COMPRESSOR_NAME "zstd command line interface"
+#ifndef ZSTD_VERSION
+#  define ZSTD_VERSION "v0.0.1"   /* fallback value, used when ZSTD_VERSION is not defined beforehand */
+#endif
+#define AUTHOR "Yann Collet"
+#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__   /* format string + its 5 arguments, to be passed to DISPLAY() */
+#define ZSTD_EXTENSION ".zst"     /* suffix appended to / removed from automatically generated file names */
+#define ZSTD_CAT "zstdcat"        /* program name which selects decompression to stdout */
+/* Size multipliers, used as postfix : "4 KB" expands to "4 *(1 <<10)" */
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+
+/**************************************
+*  Display Macros
+**************************************/
+#define DISPLAY(...)           fprintf(stderr, __VA_ARGS__)   /* every message goes to stderr */
+#define DISPLAYLEVEL(l, ...)   do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)   /* prints only when displayLevel >= l; expands to a single statement, hence safe before an 'else' */
+static unsigned displayLevel = 2;   /* 0 : no display;  1 : errors;  2 : + result + interaction + warnings;  3 : + progression;  4 : + information */
+
+
+/**************************************
+*  Exceptions
+**************************************/
+#define DEBUG 0   /* when non-zero, EXM_THROW also reports the source file and line where the error was raised */
+#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)   /* single statement, to be followed by ';' */
+#define EXM_THROW(error, ...)   /* prints "Error <error> : <message>" (level 1), then exit(error) */ \
+do {                                                                      \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+} while (0)
+
+
+/**************************************
+*  Command Line
+**************************************/
+static int usage(const char* programName)
+{
+    /* Short help on stderr, emitted as one formatted write; always returns 0 ("-z" stays unlisted : not handled yet) */
+    DISPLAY( "Usage :\n"
+             "      %s [arg] [input] [output]\n"
+             "\n"
+             "input   : a filename\n"
+             "          with no FILE, or when FILE is - , read standard input\n"
+             "Arguments :\n"
+             " -d     : decompression (default for %s extension)\n"
+             " -f     : overwrite output without prompting \n"
+             " -h/-H  : display help/long help and exit\n", programName, ZSTD_EXTENSION);
+    return 0;
+}
+
+static int usage_advanced(const char* programName)
+{
+    /* Long help : welcome banner, then the short help, then advanced and benchmark switches; always returns 0 */
+    DISPLAY(WELCOME_MESSAGE);
+    (void)usage(programName);
+    DISPLAY( "\n"
+             "Advanced arguments :\n"
+             " -V     : display Version number and exit\n"
+             " -v     : verbose mode\n"
+             " -q     : suppress warnings; specify twice to suppress errors too\n"
+             " -c     : force write to standard output, even if it is the console\n"
+             "Benchmark arguments :\n"
+             " -b     : benchmark file(s)\n"
+             " -i#    : iteration loops [1-9](default : 3), benchmark mode only\n");
+    return 0;
+}
+
+static int badusage(const char* programName)
+{
+    /* Error message + short help, both skipped when displayLevel==0; the returned 1 is used by main() as exit status */
+    if (displayLevel != 0) { DISPLAY("Incorrect parameters\n"); usage(programName); }
+    return 1;
+}
+
+
+static void waitEnter(void)
+{
+    DISPLAY("Press enter to continue...\n");   /* prompt is written to stderr */
+    (void)getchar();                           /* then wait for a character on stdin; its value is ignored */
+}
+
+
+int main(int argc, char** argv)
+{
+    /* Entry point : parses switches and file names, then runs benchmark, compression or decompression.
+       Returns 0 on completion (or after -h/-H/-V), and badusage()'s value (1) on incorrect parameters. */
+    int i,
+        bench=0,
+        decode=0,
+        forceStdout=0,
+        main_pause=0;
+    unsigned fileNameStart = 0;
+    unsigned nbFiles = 0;
+    const char* programName = argv[0];
+    const char* exeName = programName;   /* programName without its directory part */
+    const char* inFileName = NULL;
+    const char* outFileName = NULL;
+    char* dynNameSpace = NULL;           /* storage for an automatically generated output name */
+
+    /* zstdcat behavior : only the file name part is compared, so that "/path/to/zstdcat" is recognized too */
+    { const char* c; for (c=programName; *c; c++) if ((*c=='/') || (*c=='\\')) exeName = c+1; }
+    if (!strcmp(exeName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
+
+    /* command switches */
+    for(i=1; i<argc; i++)
+    {
+        char* argument = argv[i];
+
+        if(!argument) continue;   /* Protection if argument is NULL */
+
+        /* Decode commands (note : aggregated commands are allowed) */
+        if (argument[0]=='-')
+        {
+            /* '-' alone means stdin (when input is not yet defined) or stdout (otherwise) */
+            if (argument[1]==0)
+            {
+                if (!inFileName) inFileName=stdinmark;
+                else outFileName=stdoutmark;
+            }
+
+            argument++;
+
+            while (argument[0]!=0)
+            {
+                switch(argument[0])
+                {
+                    /* Display help */
+                case 'V': DISPLAY(WELCOME_MESSAGE); return 0;   /* Version Only */
+                case 'H':
+                case 'h': return usage_advanced(programName);
+
+                    /* Decoding */
+                case 'd': decode=1; argument++; break;
+
+                    /* Force stdout, even if stdout==console */
+                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
+
+                    /* Overwrite */
+                case 'f': FIO_overwriteMode(); argument++; break;
+
+                    /* Verbose mode */
+                case 'v': displayLevel=4; argument++; break;
+
+                    /* Quiet mode : displayLevel is unsigned, so stop at 0 instead of wrapping to max verbosity */
+                case 'q': if (displayLevel>0) displayLevel--; argument++; break;
+
+                    /* keep source file (default anyway, so useless; only for xz/lzma compatibility) */
+                case 'k': argument++; break;
+
+                    /* Benchmark */
+                case 'b': bench=1; argument++; break;
+
+                    /* Modify Nb Iterations (benchmark only) */
+                case 'i':
+                    {
+                        int iters= 0;
+                        argument++;
+                        while ((*argument >='0') && (*argument <='9'))
+                            iters *= 10, iters += *argument++ - '0';
+                        BMK_SetNbIterations(iters);
+                    }
+                    break;
+
+                    /* Pause at the end (hidden option) */
+                case 'p': main_pause=1; argument++; break;
+
+                    /* unknown command */
+                default : return badusage(programName);
+                }
+            }
+            continue;
+        }
+
+        /* first provided filename is input */
+        if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argc-i; continue; }
+
+        /* second provided filename is output */
+        if (!outFileName)
+        {
+            outFileName = argument;
+            if (!strcmp (outFileName, nullString)) outFileName = nulmark;
+            continue;
+        }
+    }
+
+    /* Welcome message (if verbose) */
+    DISPLAYLEVEL(3, WELCOME_MESSAGE);
+
+    /* No input filename ==> use stdin */
+    if(!inFileName) { inFileName=stdinmark; }
+
+    /* Check if input defined as console; trigger an error in this case */
+    if (!strcmp(inFileName, stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
+
+    /* Check if benchmark is selected */
+    if (bench) { BMK_bench(argv+fileNameStart, nbFiles, 0); goto _end; }
+
+    /* No output filename ==> try to select one automatically (when possible) */
+    if (!outFileName)
+    {
+        size_t const inl  = strlen(inFileName);
+        size_t const extl = strlen(ZSTD_EXTENSION);
+        if (!IS_CONSOLE(stdout))   /* Default to stdout whenever possible (i.e. not a console) */
+        {
+            outFileName = stdoutmark;
+        }
+        else if (!decode)   /* compression to file : output name is input name + extension */
+        {
+            dynNameSpace = (char*)calloc(1, inl+extl+1);
+            if (!dynNameSpace) EXM_THROW(1, "Allocation error : not enough memory");
+            strcpy(dynNameSpace, inFileName);
+            strcpy(dynNameSpace+inl, ZSTD_EXTENSION);
+            outFileName = dynNameSpace;
+            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName);
+        }
+        else   /* decompression to file : output name is input name minus extension, which must be present */
+        {
+            if ((inl <= extl) || strcmp(inFileName+inl-extl, ZSTD_EXTENSION))
+                { DISPLAYLEVEL(1, "Cannot determine an output filename\n"); return badusage(programName); }
+            dynNameSpace = (char*)calloc(1, inl+1);
+            if (!dynNameSpace) EXM_THROW(1, "Allocation error : not enough memory");
+            strcpy(dynNameSpace, inFileName);
+            dynNameSpace[inl-extl] = 0;   /* cut the extension */
+            outFileName = dynNameSpace;
+            DISPLAYLEVEL(2, "Decoding file %s \n", outFileName);
+        }
+    }
+
+    /* Check if output is defined as console; trigger an error in this case (releasing any generated name first) */
+    if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout)
+        { free(dynNameSpace); return badusage(programName); }
+
+    /* No warning message in pure pipe mode (stdin + stdout) */
+    if (!strcmp(inFileName, stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
+
+    /* IO Stream/File */
+    FIO_setNotificationLevel(displayLevel);
+    if (decode)
+        FIO_decompressFilename(outFileName, inFileName);
+    else
+        FIO_compressFilename(outFileName, inFileName);
+
+_end:
+    if (main_pause) waitEnter();
+    free(dynNameSpace);
+    return 0;
+}