Merge pull request #434 from terrelln/dev

Pzstd Improvements
diff --git a/.travis.yml b/.travis.yml
index 0c89607..3be4575 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -44,7 +44,7 @@
             - qemu-user-static
     - os: linux
       sudo: required
-      env: PLATFORM="Ubuntu 12.04" CMD="make -C programs zstd-small && make -C programs zstd-decompress && make -C programs zstd-compress && make clean && make -C tests versionsTest"
+      env: PLATFORM="Ubuntu 12.04" CMD="make -C programs zstd-small && make -C programs zstd-decompress && make -C programs zstd-compress && make -C programs clean && make -C tests versionsTest"
     - os: linux
       sudo: required
       env: PLATFORM="Ubuntu 12.04" CMD="make asan32"
diff --git a/Makefile b/Makefile
index ac0c583..20ae31e 100644
--- a/Makefile
+++ b/Makefile
@@ -22,15 +22,18 @@
 
 .PHONY: default all zlibwrapper zstd clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
 
-default: zstd
+default: libzstd zstd
 
 all:
 	$(MAKE) -C $(ZSTDDIR) $@
 	$(MAKE) -C $(PRGDIR) $@ zstd32
 	$(MAKE) -C $(TESTDIR) $@ all32
 
+libzstd:
+	@$(MAKE) -C $(ZSTDDIR)
+
 zstd:
-	$(MAKE) -C $(PRGDIR)
+	@$(MAKE) -C $(PRGDIR)
 	cp $(PRGDIR)/zstd .
 
 zlibwrapper:
@@ -48,18 +51,18 @@
 	@echo Cleaning completed
 
 
-#----------------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD, Hurd and some BSD targets
-#----------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
+# make install is validated only for Linux, OSX, Hurd and some BSD targets
+#------------------------------------------------------------------------------
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU FreeBSD DragonFly NetBSD))
 HOST_OS = POSIX
 install:
-	$(MAKE) -C $(ZSTDDIR) $@
-	$(MAKE) -C $(PRGDIR) $@
+	@$(MAKE) -C $(ZSTDDIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
 
 uninstall:
-	$(MAKE) -C $(ZSTDDIR) $@
-	$(MAKE) -C $(PRGDIR) $@
+	@$(MAKE) -C $(ZSTDDIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
 
 travis-install:
 	$(MAKE) install PREFIX=~/install_test_dir
diff --git a/NEWS b/NEWS
index ad56d17..1b0c3d6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,13 @@
+v1.1.1
+New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
+New : doc/zstd_manual.html, by Przemyslaw Skibinski
+Improved : slightly better compression ratio at --ultra levels (>= 20)
+Added : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section)
+Added : example/multiple_streaming_compression
+Changed : zstd_errors.h is now part of include installation
+Updated man page
+Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets
+
 v1.1.0
 New : contrib/pzstd, parallel version of zstd, by Nick Terrell
 added : NetBSD install target (#338)
diff --git a/README.md b/README.md
index 53609a1..0ab1b66 100644
--- a/README.md
+++ b/README.md
@@ -38,10 +38,10 @@
 
 Compression Speed vs Ratio | Decompression Speed
 ---------------------------|--------------------
-![Compression Speed vs Ratio](images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed4.png "Decompression Speed")
+![Compression Speed vs Ratio](doc/images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](doc/images/Dspeed4.png "Decompression Speed")
 
 Several algorithms can produce higher compression ratios, but at slower speeds, falling outside of the graph.
-For a larger picture including very slow modes, [click on this link](images/DCspeed5.png) .
+For a larger picture including very slow modes, [click on this link](doc/images/DCspeed5.png) .
 
 
 ### The case for Small Data compression
@@ -52,7 +52,7 @@
 
 To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data, by providing it with a few samples. The result of the training is stored in a file called "dictionary", which can be loaded before compression and decompression. Using this dictionary, the compression ratio achievable on small data improves dramatically:
 
-![Compressing Small Data](images/smallData.png "Compressing Small Data")
+![Compressing Small Data](doc/images/smallData.png "Compressing Small Data")
 
 These compression gains are achieved while simultaneously providing faster compression and decompression speeds.
 
diff --git a/build/VS2005/fullbench/fullbench.vcproj b/build/VS2005/fullbench/fullbench.vcproj
index 17af9c8..c28d1f6 100644
--- a/build/VS2005/fullbench/fullbench.vcproj
+++ b/build/VS2005/fullbench/fullbench.vcproj
@@ -336,6 +336,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -394,7 +398,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2005/fuzzer/fuzzer.vcproj b/build/VS2005/fuzzer/fuzzer.vcproj
index f1a6f82..dd7450f 100644
--- a/build/VS2005/fuzzer/fuzzer.vcproj
+++ b/build/VS2005/fuzzer/fuzzer.vcproj
@@ -340,6 +340,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -398,7 +402,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2005/zstd/zstd.vcproj b/build/VS2005/zstd/zstd.vcproj
index 68c3578..223285e 100644
--- a/build/VS2005/zstd/zstd.vcproj
+++ b/build/VS2005/zstd/zstd.vcproj
@@ -352,6 +352,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\programs\fileio.c"
 				>
 			</File>
@@ -450,7 +454,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2005/zstdlib/zstdlib.vcproj b/build/VS2005/zstdlib/zstdlib.vcproj
index 7ea3d9b..7a0a76e 100644
--- a/build/VS2005/zstdlib/zstdlib.vcproj
+++ b/build/VS2005/zstdlib/zstdlib.vcproj
@@ -336,6 +336,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -426,7 +430,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj
index cdfa048..8fe4f10 100644
--- a/build/VS2008/fullbench/fullbench.vcproj
+++ b/build/VS2008/fullbench/fullbench.vcproj
@@ -337,6 +337,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -395,7 +399,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj
index 0991702..3644b8c 100644
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@@ -341,6 +341,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -399,7 +403,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index b272ffd..1d51f51 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -353,6 +353,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\programs\fileio.c"
 				>
 			</File>
@@ -451,7 +455,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj
index fa4cd26..9c61e94 100644
--- a/build/VS2008/zstdlib/zstdlib.vcproj
+++ b/build/VS2008/zstdlib/zstdlib.vcproj
@@ -337,6 +337,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\common\error_private.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\compress\fse_compress.c"
 				>
 			</File>
@@ -427,7 +431,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\error_public.h"
+				RelativePath="..\..\..\lib\common\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
index d342bcb..ea0f06e 100644
--- a/build/VS2010/fullbench/fullbench.vcxproj
+++ b/build/VS2010/fullbench/fullbench.vcxproj
@@ -158,6 +158,7 @@
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\entropy_common.c" />
     <ClCompile Include="..\..\..\lib\common\zstd_common.c" />
+    <ClCompile Include="..\..\..\lib\common\error_private.c" />
     <ClCompile Include="..\..\..\lib\common\xxhash.c" />
     <ClCompile Include="..\..\..\lib\common\fse_decompress.c" />
     <ClCompile Include="..\..\..\lib\compress\fse_compress.c" />
@@ -175,6 +176,7 @@
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
+    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
index a436b03..020e521 100644
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@@ -157,6 +157,7 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\entropy_common.c" />
+    <ClCompile Include="..\..\..\lib\common\error_private.c" />
     <ClCompile Include="..\..\..\lib\common\fse_decompress.c" />
     <ClCompile Include="..\..\..\lib\common\xxhash.c" />
     <ClCompile Include="..\..\..\lib\common\zstd_common.c" />
@@ -176,6 +177,7 @@
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
+    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
diff --git a/build/VS2010/zstd/generate_res/generate_res.bat b/build/VS2010/zstd/generate_res/generate_res.bat
deleted file mode 100644
index b552dcc..0000000
--- a/build/VS2010/zstd/generate_res/generate_res.bat
+++ /dev/null
@@ -1,3 +0,0 @@
-REM http://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable
-REM copy "c:\Program Files (x86)\Windows Kits\8.1\Include\um\verrsrc.h" .
-windres -I ..\..\..\..\lib -O coff -I . -i ..\zstd.rc -o zstd.res
diff --git a/build/VS2010/zstd/generate_res/verrsrc.h b/build/VS2010/zstd/generate_res/verrsrc.h
deleted file mode 100644
index 37e48d3..0000000
--- a/build/VS2010/zstd/generate_res/verrsrc.h
+++ /dev/null
@@ -1,172 +0,0 @@
-#include <winapifamily.h>
-
-/*****************************************************************************\
-*                                                                             *
-* verrsrc.h -   Version Resource definitions                                  *
-*                                                                             *
-*               Include file declaring version resources in rc files          *
-*                                                                             *
-*               Copyright (c) Microsoft Corporation. All rights reserved.     *
-*                                                                             *
-\*****************************************************************************/
-
-#pragma region Application Family
-#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
-
-/* ----- Symbols ----- */
-#define VS_FILE_INFO            RT_VERSION
-#define VS_VERSION_INFO         1
-#define VS_USER_DEFINED         100
-
-/* ----- VS_VERSION.dwFileFlags ----- */
-#ifndef _MAC
-#define VS_FFI_SIGNATURE        0xFEEF04BDL
-#else
-#define VS_FFI_SIGNATURE        0xBD04EFFEL
-#endif
-#define VS_FFI_STRUCVERSION     0x00010000L
-#define VS_FFI_FILEFLAGSMASK    0x0000003FL
-
-/* ----- VS_VERSION.dwFileFlags ----- */
-#define VS_FF_DEBUG             0x00000001L
-#define VS_FF_PRERELEASE        0x00000002L
-#define VS_FF_PATCHED           0x00000004L
-#define VS_FF_PRIVATEBUILD      0x00000008L
-#define VS_FF_INFOINFERRED      0x00000010L
-#define VS_FF_SPECIALBUILD      0x00000020L
-
-/* ----- VS_VERSION.dwFileOS ----- */
-#define VOS_UNKNOWN             0x00000000L
-#define VOS_DOS                 0x00010000L
-#define VOS_OS216               0x00020000L
-#define VOS_OS232               0x00030000L
-#define VOS_NT                  0x00040000L
-#define VOS_WINCE               0x00050000L
-
-#define VOS__BASE               0x00000000L
-#define VOS__WINDOWS16          0x00000001L
-#define VOS__PM16               0x00000002L
-#define VOS__PM32               0x00000003L
-#define VOS__WINDOWS32          0x00000004L
-
-#define VOS_DOS_WINDOWS16       0x00010001L
-#define VOS_DOS_WINDOWS32       0x00010004L
-#define VOS_OS216_PM16          0x00020002L
-#define VOS_OS232_PM32          0x00030003L
-#define VOS_NT_WINDOWS32        0x00040004L
-
-/* ----- VS_VERSION.dwFileType ----- */
-#define VFT_UNKNOWN             0x00000000L
-#define VFT_APP                 0x00000001L
-#define VFT_DLL                 0x00000002L
-#define VFT_DRV                 0x00000003L
-#define VFT_FONT                0x00000004L
-#define VFT_VXD                 0x00000005L
-#define VFT_STATIC_LIB          0x00000007L
-
-/* ----- VS_VERSION.dwFileSubtype for VFT_WINDOWS_DRV ----- */
-#define VFT2_UNKNOWN            0x00000000L
-#define VFT2_DRV_PRINTER        0x00000001L
-#define VFT2_DRV_KEYBOARD       0x00000002L
-#define VFT2_DRV_LANGUAGE       0x00000003L
-#define VFT2_DRV_DISPLAY        0x00000004L
-#define VFT2_DRV_MOUSE          0x00000005L
-#define VFT2_DRV_NETWORK        0x00000006L
-#define VFT2_DRV_SYSTEM         0x00000007L
-#define VFT2_DRV_INSTALLABLE    0x00000008L
-#define VFT2_DRV_SOUND          0x00000009L
-#define VFT2_DRV_COMM           0x0000000AL
-#define VFT2_DRV_INPUTMETHOD    0x0000000BL
-#define VFT2_DRV_VERSIONED_PRINTER    0x0000000CL
-
-/* ----- VS_VERSION.dwFileSubtype for VFT_WINDOWS_FONT ----- */
-#define VFT2_FONT_RASTER        0x00000001L
-#define VFT2_FONT_VECTOR        0x00000002L
-#define VFT2_FONT_TRUETYPE      0x00000003L
-
-#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
-#pragma endregion
-
-#pragma region Desktop Family
-#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
-
-/* ----- VerFindFile() flags ----- */
-#define VFFF_ISSHAREDFILE       0x0001
-
-#define VFF_CURNEDEST           0x0001
-#define VFF_FILEINUSE           0x0002
-#define VFF_BUFFTOOSMALL        0x0004
-
-/* ----- VerInstallFile() flags ----- */
-#define VIFF_FORCEINSTALL       0x0001
-#define VIFF_DONTDELETEOLD      0x0002
-
-#define VIF_TEMPFILE            0x00000001L
-#define VIF_MISMATCH            0x00000002L
-#define VIF_SRCOLD              0x00000004L
-
-#define VIF_DIFFLANG            0x00000008L
-#define VIF_DIFFCODEPG          0x00000010L
-#define VIF_DIFFTYPE            0x00000020L
-
-#define VIF_WRITEPROT           0x00000040L
-#define VIF_FILEINUSE           0x00000080L
-#define VIF_OUTOFSPACE          0x00000100L
-#define VIF_ACCESSVIOLATION     0x00000200L
-#define VIF_SHARINGVIOLATION    0x00000400L
-#define VIF_CANNOTCREATE        0x00000800L
-#define VIF_CANNOTDELETE        0x00001000L
-#define VIF_CANNOTRENAME        0x00002000L
-#define VIF_CANNOTDELETECUR     0x00004000L
-#define VIF_OUTOFMEMORY         0x00008000L
-
-#define VIF_CANNOTREADSRC       0x00010000L
-#define VIF_CANNOTREADDST       0x00020000L
-
-#define VIF_BUFFTOOSMALL        0x00040000L
-#define VIF_CANNOTLOADLZ32      0x00080000L
-#define VIF_CANNOTLOADCABINET   0x00100000L
-
-#ifndef RC_INVOKED              /* RC doesn't need to see the rest of this */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-    
-/* 
-    FILE_VER_GET_... flags are for use by 
-    GetFileVersionInfoSizeEx
-    GetFileVersionInfoExW
-*/
-#define FILE_VER_GET_LOCALISED  0x01
-#define FILE_VER_GET_NEUTRAL    0x02
-#define FILE_VER_GET_PREFETCHED 0x04
-
-/* ----- Types and structures ----- */
-
-typedef struct tagVS_FIXEDFILEINFO
-{
-    DWORD   dwSignature;            /* e.g. 0xfeef04bd */
-    DWORD   dwStrucVersion;         /* e.g. 0x00000042 = "0.42" */
-    DWORD   dwFileVersionMS;        /* e.g. 0x00030075 = "3.75" */
-    DWORD   dwFileVersionLS;        /* e.g. 0x00000031 = "0.31" */
-    DWORD   dwProductVersionMS;     /* e.g. 0x00030010 = "3.10" */
-    DWORD   dwProductVersionLS;     /* e.g. 0x00000031 = "0.31" */
-    DWORD   dwFileFlagsMask;        /* = 0x3F for version "0.42" */
-    DWORD   dwFileFlags;            /* e.g. VFF_DEBUG | VFF_PRERELEASE */
-    DWORD   dwFileOS;               /* e.g. VOS_DOS_WINDOWS16 */
-    DWORD   dwFileType;             /* e.g. VFT_DRIVER */
-    DWORD   dwFileSubtype;          /* e.g. VFT2_DRV_KEYBOARD */
-    DWORD   dwFileDateMS;           /* e.g. 0 */
-    DWORD   dwFileDateLS;           /* e.g. 0 */
-} VS_FIXEDFILEINFO;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* !RC_INVOKED */
-
-#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */
-#pragma endregion
-
diff --git a/build/VS2010/zstd/generate_res/zstd32.res b/build/VS2010/zstd/generate_res/zstd32.res
deleted file mode 100644
index 75705d0..0000000
--- a/build/VS2010/zstd/generate_res/zstd32.res
+++ /dev/null
Binary files differ
diff --git a/build/VS2010/zstd/generate_res/zstd64.res b/build/VS2010/zstd/generate_res/zstd64.res
deleted file mode 100644
index 85de277..0000000
--- a/build/VS2010/zstd/generate_res/zstd64.res
+++ /dev/null
Binary files differ
diff --git a/build/VS2010/zstd/zstd.rc b/build/VS2010/zstd/zstd.rc
index 464b2f1..f5e4047 100644
--- a/build/VS2010/zstd/zstd.rc
+++ b/build/VS2010/zstd/zstd.rc
@@ -32,11 +32,11 @@
     BEGIN
         BLOCK "040904B0"
         BEGIN
-            VALUE "CompanyName", "Yann Collet"
-            VALUE "FileDescription", "Fast and efficient compression algorithm"
+            VALUE "CompanyName", "Yann Collet, Facebook, Inc."
+            VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
             VALUE "FileVersion", ZSTD_VERSION_STRING
             VALUE "InternalName", "zstd.exe"
-            VALUE "LegalCopyright", "Copyright (C) 2013-2016, Yann Collet"
+            VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
             VALUE "OriginalFilename", "zstd.exe"
             VALUE "ProductName", "Zstandard"
             VALUE "ProductVersion", ZSTD_VERSION_STRING
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index eb7e7b5..181bbe6 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -20,6 +20,7 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\entropy_common.c" />
+    <ClCompile Include="..\..\..\lib\common\error_private.c" />
     <ClCompile Include="..\..\..\lib\common\xxhash.c" />
     <ClCompile Include="..\..\..\lib\common\zstd_common.c" />
     <ClCompile Include="..\..\..\lib\common\fse_decompress.c" />
@@ -54,6 +55,7 @@
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
+    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
@@ -220,4 +222,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/build/VS2010/zstdlib/zstdlib.vcxproj b/build/VS2010/zstdlib/zstdlib.vcxproj
index b97808d..d32b486 100644
--- a/build/VS2010/zstdlib/zstdlib.vcxproj
+++ b/build/VS2010/zstdlib/zstdlib.vcxproj
@@ -20,6 +20,7 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\entropy_common.c" />
+    <ClCompile Include="..\..\..\lib\common\error_private.c" />
     <ClCompile Include="..\..\..\lib\common\xxhash.c" />
     <ClCompile Include="..\..\..\lib\common\zstd_common.c" />
     <ClCompile Include="..\..\..\lib\common\fse_decompress.c" />
@@ -39,9 +40,11 @@
     <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v06.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v07.c" />
+  </ItemGroup>
+  <ItemGroup>    
     <ClInclude Include="..\..\..\lib\common\bitstream.h" />
     <ClInclude Include="..\..\..\lib\common\error_private.h" />
-    <ClInclude Include="..\..\..\lib\common\error_public.h" />
+    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\mem.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index c984145..f970fe7 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -59,6 +59,7 @@
 SET(Sources
         ${LIBRARY_DIR}/common/entropy_common.c
         ${LIBRARY_DIR}/common/zstd_common.c
+        ${LIBRARY_DIR}/common/error_private.c
         ${LIBRARY_DIR}/common/xxhash.c
         ${LIBRARY_DIR}/common/fse_decompress.c
         ${LIBRARY_DIR}/compress/fse_compress.c
@@ -74,7 +75,7 @@
 SET(Headers
         ${LIBRARY_DIR}/common/bitstream.h
         ${LIBRARY_DIR}/common/error_private.h
-        ${LIBRARY_DIR}/common/error_public.h
+        ${LIBRARY_DIR}/common/zstd_errors.h
         ${LIBRARY_DIR}/common/fse.h
         ${LIBRARY_DIR}/common/huf.h
         ${LIBRARY_DIR}/common/mem.h
diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile
new file mode 100644
index 0000000..c68e560
--- /dev/null
+++ b/contrib/gen_html/Makefile
@@ -0,0 +1,36 @@
+# ##########################################################################
+# Copyright (c) 2016-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# ##########################################################################
+
+
+CFLAGS ?= -O3
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
+CFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+# FLAGS is the single option set handed to $(CXX) by the gen_html rule; MOREFLAGS is appended to CFLAGS
+
+# Define *.exe as extension for Windows systems
+ifneq (,$(filter Windows%,$(OS)))
+EXT =.exe
+else
+EXT =
+endif
+
+
+.PHONY: default clean
+
+default: gen_html
+# Compile the generator; the output file name gets $(EXT) appended (".exe" on Windows)
+gen_html: gen_html.cpp
+	$(CXX)      $(FLAGS) $^ -o $@$(EXT)
+
+# Remove the built binary; phony so that a stray file named "clean" cannot disable it
+clean:
+	@$(RM) gen_html$(EXT)
+	@echo Cleaning completed
diff --git a/contrib/gen_html/README.md b/contrib/gen_html/README.md
new file mode 100644
index 0000000..63a4caa
--- /dev/null
+++ b/contrib/gen_html/README.md
@@ -0,0 +1,31 @@
+gen_html - a program for automatic generation of zstd manual 
+============================================================
+
+#### Introduction
+
+This simple C++ program generates a single-page HTML manual from `zstd.h`.
+
+The format of recognized comment blocks is as follows:
+- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
+- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
+- comments of type `/*=` and `/**=` mean: use a `<H3>` header and also show all functions until the first empty line
+- comments of type `/*X` where `X` is different from above-mentioned are ignored
+
+Moreover:
+- `ZSTDLIB_API` is removed to improve readability
+- `typedef` are detected and included even if uncommented
+- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
+
+
+#### Usage
+
+The program requires 3 parameters:
+```
+gen_html [zstd_version] [input_file] [output_html]
+```
+
+To compile the program and generate the zstd manual, we used:
+```
+make
+./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
+```
diff --git a/contrib/gen_html/gen_html.cpp b/contrib/gen_html/gen_html.cpp
new file mode 100644
index 0000000..2157829
--- /dev/null
+++ b/contrib/gen_html/gen_html.cpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+
+/* strip, in place, every leading and trailing character of s that appears in `characters` */
+void trim(string& s, string characters)
+{
+    size_t p = s.find_first_not_of(characters);
+    s.erase(0, p);   /* when nothing is kept (p == npos) this erases the whole string */
+ 
+    p = s.find_last_not_of(characters);
+    if (string::npos != p)
+       s.erase(p+1);   /* drop everything after the last kept character */
+}
+
+
+/* reduce s to the text between its first C-style comment opener and its first comment closer */
+void trim_comments(string &s)
+{
+    size_t spos, epos;
+
+    spos = s.find("/*");
+    epos = s.find("*/");
+    s = s.substr(spos+3, epos-(spos+3));   /* +3 skips the opener plus one marker character; no check here that both delimiters were found */
+}
+
+
+/* collect input lines from index linenum up to and including the first one containing terminator; an empty terminator stops before the first empty line instead */
+vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
+{
+    vector<string> out;
+    string line;
+    size_t epos;
+
+    while ((size_t)linenum < input.size()) {
+        line = input[linenum];
+
+        if (terminator.empty() && line.empty()) { linenum--; break; }   /* step linenum back to the line before the empty one */
+        
+        epos = line.find(terminator);
+        if (!terminator.empty() && epos!=string::npos) {
+            out.push_back(line);   /* the terminator line itself is part of the result */
+            break;                 /* linenum is left on the terminator line */
+        }
+        out.push_back(line);
+        linenum++;
+    }
+    return out;   /* when no stop condition was met, linenum has run past the last line */
+}
+
+
+/* write line to sout with a leading "ZSTDLIB_API " removed; text from the first C-style comment opener onward is wrapped so that it is not bold */
+void print_line(stringstream &sout, string line)
+{
+    size_t spos;
+
+    if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12);
+    spos = line.find("/*");
+    if (spos!=string::npos) {
+        sout << line.substr(0, spos);
+        sout << "</b>" << line.substr(spos) << "<b>" << endl;   /* close bold before the comment, reopen it after */
+    } else {
+      /* no comment opener on this line: emit it unchanged */
+        sout << line << endl;
+    }
+}
+
+
+int main(int argc, char *argv[]) {
+    char exclam;
+    int linenum, chapter = 1;
+    vector<string> input, lines, comments, chapters;
+    string line, version;
+    size_t spos, l;
+    stringstream sout;
+    ifstream istream;
+    ofstream ostream;
+
+    if (argc < 4) {
+        cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
+        exit(0);
+    }
+
+    version = "zstd " + string(argv[1]) + " Manual";
+
+    istream.open(argv[2], ifstream::in);
+    if (!istream.is_open()) {
+        cout << "Error opening file " << argv[2] << endl;
+        exit(0);
+    }
+
+    ostream.open(argv[3], ifstream::out);
+    if (!ostream.is_open()) {
+        cout << "Error opening file " << argv[3] << endl;
+        exit(0);
+    }
+
+    while (getline(istream, line)) {
+        input.push_back(line);
+    }
+
+    for (linenum=0; (size_t)linenum < input.size(); linenum++) {
+        line = input[linenum];
+
+        /* typedefs are detected and included even if uncommented */
+        if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
+            lines = get_lines(input, linenum, "}");
+            sout << "<pre><b>";
+            for (l=0; l<lines.size(); l++) {
+                print_line(sout, lines[l]);
+            }
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
+        if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
+            sout << "<pre><b>";
+            print_line(sout, line);
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
+        if ((line.substr(0,3) == "/*=" || line.substr(0,4) == "/**=") && line.find("*/")!=string::npos) {
+            trim_comments(line);
+            trim(line, "= ");
+            sout << "<h3>" << line << "</h3><pre><b>";
+            lines = get_lines(input, ++linenum, "");
+            for (l=0; l<lines.size(); l++) {
+                print_line(sout, lines[l]);
+            }
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        spos = line.find("/*!");
+        if (spos==string::npos)
+            spos = line.find("/**");
+        if (spos==string::npos)
+            spos = line.find("/*-");
+
+        if (spos==string::npos)
+            continue;
+
+        exclam = line[spos+2];
+        comments = get_lines(input, linenum, "*/");
+        if (!comments.empty()) comments[0] = line.substr(spos+3);
+        if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
+        for (l=0; l<comments.size(); l++) {
+            if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
+            else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
+            trim(comments[l], "*-");
+        }
+        while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
+        while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
+
+        /* comments of type /*! mean: this is a function declaration; switch comments with declarations */
+        if (exclam == '!') {
+            if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
+            linenum++;
+            lines = get_lines(input, linenum, "");
+
+            sout << "<pre><b>";
+            for (l=0; l<lines.size(); l++) {
+              //  fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
+                print_line(sout, lines[l]);
+            }
+            sout << "</b><p>";
+            for (l=0; l<comments.size(); l++) {
+                print_line(sout, comments[l]);
+            }
+            sout << "</p></pre><BR>" << endl << endl;
+        } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
+            if (comments.empty()) continue;
+
+            trim(comments[0], " ");
+            sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
+            chapters.push_back(comments[0]);
+            chapter++;
+
+            for (l=1; l<comments.size(); l++) {
+                print_line(sout, comments[l]);
+            }
+            if (comments.size() > 1)
+                sout << "<BR></pre>" << endl << endl;
+            else
+                sout << "</pre>" << endl << endl;
+        }
+    }
+
+    ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
+    ostream << "<h1>" << version << "</h1>\n";
+
+    ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
+    for (size_t i=0; i<chapters.size(); i++)
+        ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
+    ostream << "</ol>\n<hr>\n";
+
+    ostream << sout.str();
+    ostream << "</html>" << endl << "</body>" << endl;
+
+    return 0;
+}
\ No newline at end of file
diff --git a/contrib/pzstd/test/OptionsTest.cpp b/contrib/pzstd/test/OptionsTest.cpp
index e7d4b2b..b3efe2b 100644
--- a/contrib/pzstd/test/OptionsTest.cpp
+++ b/contrib/pzstd/test/OptionsTest.cpp
@@ -182,12 +182,6 @@
   }
   {
     Options options;
-    auto args = makeArray("-o-");
-    EXPECT_FAILURE(options.parse(args.size(), args.data()));
-    EXPECT_EQ("-", options.getOutputFile(options.inputFiles[0]));
-  }
-  {
-    Options options;
     auto args = makeArray("x", "y", "-o", nullOutput);
     EXPECT_SUCCESS(options.parse(args.size(), args.data()));
     EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
diff --git a/images/Cspeed4.png b/doc/images/Cspeed4.png
similarity index 100%
rename from images/Cspeed4.png
rename to doc/images/Cspeed4.png
Binary files differ
diff --git a/images/DCspeed5.png b/doc/images/DCspeed5.png
similarity index 100%
rename from images/DCspeed5.png
rename to doc/images/DCspeed5.png
Binary files differ
diff --git a/images/Dspeed4.png b/doc/images/Dspeed4.png
similarity index 100%
rename from images/Dspeed4.png
rename to doc/images/Dspeed4.png
Binary files differ
diff --git a/images/smallData.png b/doc/images/smallData.png
similarity index 100%
rename from images/smallData.png
rename to doc/images/smallData.png
Binary files differ
diff --git a/zstd_compression_format.md b/doc/zstd_compression_format.md
similarity index 100%
rename from zstd_compression_format.md
rename to doc/zstd_compression_format.md
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
new file mode 100644
index 0000000..aac5af9
--- /dev/null
+++ b/doc/zstd_manual.html
@@ -0,0 +1,528 @@
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>zstd 1.1.1 Manual</title>
+</head>
+<body>
+<h1>zstd 1.1.1 Manual</h1>
+<hr>
+<a name="Contents"></a><h2>Contents</h2>
+<ol>
+<li><a href="#Chapter1">Introduction</a></li>
+<li><a href="#Chapter2">Version</a></li>
+<li><a href="#Chapter3">Simple API</a></li>
+<li><a href="#Chapter4">Explicit memory management</a></li>
+<li><a href="#Chapter5">Simple dictionary API</a></li>
+<li><a href="#Chapter6">Fast dictionary API</a></li>
+<li><a href="#Chapter7">Streaming</a></li>
+<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
+<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
+<li><a href="#Chapter10">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
+<li><a href="#Chapter11">Advanced types</a></li>
+<li><a href="#Chapter12">Advanced compression functions</a></li>
+<li><a href="#Chapter13">Advanced decompression functions</a></li>
+<li><a href="#Chapter14">Advanced streaming functions</a></li>
+<li><a href="#Chapter15">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter16">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter17">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter18">Block functions</a></li>
+</ol>
+<hr>
+<a name="Chapter1"></a><h2>Introduction</h2><pre>
+  Zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios
+  at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and
+  decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22.
+  Levels from 20 to 22 should be used with caution as they require about 300-1300 MB for compression.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - repeated calls of the compression function (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using compression with a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced and experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  These APIs shall never be used with a dynamic library. 
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+<BR></pre>
+
+<a name="Chapter2"></a><h2>Version</h2><pre></pre>
+
+<pre><b>unsigned ZSTD_versionNumber (void);  </b>/**< returns version number of ZSTD */<b>
+</b></pre><BR>
+<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+</b><p>    Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+    @return : compressed size written into `dst` (<= `dstCapacity`),
+              or an error code if it fails (which can be tested using ZSTD_isError()) 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+</b><p>    `compressedSize` : must be the _exact_ size of a single compressed frame.
+    `dstCapacity` is an upper bound of originalSize.
+    If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTD_isError()) 
+</p></pre><BR>
+
+<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+</b><p>   'src' is the start of a zstd compressed frame.
+   @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+             When `return==0`, data to decompress could be any size.
+             In which case, it's necessary to use streaming mode to decompress data.
+             Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+             (For example, data may be necessarily cut into blocks <= 16 KB).
+    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+    note 3 : decompressed size can be very large (64-bits value),
+             potentially larger than what local system can handle as a single memory segment.
+             In which case, it's necessary to use streaming mode to decompress data.
+    note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+             Always ensure result fits within application's authorized limits.
+             Each application can set its own limits.
+    note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. 
+</p></pre><BR>
+
+<h3>Helper functions</h3><pre><b>int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
+size_t      ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case scenario */<b>
+unsigned    ZSTD_isError(size_t code);          </b>/*!< tells if a `size_t` function result is an error code */<b>
+const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable string from an error code */<b>
+</b></pre><BR>
+<a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre>
+
+<h3>Compression context</h3><pre><b>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+</b></pre><BR>
+<pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
+</b><p>    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) 
+</p></pre><BR>
+
+<h3>Decompression context</h3><pre><b>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTD_DCtx* ZSTD_createDCtx(void);
+size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+</b></pre><BR>
+<pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+</b><p>   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) 
+</p></pre><BR>
+
+<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+</b><p>   Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+   Note : This function loads the dictionary, resulting in significant startup delay. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+</b><p>   Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
+   Dictionary must be identical to the one used during compression.
+   Note : This function loads the dictionary, resulting in significant startup delay 
+</p></pre><BR>
+
+<a name="Chapter6"></a><h2>Fast dictionary API</h2><pre></pre>
+
+<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+</b><p>   Create a digested dictionary, ready to start compression operation without startup delay.
+   `dict` can be released after ZSTD_CDict creation 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+</b><p>   Function frees memory allocated with ZSTD_createCDict() 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+</b><p>   Compression using a digested Dictionary.
+   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+   Note that compression level is decided during dictionary creation 
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+</b><p>   Create a digested dictionary, ready to start decompression operation without startup delay.
+   `dict` can be released after creation 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+</b><p>   Function frees memory allocated with ZSTD_createDDict() 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+</b><p>   Decompression using a digested Dictionary
+   Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. 
+</p></pre><BR>
+
+<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
+
+<pre><b>typedef struct ZSTD_inBuffer_s {
+  const void* src;    </b>/**< start of input buffer */<b>
+  size_t size;        </b>/**< size of input buffer */<b>
+  size_t pos;         </b>/**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_inBuffer;
+</b></pre><BR>
+<pre><b>typedef struct ZSTD_outBuffer_s {
+  void*  dst;         </b>/**< start of output buffer */<b>
+  size_t size;        </b>/**< size of output buffer */<b>
+  size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_outBuffer;
+</b></pre><BR>
+<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
+  A ZSTD_CStream object is required to track streaming operation.
+  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+
+  Start by initializing ZSTD_CStream.
+  Use ZSTD_initCStream() to start a new compression operation.
+  Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary.
+
+  Use ZSTD_compressStream() repetitively to consume input stream.
+  The function will automatically update both `pos` fields.
+  Note that it may not consume the entire input, in which case `pos < size`,
+  and it's up to the caller to present again remaining data.
+  @return : a size hint, preferred nb of bytes to use as input for next function call
+           (it's just a hint, to help latency a little, any other value will work fine)
+           (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() )
+            or an error code, which can be tested using ZSTD_isError().
+
+  At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream().
+  `output->pos` will be updated.
+  Note some content might still be left within internal buffer if `output->size` is too small.
+  @return : nb of bytes still present within internal buffer (0 if it's empty)
+            or an error code, which can be tested using ZSTD_isError().
+
+  ZSTD_endStream() instructs to finish a frame.
+  It will perform a flush and write frame epilogue.
+  The epilogue is required for decoders to consider a frame completed.
+  Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small.
+  In which case, call again ZSTD_endStream() to complete the flush.
+  @return : nb of bytes still present within internal buffer (0 if it's empty)
+            or an error code, which can be tested using ZSTD_isError().
+
+ 
+<BR></pre>
+
+<h3>Streaming compression functions</h3><pre><b>typedef struct ZSTD_CStream_s ZSTD_CStream;
+ZSTD_CStream* ZSTD_createCStream(void);
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+</b></pre><BR>
+<pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guaranteed to successfully flush at least one complete compressed block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
+  A ZSTD_DStream object is required to track streaming operations.
+  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+  ZSTD_DStream objects can be re-used multiple times.
+
+  Use ZSTD_initDStream() to start a new decompression operation,
+   or ZSTD_initDStream_usingDict() if decompression requires a dictionary.
+   @return : recommended first input size
+
+  Use ZSTD_decompressStream() repetitively to consume your input.
+  The function will update both `pos` fields.
+  If `input.pos < input.size`, some input has not been consumed.
+  It's up to the caller to present again remaining data.
+  If `output.pos < output.size`, decoder has flushed everything it could.
+  @return : 0 when a frame is completely decoded and fully flushed,
+            an error code, which can be tested using ZSTD_isError(),
+            any other value > 0, which means there is still some work to do to complete the frame.
+            The return value is a suggested next input size (just a hint, to help latency).
+ 
+<BR></pre>
+
+<h3>Streaming decompression functions</h3><pre><b>typedef struct ZSTD_DStream_s ZSTD_DStream;
+ZSTD_DStream* ZSTD_createDStream(void);
+size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+size_t ZSTD_initDStream(ZSTD_DStream* zds);
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+</b></pre><BR>
+<pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guaranteed to successfully flush at least one complete block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
+ They should never be used with a dynamic library, as they may change in the future.
+ They are provided for advanced usages.
+ Use them only in association with static linking.
+ 
+<BR></pre>
+
+<a name="Chapter11"></a><h2>Advanced types</h2><pre></pre>
+
+<pre><b>typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   </b>/* from faster to stronger */<b>
+</b></pre><BR>
+<pre><b>typedef struct {
+    unsigned windowLog;      </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b>
+    unsigned chainLog;       </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
+    unsigned hashLog;        </b>/**< dispatch table : larger == faster, more memory */<b>
+    unsigned searchLog;      </b>/**< nb of searches : larger == more compression, slower */<b>
+    unsigned searchLength;   </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
+    unsigned targetLength;   </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b>
+    ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+    unsigned contentSizeFlag; </b>/**< 1: content size will be in frame header (if known). */<b>
+    unsigned checksumFlag;    </b>/**< 1: will generate a 32-bits checksum at end of frame, to be used for error detection by decompressor */<b>
+    unsigned noDictIDFlag;    </b>/**< 1: no dict ID will be saved into frame header (if dictionary compression) */<b>
+} ZSTD_frameParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+</b></pre><BR>
+<h3>Custom memory allocation functions</h3><pre><b>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+</b></pre><BR>
+<a name="Chapter12"></a><h2>Advanced compression functions</h2><pre></pre>
+
+<pre><b>size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
+</b><p>  Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
+  `frameContentSize` is an optional parameter, provide `0` if unknown 
+</p></pre><BR>
+
+<pre><b>ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+</b><p>  Create a ZSTD compression context using external alloc and free functions 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+</b><p>  Gives the amount of memory used by a given ZSTD_CCtx 
+</p></pre><BR>
+
+<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_parameters params, ZSTD_customMem customMem);
+</b><p>  Create a ZSTD_CDict using external alloc and free, and customized compression parameters 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+</b><p>  Gives the amount of memory used by a given ZSTD_CDict 
+</p></pre><BR>
+
+<pre><b>ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+</b><p>   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
+   All fields of `ZSTD_frameParameters` are set to default (0) 
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+</b><p>   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+   `srcSize` value is optional, select 0 if not known 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+</b><p>   Ensure param values remain within authorized range 
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+</b><p>   optimize params for a given `srcSize` and `dictSize`.
+   both values are optional, select `0` if unknown. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           ZSTD_parameters params);
+</b><p>   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter 
+</p></pre><BR>
+
+<a name="Chapter13"></a><h2>Advanced decompression functions</h2><pre></pre>
+
+<pre><b>size_t ZSTD_estimateDCtxSize(void);
+</b><p>  Gives the potential amount of memory allocated to create a ZSTD_DCtx 
+</p></pre><BR>
+
+<pre><b>ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+</b><p>  Create a ZSTD decompression context using external alloc and free functions 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+</b><p>  Gives the amount of memory used by a given ZSTD_DCtx 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+</b><p>  Gives the amount of memory used by a given ZSTD_DDict 
+</p></pre><BR>
+
+<a name="Chapter14"></a><h2>Advanced streaming functions</h2><pre></pre>
+
+<h3>Advanced Streaming compression functions</h3><pre><b>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  </b>/**< pledgedSrcSize is optional and can be zero == unknown */<b>
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  </b>/**< re-use compression parameters from previous init; saves dictionary loading */<b>
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+</b></pre><BR>
+<h3>Advanced Streaming decompression functions</h3><pre><b>typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
+size_t ZSTD_resetDStream(ZSTD_DStream* zds);  </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+</b></pre><BR>
+<a name="Chapter15"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+  But it's also a complex one, with many restrictions (documented below).
+  Prefer using normal streaming API for an easier experience 
+ 
+<BR></pre>
+
+<a name="Chapter16"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only.
+  - Interface is synchronous : input is consumed entirely and produces 1 (or more) compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames will be considered unfinished (broken) by decoders.
+
+  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+<BR></pre>
+
+<h3>Buffer-less streaming compression functions</h3><pre><b>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+</b></pre><BR>
+<a name="Chapter17"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  It fills a ZSTD_frameParams structure which provides important information to correctly decode the frame,
+  such as the minimum rolling buffer size to allocate to decompress data (`windowSize`),
+  and the dictionary ID used.
+  (Note : content size is optional, it may not be present. 0 means : content size unknown).
+  Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information.
+  As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation.
+  Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB.
+  Frame parameters are extracted from the beginning of the compressed frame.
+  Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes.
+  @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
+  They should preferably be located contiguously, prior to current block.
+  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference.
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_decompressContinue() always returns 0.
+  For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowSize==0, which means that a frame is skippable.
+  It also returns Frame Size as fparamsPtr->frameContentSize.
+<BR></pre>
+
+<pre><b>typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameParams;
+</b></pre><BR>
+<h3>Buffer-less streaming decompression functions</h3><pre><b>size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input, see details below */<b>
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+</b></pre><BR>
+<a name="Chapter18"></a><h2>Block functions</h2><pre>
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    User will have to keep track of the information required to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTD_compressBegin()
+      + decompression : ZSTD_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
+        Use ZSTD_insertBlock() in such a case.
+<BR></pre>
+
+<h3>Raw zstd block functions</h3><pre><b>size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  </b>/**< insert block into `dctx` history. Useful for uncompressed blocks */<b>
+</b></pre><BR>
+</body>
+</html>
diff --git a/examples/.gitignore b/examples/.gitignore
index 1c98e18..0711813 100644
--- a/examples/.gitignore
+++ b/examples/.gitignore
@@ -5,6 +5,7 @@
 dictionary_decompression
 streaming_compression
 streaming_decompression
+multiple_streaming_compression
 
 #test artefact
 tmp*
diff --git a/examples/Makefile b/examples/Makefile
index 54602df..7410228 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,26 +1,11 @@
-# ##########################################################################
-# ZSTD educational examples - Makefile
-# Copyright (C) Yann Collet 2016
+# ################################################################
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
 #
-# GPL v2 License
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# You can contact the author at :
-#  - zstd homepage : http://www.zstd.net/
-# ##########################################################################
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# ################################################################
 
 # This Makefile presumes libzstd is installed, using `sudo make install`
 
@@ -32,7 +17,8 @@
 
 all: simple_compression simple_decompression \
 	dictionary_compression dictionary_decompression \
-	streaming_compression streaming_decompression
+	streaming_compression streaming_decompression \
+	multiple_streaming_compression
 
 simple_compression : simple_compression.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
@@ -49,6 +35,9 @@
 streaming_compression : streaming_compression.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
 
+multiple_streaming_compression : multiple_streaming_compression.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
 streaming_decompression : streaming_decompression.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
 
@@ -56,7 +45,8 @@
 	@rm -f core *.o tmp* result* *.zst \
         simple_compression simple_decompression \
         dictionary_compression dictionary_decompression \
-        streaming_compression streaming_decompression
+        streaming_compression streaming_decompression \
+		multiple_streaming_compression
 	@echo Cleaning completed
 
 test: all
@@ -69,7 +59,10 @@
 	@echo starting streaming compression
 	./streaming_compression tmp
 	./streaming_decompression tmp.zst > /dev/null
+	@echo starting multiple streaming compression
+	./multiple_streaming_compression *.c
 	@echo starting dictionary compression
 	./dictionary_compression tmp2 tmp README.md
 	./dictionary_decompression tmp2.zst tmp.zst README.md
+	$(RM) tmp* *.zst
 	@echo tests completed
diff --git a/examples/README.md b/examples/README.md
index ba132f6..8a40443 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -15,6 +15,11 @@
   Compress a single file.
   Introduces usage of : `ZSTD_compressStream()`
 
+- [Multiple Streaming compression](multiple_streaming_compression.c) :
+  Compress multiple files in a single command line.
+  Introduces memory usage preservation technique,
+  reducing impact of malloc()/free() and memset() by re-using existing resources.
+
 - [Streaming decompression](streaming_decompression.c) :
   Decompress a single file compressed by zstd.
   Compatible with both simple and streaming compression.
diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c
new file mode 100644
index 0000000..6169910
--- /dev/null
+++ b/examples/multiple_streaming_compression.c
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the license found in the
+ * LICENSE-examples file in the root directory of this source tree.
+ */
+
+
+/* The objective of this example is to show how to compress multiple successive files
+*  while preserving memory management.
+*  All structures and buffers will be created only once,
+*  and shared across all compression operations */
+
+#include <stdlib.h>    // malloc, exit
+#include <stdio.h>     // fprintf, perror, feof
+#include <string.h>    // strerror
+#include <errno.h>     // errno
+#define ZSTD_STATIC_LINKING_ONLY  // streaming API defined as "experimental" for the time being
+#include <zstd.h>      // presumes zstd library is installed
+
+
+static void* malloc_orDie(size_t size)
+{   /* malloc() wrapper : @return a non-NULL buffer of `size` bytes, or terminates the program with exit code 1 */
+    void* const buff = malloc(size);
+    if (buff) return buff;
+    /* error : perror() appends ": <reason>" by itself, so the prefix carries no colon */
+    perror("malloc");
+    exit(1);
+}
+
+static FILE* fopen_orDie(const char *filename, const char *instruction)
+{
+    FILE* const inFile = fopen(filename, instruction);
+    if (inFile) return inFile;
+    /* error */
+    perror(filename);
+    exit(3);
+}
+
+static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
+{
+    size_t const readSize = fread(buffer, 1, sizeToRead, file);
+    if (readSize == sizeToRead) return readSize;   /* good */
+    if (feof(file)) return readSize;   /* good, reached end of file */
+    /* error */
+    perror("fread");
+    exit(4);
+}
+
+static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
+{
+    size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
+    if (writtenSize == sizeToWrite) return sizeToWrite;   /* good */
+    /* error */
+    perror("fwrite");
+    exit(5);
+}
+
+static size_t fclose_orDie(FILE* file)
+{
+    if (!fclose(file)) return 0;
+    /* error */
+    perror("fclose");
+    exit(6);
+}
+
+
+typedef struct {
+    void* buffIn;
+    void* buffOut;
+    size_t buffInSize;
+    size_t buffOutSize;
+    ZSTD_CStream* cstream;
+} resources ;
+
+static resources createResources_orDie(void)   /* (void) : a real prototype, so stray arguments are rejected */
+{   /* allocates both I/O buffers and the compression stream once; exits on any failure (malloc_orDie, or code 10) */
+    resources ress;
+    ress.buffInSize = ZSTD_CStreamInSize();   /* can always read one full block */
+    ress.buffOutSize= ZSTD_CStreamOutSize();  /* can always flush a full block */
+    ress.buffIn = malloc_orDie(ress.buffInSize);
+    ress.buffOut= malloc_orDie(ress.buffOutSize);
+    ress.cstream = ZSTD_createCStream();
+    if (ress.cstream==NULL) { fprintf(stderr, "ZSTD_createCStream() error \n"); exit(10); }
+    return ress;   /* returned by value; release with freeResources() */
+}
+
+static void freeResources(resources ress)
+{
+    ZSTD_freeCStream(ress.cstream);
+    free(ress.buffIn);
+    free(ress.buffOut);
+}
+
+
+static void compressFile_orDie(resources ress, const char* fname, const char* outName, int cLevel)
+{   /* compresses file `fname` into `outName` at level `cLevel`, using the buffers and stream of `ress`; exits on any error */
+    FILE* const fin  = fopen_orDie(fname, "rb");
+    FILE* const fout = fopen_orDie(outName, "wb");
+
+    size_t const initResult = ZSTD_initCStream(ress.cstream, cLevel);   /* (re)start the shared stream for this file */
+    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
+
+    size_t read, toRead = ress.buffInSize;
+    while( (read = fread_orDie(ress.buffIn, toRead, fin)) ) {   /* stops when nothing more can be read */
+        ZSTD_inBuffer input = { ress.buffIn, read, 0 };
+        while (input.pos < input.size) {   /* loop until the whole input chunk is consumed */
+            ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 };
+            toRead = ZSTD_compressStream(ress.cstream, &output , &input);   /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
+            if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
+            if (toRead > ress.buffInSize) toRead = ress.buffInSize;   /* Safely handle when `buffInSize` is manually changed to a smaller value */
+            fwrite_orDie(ress.buffOut, output.pos, fout);
+        }
+    }
+
+    ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 };
+    size_t const remainingToFlush = ZSTD_endStream(ress.cstream, &output);   /* close frame */
+    if (remainingToFlush) { fprintf(stderr, "not fully flushed \n"); exit(13); }   /* newline-terminated, like the other diagnostics */
+    fwrite_orDie(ress.buffOut, output.pos, fout);
+
+    fclose_orDie(fout);
+    fclose_orDie(fin);
+}
+
+
+int main(int argc, const char** argv)
+{   /* compresses every FILE argument into FILE.zst, sharing one set of resources across all files */
+    const char* const exeName = argv[0];
+
+    if (argc<2) {
+        printf("wrong arguments\n");
+        printf("usage:\n");
+        printf("%s FILE(s)\n", exeName);
+        return 1;
+    }
+
+    resources const ress = createResources_orDie();
+    void* ofnBuffer = NULL;   /* output file name buffer, grown on demand and re-used for each file */
+    size_t ofnbSize = 0;      /* current capacity of ofnBuffer, in bytes */
+
+    int argNb;
+    for (argNb = 1; argNb < argc; argNb++) {
+        const char* const ifn = argv[argNb];
+        size_t const ifnSize = strlen(ifn);
+        size_t const ofnSize = ifnSize + 5;   /* room for ".zst" and the terminating '\0' */
+        if (ofnbSize <= ofnSize) {   /* too small : swap for a larger buffer (old content is not needed) */
+            ofnbSize = ofnSize + 16;
+            free(ofnBuffer);
+            ofnBuffer = malloc_orDie(ofnbSize);
+        }
+        memset(ofnBuffer, 0, ofnSize);   /* start from an empty string, so strcat() can build the name */
+        strcat(ofnBuffer, ifn);
+        strcat(ofnBuffer, ".zst");
+        compressFile_orDie(ress, ifn, ofnBuffer, 7);   /* 7 is the compression level */
+    }
+
+    freeResources(ress);
+    free(ofnBuffer);   /* the file name buffer is owned by main(), not by `ress` */
+    printf("compressed %i files \n", argc-1);   /* success */
+
+    return 0;
+}
diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c
index 907ee33..62a881f 100644
--- a/examples/simple_decompression.c
+++ b/examples/simple_decompression.c
@@ -65,7 +65,7 @@
     void* const cBuff = loadFile_X(fname, &cSize);
     unsigned long long const rSize = ZSTD_getDecompressedSize(cBuff, cSize);
     if (rSize==0) {
-        printf("%s : original size unknown \n", fname);
+        printf("%s : original size unknown. Use streaming decompression instead. \n", fname);
         exit(5);
     }
     void* const rBuff = malloc_X((size_t)rSize);
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index 1ba1a3d..400aa67 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -71,9 +71,12 @@
 
     ZSTD_DStream* const dstream = ZSTD_createDStream();
     if (dstream==NULL) { fprintf(stderr, "ZSTD_createDStream() error \n"); exit(10); }
+
+    /* In more complex scenarios, a file may consist of multiple appended frames (ex : pzstd).
+    *  The following example decompresses only the first frame.
+    *  It is compatible with other provided streaming examples */
     size_t const initResult = ZSTD_initDStream(dstream);
     if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initDStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
     size_t read, toRead = initResult;
     while ( (read = fread_orDie(buffIn, toRead, fin)) ) {
         ZSTD_inBuffer input = { buffIn, read, 0 };
diff --git a/lib/Makefile b/lib/Makefile
index 4fb8ed9..1117b49 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -97,19 +97,21 @@
 	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
 	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
 	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
-	@install -m 644 common/zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
+	@install -m 644 common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
+	@install -m 644 common/zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h   # Deprecated streaming functions
 	@install -m 644 dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd static and shared library installed
 
 uninstall:
-	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
-	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
-	$(RM) $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
-	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
-	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
-	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
-	$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h
-	$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
+	@$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
+	@$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+	@$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	@$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
+	@$(RM) $(DESTDIR)$(LIBDIR)/pkgconfig/libzstd.pc
+	@$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
+	@$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h   # Deprecated streaming functions
+	@$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd libraries successfully uninstalled
 
 endif
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index acd9669..18bba0e 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -168,9 +168,11 @@
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     /* memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128) {  /* special header */
@@ -198,6 +200,7 @@
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
new file mode 100644
index 0000000..a0fa172
--- /dev/null
+++ b/lib/common/error_private.c
@@ -0,0 +1,43 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(parameter_unknown): return "Unknown parameter type";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size incorrect";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+}
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index d27e15a..1bc2e49 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -21,7 +21,7 @@
 *  Dependencies
 ******************************************/
 #include <stddef.h>        /* size_t */
-#include "error_public.h"  /* enum list */
+#include "zstd_errors.h"  /* enum list */
 
 
 /* ****************************************
@@ -62,35 +62,7 @@
 *  Error Strings
 ******************************************/
 
-ERR_STATIC const char* ERR_getErrorString(ERR_enum code)
-{
-    static const char* notErrorCode = "Unspecified error code";
-    switch( code )
-    {
-    case PREFIX(no_error): return "No error detected";
-    case PREFIX(GENERIC):  return "Error (generic)";
-    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
-    case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
-    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
-    case PREFIX(init_missing): return "Context should be init first";
-    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
-    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
-    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
-    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
-    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
-    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
-    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
-    case PREFIX(maxCode):
-    default: return notErrorCode;
-    }
-}
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
 
 ERR_STATIC const char* ERR_getErrorName(size_t code)
 {
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 720d54b..cecb1ae 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -503,6 +503,7 @@
     BIT_flushBits(bitC);
 }
 
+
 /* ======    Decompression    ====== */
 
 typedef struct {
@@ -581,14 +582,19 @@
 *  Increasing memory usage improves compression ratio
 *  Reduced memory usage can improve speed, due to cache effect
 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#define FSE_MAX_MEMORY_USAGE 14
-#define FSE_DEFAULT_MEMORY_USAGE 13
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
 
 /*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
 *  Required for proper stack allocation */
-#define FSE_MAX_SYMBOL_VALUE 255
-
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
 
 /* **************************************************************
 *  template functions type & suffix
diff --git a/lib/common/error_public.h b/lib/common/zstd_errors.h
similarity index 91%
rename from lib/common/error_public.h
rename to lib/common/zstd_errors.h
index d46abd2..50dc4f7 100644
--- a/lib/common/error_public.h
+++ b/lib/common/zstd_errors.h
@@ -7,8 +7,8 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
-#ifndef ERROR_PUBLIC_H_MODULE
-#define ERROR_PUBLIC_H_MODULE
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
 
 #if defined (__cplusplus)
 extern "C" {
@@ -29,6 +29,7 @@
   ZSTD_error_parameter_unknown,
   ZSTD_error_frameParameter_unsupported,
   ZSTD_error_frameParameter_unsupportedBy32bits,
+  ZSTD_error_frameParameter_windowTooLarge,
   ZSTD_error_compressionParameter_unsupported,
   ZSTD_error_init_missing,
   ZSTD_error_memory_allocation,
@@ -56,4 +57,4 @@
 }
 #endif
 
-#endif /* ERROR_PUBLIC_H_MODULE */
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index f40e00a..d889c84 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -31,6 +31,16 @@
 #  endif /* __STDC_VERSION__ */
 #endif
 
+#ifdef _MSC_VER
+#  define FORCE_NOINLINE static __declspec(noinline)
+#else
+#  ifdef __GNUC__
+#    define FORCE_NOINLINE static __attribute__((__noinline__))
+#  else
+#    define FORCE_NOINLINE static
+#  endif
+#endif
+
 
 /*-*************************************
 *  Dependencies
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index b7d3d77..78784aa 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -155,13 +155,14 @@
     }   }
 
     /* fill val */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
-        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
         /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
-            U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
         }   }
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 94f4b5a..e7f7d99 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -122,6 +122,11 @@
     return &(ctx->seqStore);
 }
 
+static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx)
+{
+    return cctx->params;
+}
+
 
 /** ZSTD_checkParams() :
     ensure param values remain within authorized range.
@@ -137,7 +142,7 @@
       U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
       CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
     CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
     return 0;
 }
 
@@ -160,7 +165,7 @@
             if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }   }
     if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
-    {   U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt);
+    {   U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt) | (cPar.strategy == ZSTD_btopt2);
         U32 const maxChainLog = cPar.windowLog+btPlus;
         if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */
 
@@ -186,7 +191,7 @@
     size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                           + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
     size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                             + ((cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+                             + (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
 
     return sizeof(ZSTD_CCtx) + neededSpace;
 }
@@ -246,7 +251,7 @@
         {   size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                                   + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
             size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
-                                  + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+                                  + (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
             if (zc->workSpaceSize < neededSpace) {
                 ZSTD_free(zc->workSpace, zc->customMem);
                 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
@@ -276,7 +281,7 @@
         zc->frameContentSize = frameContentSize;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
 
-        if (params.cParams.strategy == ZSTD_btopt) {
+        if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
             zc->seqStore.litFreq = (U32*)ptr;
             zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
             zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
@@ -377,7 +382,7 @@
 *  Block entropic compression
 *********************************************************/
 
-/* See zstd_compression_format.md for detailed format description */
+/* See doc/zstd_compression_format.md for detailed format description */
 
 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
@@ -1458,7 +1463,7 @@
     const U32 dictLimit = zc->dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match = base + matchIndex;
+    const BYTE* match;
     const U32 current = (U32)(ip-base);
     const U32 btLow = btMask >= current ? 0 : current - btMask;
     U32* smallerPtr = bt + 2*(current&btMask);
@@ -2170,7 +2175,17 @@
 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2180,7 +2195,17 @@
 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
 #ifdef ZSTD_OPT_H_91842398743
-    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
+}
+
+static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+#ifdef ZSTD_OPT_H_91842398743
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
 #else
     (void)ctx; (void)src; (void)srcSize;
     return;
@@ -2192,9 +2217,9 @@
 
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][7] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][8] = {
+        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
     };
 
     return blockCompressor[extDict][(U32)strat];
@@ -2235,7 +2260,7 @@
     BYTE* op = ostart;
     U32 const maxDist = 1 << cctx->params.cParams.windowLog;
 
-    if (cctx->params.fParams.checksumFlag)
+    if (cctx->params.fParams.checksumFlag && srcSize)
         XXH64_update(&cctx->xxhState, src, srcSize);
 
     while (remaining) {
@@ -2247,7 +2272,7 @@
 
         /* preemptive overflow correction */
         if (cctx->lowLimit > (1<<30)) {
-            U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt);
+            U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt) | (cctx->params.cParams.strategy == ZSTD_btopt2);
             U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
             U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
             U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog);   /* preserve position % chainSize, ensure current-repcode doesn't underflow */
@@ -2436,6 +2461,7 @@
 
     case ZSTD_btlazy2:
     case ZSTD_btopt:
+    case ZSTD_btopt2:
         ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
         break;
 
@@ -2448,14 +2474,28 @@
 }
 
 
+/* Dictionaries that assign zero probability to symbols that show up cause problems
+   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
+   that we may encounter during compression.
+   NOTE: This behavior is not standard and could be improved in the future. */
+static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
+    }
+    return 0;
+}
+
+
 /* Dictionary format :
-     Magic == ZSTD_DICT_MAGIC (4 bytes)
-     HUF_writeCTable(256)
-     FSE_writeNCount(off)
-     FSE_writeNCount(ml)
-     FSE_writeNCount(ll)
-     RepOffsets
-     Dictionary content
+    Magic == ZSTD_DICT_MAGIC (4 bytes)
+    HUF_writeCTable(256)
+    FSE_writeNCount(off)
+    FSE_writeNCount(ml)
+    FSE_writeNCount(ll)
+    RepOffsets
+    Dictionary content
 */
 /*! ZSTD_loadDictEntropyStats() :
     @return : size read from dictionary
@@ -2464,32 +2504,41 @@
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
 
     {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
     }
 
-    {   short offcodeNCount[MaxOff+1];
-        unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
+    {   unsigned offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
         CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        /* Every match length code must have non-zero probability */
+        CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
         CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        /* Every literal length code must have non-zero probability */
+        CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
         CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
@@ -2500,6 +2549,16 @@
     cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
+    {   U32 offcodeMax = MaxOff;
+        if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
+            /* Calculate minimum offset code required to represent maxOffset */
+            offcodeMax = ZSTD_highbit32(maxOffset);
+        }
+        /* Every possible supported offset <= dictContentSize + 128 KB must be representable */
+        CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
+    }
+
     cctx->flagStaticTables = 1;
     return dictPtr - (const BYTE*)dict;
 }
@@ -2688,7 +2747,9 @@
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
+        if (dictSize) {
+            memcpy(dictContent, dict, dictSize);
+        }
         {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
             if (ZSTD_isError(errorCode)) {
                 ZSTD_free(dictContent, customMem);
@@ -2723,6 +2784,10 @@
     }
 }
 
+static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {   /* cdict is dereferenced unconditionally : must not be NULL */
+    return ZSTD_getParamsFromCCtx(cdict->refContext);   /* parameters are read from the context embedded in cdict */
+}
+
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
 {
     if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
@@ -2759,7 +2824,8 @@
 
 struct ZSTD_CStream_s {
     ZSTD_CCtx* cctx;
-    ZSTD_CDict* cdict;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
     char*  inBuff;
     size_t inBuffSize;
     size_t inToCompress;
@@ -2773,6 +2839,7 @@
     ZSTD_cStreamStage stage;
     U32    checksum;
     U32    frameEnded;
+    ZSTD_parameters params;
     ZSTD_customMem customMem;
 };   /* typedef'd to ZSTD_CStream within "zstd.h" */
 
@@ -2802,7 +2869,7 @@
     if (zcs==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = zcs->customMem;
         ZSTD_freeCCtx(zcs->cctx);
-        ZSTD_freeCDict(zcs->cdict);
+        ZSTD_freeCDict(zcs->cdictLocal);
         ZSTD_free(zcs->inBuff, cMem);
         ZSTD_free(zcs->outBuff, cMem);
         ZSTD_free(zcs, cMem);
@@ -2818,7 +2885,10 @@
 
 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
 {
-    CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize));
+    if (zcs->inBuffSize==0) return ERROR(stage_wrong);   /* zcs has not been init at least once */
+
+    if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
+    else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
 
     zcs->inToCompress = 0;
     zcs->inBuffPos = 0;
@@ -2850,15 +2920,28 @@
         if (zcs->outBuff == NULL) return ERROR(memory_allocation);
     }
 
-    ZSTD_freeCDict(zcs->cdict);
-    zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
-    if (zcs->cdict == NULL) return ERROR(memory_allocation);
+    if (dict) {
+        ZSTD_freeCDict(zcs->cdictLocal);
+        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
+        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+        zcs->cdict = zcs->cdictLocal;
+    } else zcs->cdict = NULL;
 
     zcs->checksum = params.fParams.checksumFlag > 0;
+    zcs->params = params;
 
     return ZSTD_resetCStream(zcs, pledgedSrcSize);
 }
 
+/* note : cdict is only referenced (never freed by zcs), and must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
+    CHECK_F(ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0));   /* stores params, but sets zcs->cdict to NULL and begins a dictionary-less session */
+    zcs->cdict = cdict;   /* only reached when init succeeded */
+    return ZSTD_resetCStream(zcs, 0);   /* begin again, so that the very first frame already uses cdict */
+}
+
 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
 {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
@@ -2873,7 +2956,7 @@
 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 {
     if (zcs==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdict) + zcs->outBuffSize + zcs->inBuffSize;
+    return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;   /* sizeof(*zcs) : the structure, not the pointer ; only cdictLocal is counted, a cdict merely referenced through zcs->cdict is not */
 }
 
 /*======   Compression   ======*/
@@ -3064,9 +3147,9 @@
     { 23, 21, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
     { 23, 23, 22,  6,  5, 32, ZSTD_btopt   },  /* level 18 */
     { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19 */
-    { 25, 25, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20 */
-    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21 */
-    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22 */
+    { 25, 25, 23,  7,  3, 64, ZSTD_btopt2  },  /* level 20 */
+    { 26, 26, 23,  7,  3,256, ZSTD_btopt2  },  /* level 21 */
+    { 27, 27, 25,  9,  3,512, ZSTD_btopt2  },  /* level 22 */
 },
 {   /* for srcSize <= 256 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3090,9 +3173,9 @@
     { 18, 19, 18,  8,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
     { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 18, 19, 18, 11,  3,512, ZSTD_btopt   },  /* level 20.*/
-    { 18, 19, 18, 12,  3,512, ZSTD_btopt   },  /* level 21.*/
-    { 18, 19, 18, 13,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 18, 19, 18, 11,  3,512, ZSTD_btopt2  },  /* level 20.*/
+    { 18, 19, 18, 12,  3,512, ZSTD_btopt2  },  /* level 21.*/
+    { 18, 19, 18, 13,  3,512, ZSTD_btopt2  },  /* level 22.*/
 },
 {   /* for srcSize <= 128 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3116,9 +3199,9 @@
     { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
     { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 17, 18, 17,  9,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 17, 18, 17, 10,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 17, 18, 17, 11,  3,512, ZSTD_btopt   },  /* level 22.*/
+    { 17, 18, 17,  9,  3,256, ZSTD_btopt2  },  /* level 20.*/
+    { 17, 18, 17, 10,  3,256, ZSTD_btopt2  },  /* level 21.*/
+    { 17, 18, 17, 11,  3,512, ZSTD_btopt2  },  /* level 22.*/
 },
 {   /* for srcSize <= 16 KB */
     /* W,  C,  H,  S,  L,  T, strat */
@@ -3142,9 +3225,9 @@
     { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
     { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
     { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
-    { 14, 15, 15,  8,  3,256, ZSTD_btopt   },  /* level 20.*/
-    { 14, 15, 15,  9,  3,256, ZSTD_btopt   },  /* level 21.*/
-    { 14, 15, 15, 10,  3,256, ZSTD_btopt   },  /* level 22.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btopt2  },  /* level 20.*/
+    { 14, 15, 15,  9,  3,256, ZSTD_btopt2  },  /* level 21.*/
+    { 14, 15, 15, 10,  3,256, ZSTD_btopt2  },  /* level 22.*/
 },
 };
 
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index cb58729..90d511c 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
@@ -16,6 +16,7 @@
 
 
 #define ZSTD_FREQ_DIV   5
+#define ZSTD_MAX_PRICE  (1<<30)
 
 /*-*************************************
 *  Price functions for optimal parser
@@ -120,12 +121,14 @@
 }
 
 
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
     /* offset */
     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
     U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
 
+    if (!ultra && offCode >= 20) price += (offCode-19)*2;
+
     /* match Length */
     {   const BYTE ML_deltaCode = 36;
         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
@@ -171,7 +174,7 @@
 
 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
     {                                                 \
-        while (last_pos < pos)  { opt[last_pos+1].price = 1<<30; last_pos++; } \
+        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
         opt[pos].mlen = mlen_;                         \
         opt[pos].off = offset_;                        \
         opt[pos].litlen = litlen_;                     \
@@ -375,7 +378,7 @@
 *********************************/
 FORCE_INLINE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
-                                    const void* src, size_t srcSize)
+                                    const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -401,7 +404,6 @@
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -424,7 +426,7 @@
                     }
                     best_off = i - (ip == anchor);
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -449,7 +451,7 @@
             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
             best_mlen = matches[u].len;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                 mlen++;
@@ -496,7 +498,7 @@
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }
 
-           best_mlen = minMatch;
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
                     const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
@@ -510,21 +512,20 @@
                        }
 
                        best_off = i - (opt[cur].mlen != 1);
+                       if (mlen > best_mlen) best_mlen = mlen;
 
-                       if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
+                       do {
+                           if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
 
-                        if (mlen > best_mlen) best_mlen = mlen;
-
-                        do {
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -549,12 +550,12 @@
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
@@ -626,7 +627,7 @@
 
 FORCE_INLINE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
-                                     const void* src, size_t srcSize)
+                                     const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -657,7 +658,6 @@
     ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
-    inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -666,7 +666,6 @@
         U32 current = (U32)(ip-base);
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
-        inr = ip;
         opt[0].litlen = (U32)(ip - anchor);
 
         /* check repCode */
@@ -691,7 +690,7 @@
                     best_off = i - (ip==anchor);
                     litlen = opt[0].litlen;
                     do {
-                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                         if (mlen > last_pos || price < opt[mlen].price)
                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                         mlen--;
@@ -721,7 +720,7 @@
             best_mlen = matches[u].len;
             litlen = opt[0].litlen;
             while (mlen <= best_mlen) {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                 mlen++;
@@ -765,8 +764,7 @@
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
             }
 
-            best_mlen = 0;
-
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i = (mlen != 1); i<last_i; i++) {
                     const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
@@ -786,20 +784,20 @@
                         }
 
                         best_off = i - (opt[cur].mlen != 1);
-                        if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
-
-                        best_mlen = mlen;
+                        if (mlen > best_mlen) best_mlen = mlen;
 
                         do {
+                            if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
+                            }
+
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -815,8 +813,6 @@
                 goto _storeSequence;
             }
 
-            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
-
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
@@ -826,12 +822,12 @@
                     if (opt[cur].mlen == 1) {
                         litlen = opt[cur].litlen;
                         if (cur > litlen)
-                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH);
+                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                         else
-                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH);
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                     } else {
                         litlen = 0;
-                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH);
+                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                     }
 
                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 47b5f42..15f9898 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -170,20 +170,22 @@
 static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 {
     ZSTD_decompressBegin(dstDCtx);  /* init */
-    dstDCtx->dictEnd = srcDCtx->dictEnd;
-    dstDCtx->vBase = srcDCtx->vBase;
-    dstDCtx->base = srcDCtx->base;
-    dstDCtx->previousDstEnd = srcDCtx->previousDstEnd;
-    dstDCtx->dictID = srcDCtx->dictID;
-    dstDCtx->litEntropy = srcDCtx->litEntropy;
-    dstDCtx->fseEntropy = srcDCtx->fseEntropy;
-    dstDCtx->LLTptr = srcDCtx->LLTable;
-    dstDCtx->MLTptr = srcDCtx->MLTable;
-    dstDCtx->OFTptr = srcDCtx->OFTable;
-    dstDCtx->HUFptr = srcDCtx->hufTable;
-    dstDCtx->rep[0] = srcDCtx->rep[0];
-    dstDCtx->rep[1] = srcDCtx->rep[1];
-    dstDCtx->rep[2] = srcDCtx->rep[2];
+    if (srcDCtx) {   /* support refDCtx on NULL : dstDCtx is then only initialized by the call above */
+        dstDCtx->dictEnd = srcDCtx->dictEnd;   /* window / dictionary boundaries are taken from srcDCtx */
+        dstDCtx->vBase = srcDCtx->vBase;
+        dstDCtx->base = srcDCtx->base;
+        dstDCtx->previousDstEnd = srcDCtx->previousDstEnd;
+        dstDCtx->dictID = srcDCtx->dictID;
+        dstDCtx->litEntropy = srcDCtx->litEntropy;
+        dstDCtx->fseEntropy = srcDCtx->fseEntropy;
+        dstDCtx->LLTptr = srcDCtx->LLTable;   /* the 4 tables are referenced, not copied : dstDCtx points into srcDCtx */
+        dstDCtx->MLTptr = srcDCtx->MLTable;
+        dstDCtx->OFTptr = srcDCtx->OFTable;
+        dstDCtx->HUFptr = srcDCtx->hufTable;
+        dstDCtx->rep[0] = srcDCtx->rep[0];   /* the 3 rep[] values are copied by value */
+        dstDCtx->rep[1] = srcDCtx->rep[1];
+        dstDCtx->rep[2] = srcDCtx->rep[2];
+    }
 }
 
 
@@ -191,7 +193,7 @@
 *   Decompression section
 ***************************************************************/
 
-/* See compression format details in : zstd_compression_format.md */
+/* See compression format details in : doc/zstd_compression_format.md */
 
 /** ZSTD_frameHeaderSize() :
 *   srcSize must be >= ZSTD_frameHeaderSize_prefix.
@@ -248,7 +250,7 @@
         if (!singleSegment) {
             BYTE const wlByte = ip[pos++];
             U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
+            if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_windowTooLarge);  /* avoids issue with 1 << windowLog */
             windowSize = (1U << windowLog);
             windowSize += (windowSize >> 3) * (wlByte&7);
         }
@@ -270,7 +272,7 @@
             case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
         }
         if (!windowSize) windowSize = (U32)frameContentSize;
-        if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
+        if (windowSize > windowSizeMax) return ERROR(frameParameter_windowTooLarge);
         fparamsPtr->frameContentSize = frameContentSize;
         fparamsPtr->windowSize = windowSize;
         fparamsPtr->dictID = dictID;
@@ -301,14 +303,16 @@
 
 
 /** ZSTD_decodeFrameHeader() :
-*   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
 *   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize)
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
-    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
+    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
+    if (ZSTD_isError(result)) return result;  /* invalid header */
+    if (result>0) return ERROR(srcSize_wrong);   /* headerSize too small */
     if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
     if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
-    return result;
+    return 0;
 }
 
 
@@ -710,10 +714,13 @@
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -807,7 +814,8 @@
         if (ofCode <= 1) {
             offset += (llCode==0);
             if (offset) {
-                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                 seqState->prevOffset[1] = seqState->prevOffset[0];
                 seqState->prevOffset[0] = offset = temp;
@@ -839,6 +847,53 @@
 }
 
 
+FORCE_NOINLINE   /* slow path, reached from ZSTD_execSequence() when the literals end beyond oend - WILDCOPY_OVERLENGTH */
+size_t ZSTD_execSequenceLast7(BYTE* op,
+                              BYTE* const oend, seq_t sequence,
+                              const BYTE** litPtr, const BYTE* const litLimit_w,
+                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+{   /* @return : nb of bytes written (litLength + matchLength), or an error code ; *litPtr is advanced while literals are copied */
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* ZSTD_wildcopy() is only used below this limit */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    /* check */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* the sequence (literals + match) would end beyond the output buffer */
+    if (iLitEnd > litLimit_w) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd <= oend_w) return ERROR(GENERIC);   /* Precondition : literals must end beyond oend_w */
+
+    /* copy literals */
+    if (op < oend_w) {
+        ZSTD_wildcopy(op, *litPtr, oend_w - op);   /* NOTE(review): presumably wildcopy may write past its nominal length, hence the stop at oend_w - confirm */
+        *litPtr += oend_w - op;
+        op = oend_w;
+    }
+    while (op < oLitEnd) *op++ = *(*litPtr)++;   /* remaining literals, one byte at a time : never writes beyond oLitEnd */
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - base)) {
+        /* offset beyond prefix : match starts before base, and gets remapped so that it ends at dictEnd */
+        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);   /* offset reaches before vBase */
+        match = dictEnd - (base-match);
+        if (match + sequence.matchLength <= dictEnd) {   /* match ends at or before dictEnd : a single copy is enough */
+            memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;   /* part of the match located before dictEnd */
+            memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = base;   /* the rest of the match is read starting from base */
+    }   }
+    while (op < oMatchEnd) *op++ = *match++;   /* forward copy, one byte at a time : never writes beyond oMatchEnd */
+    return sequenceLength;
+}
+
+
 FORCE_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                                 BYTE* const oend, seq_t sequence,
@@ -853,8 +908,9 @@
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
-    if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
     if (iLitEnd > litLimit_w) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit_w, base, vBase, dictEnd);
 
     /* copy Literals */
     ZSTD_copy8(op, *litPtr);
@@ -878,7 +934,13 @@
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_w) {
+              U32 i;
+              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_w */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -1310,25 +1372,28 @@
     }
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
     }
@@ -1397,7 +1462,9 @@
             return NULL;
         }
 
-        memcpy(dictContent, dict, dictSize);
+        if (dictSize) {
+            memcpy(dictContent, dict, dictSize);
+        }
         {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
             if (ZSTD_isError(errorCode)) {
                 ZSTD_free(dictContent, customMem);
@@ -1467,7 +1534,8 @@
 /* *** Resource management *** */
 struct ZSTD_DStream_s {
     ZSTD_DCtx* dctx;
-    ZSTD_DDict* ddict;
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;
     ZSTD_frameParams fParams;
     ZSTD_dStreamStage stage;
     char*  inBuff;
@@ -1517,7 +1585,7 @@
     if (zds==NULL) return 0;   /* support free on null */
     {   ZSTD_customMem const cMem = zds->customMem;
         ZSTD_freeDCtx(zds->dctx);
-        ZSTD_freeDDict(zds->ddict);
+        ZSTD_freeDDict(zds->ddictLocal);
         ZSTD_free(zds->inBuff, cMem);
         ZSTD_free(zds->outBuff, cMem);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
@@ -1539,9 +1607,12 @@
 {
     zds->stage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-    ZSTD_freeDDict(zds->ddict);
-    zds->ddict = ZSTD_createDDict(dict, dictSize);
-    if (zds->ddict == NULL) return ERROR(memory_allocation);
+    ZSTD_freeDDict(zds->ddictLocal);
+    if (dict) {
+        zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
+        if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
+    } else zds->ddictLocal = NULL;
+    zds->ddict = zds->ddictLocal;
     zds->legacyVersion = 0;
     zds->hostageByte = 0;
     return ZSTD_frameHeaderSize_prefix;
@@ -1552,9 +1623,15 @@
     return ZSTD_initDStream_usingDict(zds, NULL, 0);
 }
 
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)  /**< note : ddict is only referenced, not copied; it must outlive the decompression session */
+{
+    size_t const initResult = ZSTD_initDStream(zds);   /* must run before the assignment below : presumably the init path overwrites zds->ddict - confirm */
+    zds->ddict = ddict;   /* borrowed pointer : the stream's free path releases ddictLocal, not ddict */
+    return initResult;
+}
+
 size_t ZSTD_resetDStream(ZSTD_DStream* zds)
 {
-    if (zds->ddict == NULL) return ERROR(stage_wrong);  /* must be init at least once */
     zds->stage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     zds->legacyVersion = 0;
@@ -1568,7 +1645,7 @@
     switch(paramType)
     {
         default : return ERROR(parameter_unknown);
-        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue; break;
+        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
 }
@@ -1577,7 +1654,7 @@
 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
 {
     if (zds==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddict) + zds->inBuffSize + zds->outBuffSize;
+    return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddictLocal) + zds->inBuffSize + zds->outBuffSize;
 }
 
 
@@ -1618,15 +1695,17 @@
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
                 {   U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
                     if (legacyVersion) {
+                        const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
+                        size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
                         CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
-                                                       zds->ddict->dict, zds->ddict->dictSize));
+                                                       dict, dictSize));
                         zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
                         return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
                     } else {
                         return hSize; /* error */
                 }   }
 #else
-                    return hSize;
+                return hSize;
 #endif
                 if (hSize != 0) {   /* need more input */
                     size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
@@ -1641,7 +1720,9 @@
             }   }
 
             /* Consume header */
-            ZSTD_refDCtx(zds->dctx, zds->ddict->refContext);
+            {   const ZSTD_DCtx* refContext = zds->ddict ? zds->ddict->refContext : NULL;
+                ZSTD_refDCtx(zds->dctx, refContext);
+            }
             {   size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);  /* == ZSTD_frameHeaderSize_prefix */
                 CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
                 {   size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
@@ -1649,7 +1730,7 @@
             }   }
 
             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_unsupported);
+            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
 
             /* Adapt buffer sizes to frame header instructions */
             {   size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 47a82af..b3f20b1 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -371,21 +371,22 @@
 static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
 {
     const U32 tableSize = table->pos;
-    const U32 max = elt.pos + (elt.length-1);
+    const U32 eltEnd = elt.pos + elt.length;
 
     /* tail overlap */
     U32 u; for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos > elt.pos) && (table[u].pos < max)) {  /* overlap */
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
             /* append */
             U32 addedLength = table[u].pos - elt.pos;
             table[u].length += addedLength;
             table[u].pos = elt.pos;
             table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
-            table[u].savings += elt.length / 8;    /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
             elt = table[u];
+            /* sort : improve rank */
             while ((u>1) && (table[u-1].savings < elt.savings))
-                table[u] = table[u-1], u--;
+            table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
     }   }
@@ -393,14 +394,15 @@
     /* front overlap */
     for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) {  /* overlap */
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
             /* append */
-            int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
-            table[u].savings += elt.length / 8;    /* rough approx */
-            if (addedLength > 0) {   /* otherwise, already included */
+            int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
                 table[u].length += addedLength;
                 table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
             }
+            /* sort : improve rank */
             elt = table[u];
             while ((u>1) && (table[u-1].savings < elt.savings))
                 table[u] = table[u-1], u--;
@@ -811,7 +813,7 @@
     MEM_writeLE32(dstPtr+4, repStartValue[1]);
     MEM_writeLE32(dstPtr+8, repStartValue[2]);
 #endif
-    dstPtr += 12;
+    //dstPtr += 12;
     eSize += 12;
 
 _cleanup:
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index fe9c5cc..5c36c21 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -958,13 +958,16 @@
     U32 weightTotal;
     U32 maxBits;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
     U32 n;
     U32 nextRankStart;
     void* ptr = DTable+1;
     HUF_DElt* const dt = (HUF_DElt*)ptr;
 
+    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    iSize = ip[0];
+
     FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
     //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
     if (iSize >= 128)  /* special header */
@@ -1005,6 +1008,7 @@
         rankVal[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }
+    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     maxBits = FSE_highbit32(weightTotal) + 1;
@@ -1533,6 +1537,7 @@
         {
             size_t rleSize = litbp.origSize;
             if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
+            if (!srcSize) return ERROR(srcSize_wrong);
             memset(oend - rleSize, *ip, rleSize);
             *litStart = oend - rleSize;
             *litSize = rleSize;
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index de1592e..24498fe 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -1607,10 +1607,12 @@
     U32 weightTotal;
     U32 tableLog;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
     U32 n;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  /* special header */
@@ -1652,6 +1654,7 @@
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     tableLog = BIT_highbit32(weightTotal) + 1;
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index caad331..a3bd1da 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -1604,10 +1604,12 @@
     U32 weightTotal;
     U32 tableLog;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
     U32 n;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  /* special header */
@@ -1649,6 +1651,7 @@
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     tableLog = BIT_highbit32(weightTotal) + 1;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index c9dcb94..0a740ba 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -1896,10 +1896,12 @@
     U32 weightTotal;
     U32 tableLog;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
     U32 n;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  /* special header */
@@ -1941,6 +1943,7 @@
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     tableLog = BIT_highbit32(weightTotal) + 1;
@@ -3107,8 +3110,13 @@
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_8) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
         }
     }
+    /* Requirement: op <= oend_8 */
 
     /* match within prefix */
     if (sequence.offset < 8)
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 5027e2b..201bf3c 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -1873,10 +1873,12 @@
     U32 weightTotal;
     U32 tableLog;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
     U32 n;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
@@ -1910,6 +1912,7 @@
         rankStats[huffWeight[n]]++;
         weightTotal += (1 << huffWeight[n]) >> 1;
     }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     tableLog = BITv05_highbit32(weightTotal) + 1;
@@ -2032,13 +2035,14 @@
 {
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + dstSize;
-    size_t errorCode;
     const U32 dtLog = DTable[0];
     const void* dtPtr = DTable;
     const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
     BITv05_DStream_t bitD;
-    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
-    if (HUFv05_isError(errorCode)) return errorCode;
+
+    if (dstSize <= cSrcSize) return ERROR(dstSize_tooSmall);
+    { size_t const errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUFv05_isError(errorCode)) return errorCode; }
 
     HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
 
@@ -2942,6 +2946,7 @@
         {
             size_t litSize, litCSize, singleStream=0;
             U32 lhSize = ((istart[0]) >> 4) & 3;
+            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
             switch(lhSize)
             {
             case 0: case 1: default:   /* note : default is impossible, since lhSize into [0..3] */
@@ -2965,6 +2970,7 @@
                 break;
             }
             if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
 
             if (HUFv05_isError(singleStream ?
                             HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
@@ -2990,6 +2996,7 @@
             lhSize=3;
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);   /* input consumed = header (lhSize) + compressed literals (litCSize) */
 
             errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
             if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
@@ -3046,6 +3053,7 @@
                 break;
             case 3:
                 litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
+                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
                 break;
             }
             if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
@@ -3063,7 +3071,7 @@
 
 size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
                          FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
-                         const void* src, size_t srcSize)
+                         const void* src, size_t srcSize, U32 flagStaticTable)
 {
     const BYTE* const istart = (const BYTE* const)src;
     const BYTE* ip = istart;
@@ -3079,17 +3087,22 @@
     /* SeqHead */
     *nbSeq = *ip++;
     if (*nbSeq==0) return 1;
-    if (*nbSeq >= 128)
+    if (*nbSeq >= 128) {
+        if (ip >= iend) return ERROR(srcSize_wrong);
         *nbSeq = ((nbSeq[0]-128)<<8) + *ip++;
+    }
 
+    if (ip >= iend) return ERROR(srcSize_wrong);
     LLtype  = *ip >> 6;
     Offtype = (*ip >> 4) & 3;
     MLtype  = (*ip >> 2) & 3;
     if (*ip & 2) {
+        if (ip+3 > iend) return ERROR(srcSize_wrong);
         dumpsLength  = ip[2];
         dumpsLength += ip[1] << 8;
         ip += 3;
     } else {
+        if (ip+2 > iend) return ERROR(srcSize_wrong);
         dumpsLength  = ip[1];
         dumpsLength += (ip[0] & 1) << 8;
         ip += 2;
@@ -3118,6 +3131,7 @@
             FSEv05_buildDTable_raw(DTableLL, LLbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3141,6 +3155,7 @@
             FSEv05_buildDTable_raw(DTableOffb, Offbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3164,6 +3179,7 @@
             FSEv05_buildDTable_raw(DTableML, MLbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3312,7 +3328,12 @@
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_8) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_8 */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -3371,7 +3392,7 @@
     /* Build Decoding Tables */
     errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
                                       DTableLL, DTableML, DTableOffb,
-                                      ip, seqSize);
+                                      ip, seqSize, dctx->flagStaticTables);
     if (ZSTDv05_isError(errorCode)) return errorCode;
     ip += errorCode;
 
@@ -3660,11 +3681,11 @@
 {
     size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
     short offcodeNCount[MaxOff+1];
-    U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSEv05Log;
+    U32 offcodeMaxValue=MaxOff, offcodeLog;
     short matchlengthNCount[MaxML+1];
-    unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSEv05Log;
+    unsigned matchlengthMaxValue = MaxML, matchlengthLog;
     short litlengthNCount[MaxLL+1];
-    unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSEv05Log;
+    unsigned litlengthMaxValue = MaxLL, litlengthLog;
 
     hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize);
     if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted);
@@ -3673,6 +3694,7 @@
 
     offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
     if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+    if (offcodeLog > OffFSEv05Log) return ERROR(dictionary_corrupted);
     errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
     if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
     dict = (const char*)dict + offcodeHeaderSize;
@@ -3680,12 +3702,14 @@
 
     matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
     if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+    if (matchlengthLog > MLFSEv05Log) return ERROR(dictionary_corrupted);
     errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
     if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
     dict = (const char*)dict + matchlengthHeaderSize;
     dictSize -= matchlengthHeaderSize;
 
     litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+    if (litlengthLog > LLFSEv05Log) return ERROR(dictionary_corrupted);
     if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
     errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
     if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted);
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index d9e89f8..b6fde3a 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -1932,9 +1932,11 @@
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
@@ -1969,6 +1971,7 @@
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BITv06_highbit32(weightTotal) + 1;
@@ -3183,6 +3186,7 @@
             lhSize=3;
             litSize  = ((istart[0] & 15) << 6) + (istart[1] >> 2);
             litCSize = ((istart[1] &  3) << 8) + istart[2];
+            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);   /* input consumed = header (lhSize) + compressed literals (litCSize) */
 
             {   size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
                 if (HUFv06_isError(errorCode)) return ERROR(corruption_detected);
@@ -3302,10 +3306,13 @@
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -3466,7 +3473,12 @@
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_8) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_8 */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -3822,9 +3834,10 @@
     dictSize -= hSize;
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
         offcodeHeaderSize = FSEv06_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
         if (FSEv06_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + offcodeHeaderSize;
@@ -3832,9 +3845,10 @@
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         matchlengthHeaderSize = FSEv06_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
         if (FSEv06_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dict = (const char*)dict + matchlengthHeaderSize;
@@ -3842,9 +3856,10 @@
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         litlengthHeaderSize = FSEv06_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
         if (FSEv06_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv06_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); }
     }
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index f4c8073..c7693f2 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -1382,9 +1382,11 @@
 {
     U32 weightTotal;
     const BYTE* ip = (const BYTE*) src;
-    size_t iSize = ip[0];
+    size_t iSize;
     size_t oSize;
 
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
     //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
 
     if (iSize >= 128)  { /* special header */
@@ -1419,6 +1421,7 @@
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
     {   U32 const tableLog = BITv07_highbit32(weightTotal) + 1;
@@ -3529,10 +3532,13 @@
     {   int nbSeq = *ip++;
         if (!nbSeq) { *nbSeqPtr=0; return 1; }
         if (nbSeq > 0x7F) {
-            if (nbSeq == 0xFF)
+            if (nbSeq == 0xFF) {
+                if (ip+2 > iend) return ERROR(srcSize_wrong);
                 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
-            else
+            } else {
+                if (ip >= iend) return ERROR(srcSize_wrong);
                 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+            }
         }
         *nbSeqPtr = nbSeq;
     }
@@ -3690,7 +3696,12 @@
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = base;
+            if (op > oend_w) {
+              while (op < oMatchEnd) *op++ = *match++;
+              return sequenceLength;
+            }
     }   }
+    /* Requirement: op <= oend_w */
 
     /* match within prefix */
     if (sequence.offset < 8) {
@@ -4097,27 +4108,30 @@
     }
 
     {   short offcodeNCount[MaxOff+1];
-        U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
+        U32 offcodeMaxValue=MaxOff, offcodeLog;
         size_t const offcodeHeaderSize = FSEv07_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSEv07_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv07_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
         size_t const matchlengthHeaderSize = FSEv07_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSEv07_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv07_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
         size_t const litlengthHeaderSize = FSEv07_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSEv07_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSEv07_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += litlengthHeaderSize;
diff --git a/lib/zstd.h b/lib/zstd.h
index dd3f5df..ea5caf1 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
@@ -14,12 +14,12 @@
 extern "C" {
 #endif
 
-/*======   Dependency   ======*/
+/* ======   Dependency   ======*/
 #include <stddef.h>   /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
+/* ======  Export for Windows  ======*/
+/*
 *  ZSTD_DLL_EXPORT :
 *  Enable exporting of functions when building a Windows DLL
 */
@@ -30,10 +30,32 @@
 #endif
 
 
-/*=======   Version   =======*/
+/*******************************************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios
+  at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and
+  decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22.
+  Levels >= 20, labelled `--ultra`, should be used with caution, as they require more memory.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - unbounded multiple steps (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using compression with a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  These APIs shall never be used with a dynamic library.
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+*********************************************************************************************************/
+
+/*------   Version   ------*/
+ZSTDLIB_API unsigned ZSTD_versionNumber (void);  /**< returns version number of ZSTD */
+
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    1
-#define ZSTD_VERSION_RELEASE  0
+#define ZSTD_VERSION_RELEASE  1
 
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
@@ -41,10 +63,9 @@
 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
 
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-ZSTDLIB_API unsigned ZSTD_versionNumber (void);
 
 
-/* *************************************
+/***************************************
 *  Simple API
 ***************************************/
 /*! ZSTD_compress() :
@@ -91,29 +112,33 @@
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
 
 
-/*-*************************************
+/***************************************
 *  Explicit memory management
 ***************************************/
-/** Compression context */
+/*= Compression context
+*   When compressing many messages / blocks,
+*   it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+*   This greatly reduces the burden on the system's memory.
+*   Use one context per thread for parallel execution in multi-threaded environments. */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 
-/** ZSTD_compressCCtx() :
+/*! ZSTD_compressCCtx() :
     Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
 
-/** Decompression context */
+/*= Decompression context */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 
-/** ZSTD_decompressDCtx() :
+/*! ZSTD_decompressDCtx() :
 *   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 
-/*-************************
+/**************************
 *  Simple dictionary API
 ***************************/
 /*! ZSTD_compress_usingDict() :
@@ -135,14 +160,20 @@
                                        const void* dict,size_t dictSize);
 
 
-/*-**************************
-*  Fast Dictionary API
+/****************************
+*  Fast dictionary API
 ****************************/
-/*! ZSTD_createCDict() :
-*   Create a digested dictionary, ready to start compression operation without startup delay.
-*   `dict` can be released after ZSTD_CDict creation */
 typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+*   When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+*   ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+*   ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
+*   `dict` can be released after ZSTD_CDict creation */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+*   Function frees memory allocated by ZSTD_createCDict() */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
@@ -154,11 +185,16 @@
                                       const void* src, size_t srcSize,
                                       const ZSTD_CDict* cdict);
 
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
 /*! ZSTD_createDDict() :
 *   Create a digested dictionary, ready to start decompression operation without startup delay.
 *   `dict` can be released after creation */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+*   Function frees memory allocated with ZSTD_createDDict() */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
@@ -170,7 +206,7 @@
                                         const ZSTD_DDict* ddict);
 
 
-/*-**************************
+/****************************
 *  Streaming
 ****************************/
 
@@ -187,16 +223,18 @@
 } ZSTD_outBuffer;
 
 
-/*======   streaming compression   ======*/
 
 /*-***********************************************************************
-*  Streaming compression - howto
+*  Streaming compression - HowTo
 *
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
 *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be performed consecutively,
+*  since it will play nicer with the system's memory, by re-using already allocated memory.
+*  Use one separate ZSTD_CStream per thread for parallel execution.
 *
-*  Start by initializing ZSTD_CStream.
+*  Start a new compression by initializing ZSTD_CStream.
 *  Use ZSTD_initCStream() to start a new compression operation.
 *  Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary.
 *
@@ -225,23 +263,22 @@
 *
 * *******************************************************************/
 
+/*=====   Streaming compression functions   ======*/
 typedef struct ZSTD_CStream_s ZSTD_CStream;
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
-
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
-
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
 
-/*======   decompression   ======*/
+
 
 /*-***************************************************************************
-*  Streaming decompression howto
+*  Streaming decompression - HowTo
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
@@ -262,28 +299,29 @@
 *            The return value is a suggested next input size (just an hint, to help latency).
 * *******************************************************************************/
 
+/*=====   Streaming decompression functions   =====*/
 typedef struct ZSTD_DStream_s ZSTD_DStream;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 
 ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
 
-ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
-ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-
 
 
 #ifdef ZSTD_STATIC_LINKING_ONLY
 
-/* ====================================================================================
+/****************************************************************************************
+ * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
  * The definitions in this section are considered experimental.
  * They should never be used with a dynamic library, as they may change in the future.
  * They are provided for advanced usages.
  * Use them only in association with static linking.
- * ==================================================================================== */
+ * ***************************************************************************************/
 
-/*--- Constants ---*/
+/* --- Constants ---*/
 #define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
 
@@ -310,8 +348,8 @@
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
 
 
-/*--- Types ---*/
-typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /* from faster to stronger */
+/*--- Advanced types ---*/
+typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy;   /* from faster to stronger */
 
 typedef struct {
     unsigned windowLog;      /**< largest match distance : larger == more compression, more memory needed during decompression */
@@ -334,13 +372,13 @@
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-/* custom memory allocation functions */
+/*= Custom memory allocation functions */
 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
 
 
-/*-*************************************
+/***************************************
 *  Advanced compression functions
 ***************************************/
 /*! ZSTD_estimateCCtxSize() :
@@ -393,7 +431,7 @@
                                            ZSTD_parameters params);
 
 
-/*--- Advanced Decompression functions ---*/
+/*--- Advanced decompression functions ---*/
 
 /*! ZSTD_estimateDCtxSize() :
  *  Gives the potential amount of memory allocated to create a ZSTD_DCtx */
@@ -412,47 +450,41 @@
 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 
 
-/* ******************************************************************
-*  Advanced Streaming functions
+/********************************************************************
+*  Advanced streaming functions
 ********************************************************************/
 
-/*======   compression   ======*/
-
+/*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
                                              ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; saves dictionary loading */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */
 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
 
 
-/*======   decompression   ======*/
-
+/*=====   Advanced Streaming decompression functions  =====*/
 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
-
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 
 
-/* ******************************************************************
+/*********************************************************************
 *  Buffer-less and synchronous inner streaming functions
-********************************************************************/
-/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*
+*  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
 *  But it's also a complex one, with many restrictions (documented below).
-*  Prefer using normal streaming API for an easier experience */
+*  Prefer using normal streaming API for an easier experience
+********************************************************************* */
 
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+/**
+  Buffer-less streaming compression (synchronous mode)
 
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -481,26 +513,17 @@
   You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
 */
 
-typedef struct {
-    unsigned long long frameContentSize;
-    unsigned windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameParams;
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
 
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-/*
+/*-
   Buffer-less streaming decompression (synchronous mode)
 
   A ZSTD_DCtx object is required to track streaming operations.
@@ -557,11 +580,27 @@
   It also returns Frame Size as fparamsPtr->frameContentSize.
 */
 
+typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameParams;
 
-/* **************************************
-*  Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+/*=====   Buffer-less streaming decompression functions  =====*/
+ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+/**
+    Block functions
+
+    Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
@@ -585,6 +624,7 @@
 */
 
 #define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
+/*=====   Raw zstd block functions  =====*/
 ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
diff --git a/programs/Makefile b/programs/Makefile
index ed26f07..f5e625f 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -57,8 +57,8 @@
 ifneq (,$(filter Windows%,$(OS)))
 EXT =.exe
 VOID = nul
-RES64_FILE = ..\build\VS2010\zstd\generate_res\zstd64.res
-RES32_FILE = ..\build\VS2010\zstd\generate_res\zstd32.res
+RES64_FILE = windres\zstd64.res
+RES32_FILE = windres\zstd32.res
 ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine)))
     RES_FILE = $(RES64_FILE)
 else
@@ -70,25 +70,32 @@
 endif
 
 
-.PHONY: default all clean clean_decomp_o install uninstall
+.PHONY: default all clean clean_decomp_o install uninstall generate_res
 
 default: zstd
 
 all: zstd
 
-
+$(ZSTDDECOMP_O): CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
+$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 $(ZSTDDECOMP_O): $(ZSTDDIR)/decompress/zstd_decompress.c
-	$(CC)    $(ALIGN_LOOP) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -c -o $@
+
+zstd  : $(ZSTDDECOMP_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \
+        zstdcli.c fileio.c bench.c datagen.c dibio.c
+ifneq (,$(filter Windows%,$(OS)))
+	windres\generate_res.bat
+endif
+	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(RES_FILE) -o $@$(EXT)
+
 
 $(ZSTDDECOMP32_O): $(ZSTDDIR)/decompress/zstd_decompress.c
 	$(CC)  -m32 $(ALIGN_LOOP) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -c -o $@
 
-zstd  : $(ZSTDDECOMP_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \
-        zstdcli.c fileio.c bench.c datagen.c dibio.c
-	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(RES_FILE) -o $@$(EXT)
-
 zstd32 : $(ZSTDDECOMP32_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \
         zstdcli.c fileio.c bench.c datagen.c dibio.c
+ifneq (,$(filter Windows%,$(OS)))
+	windres\generate_res.bat
+endif
 	$(CC)  -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(RES32_FILE) -o $@$(EXT)
 
 
@@ -122,6 +129,8 @@
 zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c
 	$(CC)      $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
 
+generate_res:
+	windres\generate_res.bat
 
 clean:
 	$(MAKE) -C ../lib clean
@@ -153,11 +162,11 @@
 	@echo zstd installation completed
 
 uninstall:
-	$(RM) $(DESTDIR)$(BINDIR)/zstdcat
-	$(RM) $(DESTDIR)$(BINDIR)/unzstd
-	$(RM) $(DESTDIR)$(BINDIR)/zstd$(EXT)
-	$(RM) $(DESTDIR)$(MANDIR)/zstdcat.1
-	$(RM) $(DESTDIR)$(MANDIR)/unzstd.1
-	$(RM) $(DESTDIR)$(MANDIR)/zstd.1
+	@$(RM) $(DESTDIR)$(BINDIR)/zstdcat
+	@$(RM) $(DESTDIR)$(BINDIR)/unzstd
+	@$(RM) $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	@$(RM) $(DESTDIR)$(MANDIR)/zstdcat.1
+	@$(RM) $(DESTDIR)$(MANDIR)/unzstd.1
+	@$(RM) $(DESTDIR)$(MANDIR)/zstd.1
 	@echo zstd programs successfully uninstalled
 endif
diff --git a/programs/fileio.c b/programs/fileio.c
index 56f22fe..c4c308e 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -8,13 +8,6 @@
  */
 
 
-/*
-  Note : this file is part of zstd command line, which is not library.
-  The license of ZSTD library is BSD.
-  The license of this file is GPLv2.
-*/
-
-
 /* *************************************
  *  Tuning options
  ***************************************/
@@ -127,6 +120,9 @@
 void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; }
 static U32 g_removeSrcFile = 0;
 void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
+static U32 g_memLimit = 0;
+void FIO_setMemLimit(unsigned memLimit) { g_memLimit = memLimit; }
+
 
 
 /*-*************************************
@@ -487,6 +483,7 @@
     /* Allocation */
     ress.dctx = ZSTD_createDStream();
     if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream");
+    ZSTD_setDStreamParameter(ress.dctx, ZSTDdsp_maxWindowSize, g_memLimit);
     ress.srcBufferSize = ZSTD_DStreamInSize();
     ress.srcBuffer = malloc(ress.srcBufferSize);
     ress.dstBufferSize = ZSTD_DStreamOutSize();
diff --git a/programs/fileio.h b/programs/fileio.h
index 1e89aec..60a7e0d 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -15,7 +15,6 @@
 extern "C" {
 #endif
 
-
 /* *************************************
 *  Special i/o constants
 **************************************/
@@ -37,6 +36,7 @@
 void FIO_setDictIDFlag(unsigned dictIDFlag);
 void FIO_setChecksumFlag(unsigned checksumFlag);
 void FIO_setRemoveSrcFile(unsigned flag);
+void FIO_setMemLimit(unsigned memLimit);
 
 
 /*-*************************************
diff --git a/programs/windres/generate_res.bat b/programs/windres/generate_res.bat
new file mode 100644
index 0000000..76e47fa
--- /dev/null
+++ b/programs/windres/generate_res.bat
@@ -0,0 +1,11 @@
+@echo off
+REM http://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable
+
+where /q windres.exe
+IF ERRORLEVEL 1 (
+    ECHO The windres.exe is missing. Ensure it is installed and placed in your PATH.
+    EXIT /B
+) ELSE (
+    windres.exe -I ..\lib -I windres -i windres\zstd.rc -O coff -F pe-x86-64 -o windres\zstd64.res
+    windres.exe -I ..\lib -I windres -i windres\zstd.rc -O coff -F pe-i386 -o windres\zstd32.res
+)
diff --git a/programs/windres/verrsrc.h b/programs/windres/verrsrc.h
new file mode 100644
index 0000000..e282add
--- /dev/null
+++ b/programs/windres/verrsrc.h
@@ -0,0 +1,8 @@
+/* minimal set of defines required to generate zstd.res from zstd.rc */
+
+#define VS_VERSION_INFO         1
+
+#define VS_FFI_FILEFLAGSMASK    0x0000003FL
+#define VOS_NT_WINDOWS32        0x00040004L
+#define VFT_DLL                 0x00000002L
+#define VFT2_UNKNOWN            0x00000000L
diff --git a/programs/windres/zstd.rc b/programs/windres/zstd.rc
new file mode 100644
index 0000000..f5e4047
--- /dev/null
+++ b/programs/windres/zstd.rc
@@ -0,0 +1,51 @@
+// Microsoft Visual C++ generated resource script.
+//
+
+#include "zstd.h" /* ZSTD_VERSION_STRING */
+#define APSTUDIO_READONLY_SYMBOLS
+#include "verrsrc.h"
+#undef APSTUDIO_READONLY_SYMBOLS
+
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+LANGUAGE 9, 1
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO     VERSIONINFO
+  FILEVERSION       ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0
+  PRODUCTVERSION    ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+ FILEFLAGS VS_FF_DEBUG
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS VOS_NT_WINDOWS32
+ FILETYPE VFT_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904B0"
+        BEGIN
+            VALUE "CompanyName", "Yann Collet, Facebook, Inc."
+            VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
+            VALUE "FileVersion", ZSTD_VERSION_STRING
+            VALUE "InternalName", "zstd.exe"
+            VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
+            VALUE "OriginalFilename", "zstd.exe"
+            VALUE "ProductName", "Zstandard"
+            VALUE "ProductVersion", ZSTD_VERSION_STRING
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x0409, 1200
+    END
+END
+
+#endif
diff --git a/programs/windres/zstd32.res b/programs/windres/zstd32.res
new file mode 100644
index 0000000..aec8fcf
--- /dev/null
+++ b/programs/windres/zstd32.res
Binary files differ
diff --git a/programs/windres/zstd64.res b/programs/windres/zstd64.res
new file mode 100644
index 0000000..0fa5040
--- /dev/null
+++ b/programs/windres/zstd64.res
Binary files differ
diff --git a/programs/zstd.1 b/programs/zstd.1
index c262a0c..8128837 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -35,22 +35,85 @@
 \fBzstd\fR command line is generally similar to gzip, but features the following differences :
  - Source files are preserved by default
    It's possible to remove them automatically by using \fB--rm\fR command
- - By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
+ - When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default.
      Use \fB-q\fR to turn them off
 
 
 
 .SH OPTIONS
+
+.
+.SS "Integer suffixes and special values"
+In most places where an integer argument is expected,
+an optional suffix is supported to easily indicate large integers.
+There must be no space between the integer and the suffix.
+.TP
+.B KiB
+Multiply the integer by 1,024 (2^10).
+.BR Ki ,
+.BR K ,
+and
+.B KB
+are accepted as synonyms for
+.BR KiB .
+.TP
+.B MiB
+Multiply the integer by 1,048,576 (2^20).
+.BR Mi ,
+.BR M ,
+and
+.B MB
+are accepted as synonyms for
+.BR MiB .
+
+.
+.SS "Operation mode"
+If multiple operation mode options are given,
+the last one takes effect.
+.TP
+.BR \-z ", " \-\-compress
+Compress.
+This is the default operation mode when no operation mode option
+is specified and no other operation mode is implied from
+the command name (for example,
+.B unzstd
+implies
+.BR \-\-decompress ).
+.TP
+.BR \-d ", " \-\-decompress ", " \-\-uncompress
+Decompress.
+.TP
+.BR \-t ", " \-\-test
+Test the integrity of compressed
+.IR files .
+This option is equivalent to
+.B "\-\-decompress \-\-stdout"
+except that the decompressed data is discarded instead of being
+written to standard output.
+No files are created or removed.
+.TP
+.B \-b#
+ benchmark file(s) using compression level #
+.TP
+.B \--train FILEs
+ use FILEs as training set to create a dictionary. The training set should contain a lot of small files (> 100).
+
+.
+.SS "Operation modifiers"
 .TP
 .B \-#
- # compression level [1-22] (default:3)
+ # compression level [1-19] (default:3)
 .TP
-.BR \-d ", " --decompress
- decompression
+.BR \--ultra
+ unlocks high compression levels 20+ (maximum 22), using a lot more memory
 .TP
 .B \-D file
  use `file` as Dictionary to compress or decompress FILE(s)
 .TP
+.BR \--no-dictID
+ do not store dictionary ID within frame header (dictionary compression).
+ the decoder will have to rely on implicit knowledge about which dictionary to use; it won't be able to check if it's correct.
+.TP
 .B \-o file
  save result into `file` (only possible with a single INPUT-FILE)
 .TP
@@ -60,6 +123,11 @@
 .BR \-c ", " --stdout
  force write to standard output, even if it is the console
 .TP
+.BR \--[no-]sparse
+ enable / disable sparse FS support, to make files with many zeroes smaller on disk.
+ default : enabled when output is into a file, and disabled when output is stdout.
+ This setting overrides default and can force sparse mode over stdout.
+.TP
 .BR \--rm
  remove source file(s) after successful compression or decompression
 .TP
@@ -83,8 +151,8 @@
  suppress warnings, interactivity and notifications.
  specify twice to suppress errors too.
 .TP
-.BR \-C ", " --check
- add integrity check computed from uncompressed data
+.BR \-C ", " --[no-]check
+ add integrity check computed from uncompressed data (default : enabled)
 .TP
 .BR \-t ", " --test
  Test the integrity of compressed files. This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
@@ -103,10 +171,8 @@
 Typical gains range from ~10% (at 64KB) to x5 better (at <1KB).
 .TP
 .B \--train FILEs
- use FILEs as training set to create a dictionary.
- The training set should contain a lot of small files (> 100).
- and weight typically 100x the target dictionary size
- (for example, 10 MB for a 100 KB dictionary)
+ use FILEs as training set to create a dictionary. The training set should contain a lot of small files (> 100),
+and weigh typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)
 .TP
 .B \-o file
  dictionary saved into `file` (default: dictionary)
@@ -131,6 +197,9 @@
 .B \-b#
  benchmark file(s) using compression level #
 .TP
+.B \-e#
+ benchmark file(s) using multiple compression levels, from -b# to -e# (inclusive).
+.TP
 .B \-i#
  minimum evaluation time, in seconds (default : 3s), benchmark mode only
 .TP
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index fe97f96..db4d6ac 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -8,13 +8,6 @@
  */
 
 
-/*
-  Note : this is a user program, not part of libzstd.
-  The license of libzstd is BSD.
-  The license of this command line program is GPLv2.
-*/
-
-
 /*-************************************
 *  Tuning parameters
 **************************************/
@@ -32,7 +25,6 @@
 **************************************/
 #include "util.h"     /* Compiler options, UTIL_HAS_CREATEFILELIST */
 #include <string.h>   /* strcmp, strlen */
-#include <ctype.h>    /* toupper */
 #include <errno.h>    /* errno */
 #include "fileio.h"
 #ifndef ZSTD_NOBENCH
@@ -142,6 +134,7 @@
     DISPLAY( "--test  : test compressed file integrity \n");
     DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n");
 #endif
+    DISPLAY( " -M#    : Set a memory usage limit for decompression \n");
     DISPLAY( "--      : All arguments after \"--\" are treated as files \n");
 #ifndef ZSTD_NODICT
     DISPLAY( "\n");
@@ -179,40 +172,55 @@
 }
 
 /*! readU32FromChar() :
-    @return : unsigned integer value reach from input in `char` format
+    @return : unsigned integer value read from input in `char` format
+    allows and interprets K, Ki, KB, KiB, M, Mi, MB and MiB suffixes.
     Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : this function can overflow if digit string > MAX_UINT */
+    Note : function result can overflow if the value (after applying any suffix) exceeds UINT_MAX */
 static unsigned readU32FromChar(const char** stringPtr)
 {
     unsigned result = 0;
     while ((**stringPtr >='0') && (**stringPtr <='9'))
         result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+    if ((**stringPtr=='K') || (**stringPtr=='M')) {
+        result <<= 10;
+        if (**stringPtr=='M') result <<= 10;
+        (*stringPtr)++ ;
+        if (**stringPtr=='i') (*stringPtr)++;
+        if (**stringPtr=='B') (*stringPtr)++;
+    }
     return result;
 }
 
+static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+{
+    size_t const comSize = strlen(longCommand);
+    unsigned const result = !strncmp(*stringPtr, longCommand, comSize);
+    if (result) *stringPtr += comSize;
+    return result;
+}
+
+typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train } zstd_operation_mode;
 
 #define CLEAN_RETURN(i) { operationResult = (i); goto _end; }
 
 int main(int argCount, const char* argv[])
 {
     int argNb,
-        bench=0,
-        decode=0,
-        testmode=0,
         forceStdout=0,
         main_pause=0,
         nextEntryIsDictionary=0,
         operationResult=0,
-        dictBuild=0,
         nextArgumentIsOutFileName=0,
         nextArgumentIsMaxDict=0,
         nextArgumentIsDictID=0,
         nextArgumentIsFile=0,
         ultra=0,
         lastCommand = 0;
+    zstd_operation_mode operation = zom_compress;
     int cLevel = ZSTDCLI_CLEVEL_DEFAULT;
     int cLevelLast = 1;
     unsigned recursive = 0;
+    unsigned memLimit = 0;
     const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
     unsigned filenameIdx = 0;
     const char* programName = argv[0];
@@ -231,8 +239,8 @@
     /* init */
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
     (void)dictCLevel; (void)dictSelect; (void)dictID;  /* not used when ZSTD_NODICT set */
-    (void)decode; (void)cLevel; (void)testmode;/* not used when ZSTD_NOCOMPRESS set */
-    (void)ultra; /* not used when ZSTD_NODECOMPRESS set */
+    (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */
+    (void)ultra; (void)memLimit;   /* not used when ZSTD_NODECOMPRESS set */
     if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
     filenameTable[0] = stdinmark;
     displayOut = stderr;
@@ -243,20 +251,22 @@
     }
 
     /* preset behaviors */
-    if (!strcmp(programName, ZSTD_UNZSTD)) decode=1;
-    if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
+    if (!strcmp(programName, ZSTD_UNZSTD)) operation=zom_decompress;
+    if (!strcmp(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
 
     /* command switches */
-    for(argNb=1; argNb<argCount; argNb++) {
+    for (argNb=1; argNb<argCount; argNb++) {
         const char* argument = argv[argNb];
         if(!argument) continue;   /* Protection if argument empty */
 
         if (nextArgumentIsFile==0) {
 
             /* long commands (--long-word) */
-            if (!strcmp(argument, "--")) { nextArgumentIsFile=1; continue; }
-            if (!strcmp(argument, "--decompress")) { decode=1; continue; }
-            if (!strcmp(argument, "--force")) {  FIO_overwriteMode(); continue; }
+            if (!strcmp(argument, "--")) { nextArgumentIsFile=1; continue; }   /* only file names allowed from now on */
+            if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
+            if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
+            if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
+            if (!strcmp(argument, "--force")) { FIO_overwriteMode(); continue; }
             if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
             if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
             if (!strcmp(argument, "--verbose")) { displayLevel++; continue; }
@@ -268,13 +278,18 @@
             if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
             if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
             if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
-            if (!strcmp(argument, "--test")) { testmode=1; decode=1; continue; }
-            if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
+            if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
+            if (!strcmp(argument, "--train")) { operation=zom_train; outFileName=g_defaultDictName; continue; }
             if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; }
             if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; }
             if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
             if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
 
+            /* long commands with arguments */
+            if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
+            if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
+            if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
+
             /* '-' means stdin/stdout */
             if (!strcmp(argument, "-")){
                 if (!filenameIdx) {
@@ -308,8 +323,11 @@
                     case 'H':
                     case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
 
+                         /* Compress */
+                    case 'z': operation=zom_compress; argument++; break;
+
                          /* Decoding */
-                    case 'd': decode=1; argument++; break;
+                    case 'd': operation=zom_decompress; argument++; break;
 
                         /* Force stdout, even if stdout==console */
                     case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
@@ -333,11 +351,17 @@
                     case 'C': argument++; FIO_setChecksumFlag(2); break;
 
                         /* test compressed file */
-                    case 't': testmode=1; decode=1; argument++; break;
+                    case 't': operation=zom_test; argument++; break;
 
                         /* destination file name */
                     case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
 
+                        /* limit decompression memory */
+                    case 'M':
+                        argument++;
+                        memLimit = readU32FromChar(&argument);
+                        break;
+
 #ifdef UTIL_HAS_CREATEFILELIST
                         /* recursive */
                     case 'r': recursive=1; argument++; break;
@@ -345,7 +369,7 @@
 
 #ifndef ZSTD_NOBENCH
                         /* Benchmark */
-                    case 'b': bench=1; argument++; break;
+                    case 'b': operation=zom_bench; argument++; break;
 
                         /* range bench (benchmark only) */
                     case 'e':
@@ -366,10 +390,7 @@
                         /* cut input into blocks (benchmark only) */
                     case 'B':
                         argument++;
-                        {   size_t bSize = readU32FromChar(&argument);
-                            if (toupper(*argument)=='K') bSize<<=10, argument++;  /* allows using KB notation */
-                            if (toupper(*argument)=='M') bSize<<=20, argument++;
-                            if (toupper(*argument)=='B') argument++;
+                        {   size_t const bSize = readU32FromChar(&argument);
                             BMK_setNotificationLevel(displayLevel);
                             BMK_SetBlockSize(bSize);
                         }
@@ -402,8 +423,6 @@
                 nextArgumentIsMaxDict = 0;
                 lastCommand = 0;
                 maxDictSize = readU32FromChar(&argument);
-                if (toupper(*argument)=='K') maxDictSize <<= 10;
-                if (toupper(*argument)=='M') maxDictSize <<= 20;
                 continue;
             }
 
@@ -454,7 +473,7 @@
 #endif
 
     /* Check if benchmark is selected */
-    if (bench) {
+    if (operation==zom_bench) {
 #ifndef ZSTD_NOBENCH
         BMK_setNotificationLevel(displayLevel);
         BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast);
@@ -463,7 +482,7 @@
     }
 
     /* Check if dictionary builder is selected */
-    if (dictBuild) {
+    if (operation==zom_train) {
 #ifndef ZSTD_NODICT
         ZDICT_params_t dictParams;
         memset(&dictParams, 0, sizeof(dictParams));
@@ -482,7 +501,7 @@
 
     /* Check if input/output defined as console; trigger an error in this case */
     if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName));
-    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && strcmp(filenameTable[0], stdinmark) && !(forceStdout && decode))
+    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && strcmp(filenameTable[0], stdinmark) && !(forceStdout && ((operation==zom_decompress) || (operation==zom_test))))   /* forced stdout to console stays allowed for decompress and test, as with the former `decode` flag */
         CLEAN_RETURN(badusage(programName));
 
     /* user-selected output filename, only possible with a single file */
@@ -506,7 +525,7 @@
 
     /* IO Stream/File */
     FIO_setNotificationLevel(displayLevel);
-    if (!decode) {
+    if (operation==zom_compress) {
 #ifndef ZSTD_NOCOMPRESS
         if ((filenameIdx==1) && outFileName)
           operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel);
@@ -515,9 +534,10 @@
 #else
         DISPLAY("Compression not supported\n");
 #endif
-    } else {  /* decompression */
+    } else {  /* decompression or test */
 #ifndef ZSTD_NODECOMPRESS
-        if (testmode) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
+        if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
+        FIO_setMemLimit(memLimit);
         if (filenameIdx==1 && outFileName)
             operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);
         else
diff --git a/tests/Makefile b/tests/Makefile
index 3ce9f31..ecff182 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,25 +1,10 @@
 # ##########################################################################
-# ZSTD tests - Makefile
-# Copyright (C) Yann Collet 2015-2016
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
 #
-# GPL v2 License
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# You can contact the author at :
-#  - zstd homepage : http://www.zstd.net/
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
 # ##########################################################################
 # datagen : Synthetic and parametrable data generator, for tests
 # fullbench  : Precisely measure speed for each zstd inner functions
@@ -94,10 +79,10 @@
 zstd_nolegacy:
 	$(MAKE) -C $(PRGDIR) $@
 
-fullbench  : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c fullbench.c
+fullbench  : $(ZSTD_FILES) $(PRGDIR)/datagen.c fullbench.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-fullbench32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c fullbench.c
+fullbench32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c fullbench.c
 	$(CC)  -m32  $(FLAGS) $^ -o $@$(EXT)
 
 fuzzer  : CPPFLAGS += -I$(ZSTDDIR)/dictBuilder
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 670b516..ffc32f9 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -22,7 +22,6 @@
 #include "zstd.h"            /* ZSTD_VERSION_STRING */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_DTABLE_SIZE_U32 */
 #include "fse.h"
-#include "zbuff.h"
 #include "datagen.h"
 
 
@@ -130,29 +129,39 @@
 }
 
 
-static ZBUFF_CCtx* g_zbcc = NULL;
-size_t local_ZBUFF_compress(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+static ZSTD_CStream* g_cstream= NULL;   /* streaming compression context used by local_ZSTD_compressStream() */
+size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)   /* compresses src into dst in one streaming pass; @return : compressed size, or a zstd error code */
 {
-    size_t compressedSize;
-    size_t srcRead = srcSize, dstWritten = dstCapacity;
+    ZSTD_outBuffer buffOut;
+    ZSTD_inBuffer buffIn;
     (void)buff2;
-    ZBUFF_compressInit(g_zbcc, 1);
-    ZBUFF_compressContinue(g_zbcc, dst, &dstWritten, src, &srcRead);
-    compressedSize = dstWritten;
-    dstWritten = dstCapacity-compressedSize;
-    ZBUFF_compressEnd(g_zbcc, ((char*)dst)+compressedSize, &dstWritten);
-    compressedSize += dstWritten;
-    return compressedSize;
+    { size_t const initResult = ZSTD_initCStream(g_cstream, 1); if (ZSTD_isError(initResult)) return initResult; }
+    buffOut.dst = dst;
+    buffOut.size = dstCapacity;
+    buffOut.pos = 0;
+    buffIn.src = src;
+    buffIn.size = srcSize;
+    buffIn.pos = 0;
+    { size_t const cResult = ZSTD_compressStream(g_cstream, &buffOut, &buffIn); if (ZSTD_isError(cResult)) return cResult; }   /* return the error code so the caller's ZSTD_isError() check can fire */
+    { size_t const endResult = ZSTD_endStream(g_cstream, &buffOut); if (ZSTD_isError(endResult)) return endResult; }
+    return buffOut.pos;   /* number of bytes written into dst */
 }
 
-static ZBUFF_DCtx* g_zbdc = NULL;
-static size_t local_ZBUFF_decompress(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)
+static ZSTD_DStream* g_dstream= NULL;   /* streaming decompression context used by local_ZSTD_decompressStream() */
+static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize)   /* decompresses g_cSize bytes of buff2 into dst; @return : regenerated size, or a zstd error code */
 {
-    size_t srcRead = g_cSize, dstWritten = dstCapacity;
+    ZSTD_outBuffer buffOut;
+    ZSTD_inBuffer buffIn;
     (void)src; (void)srcSize;
-    ZBUFF_decompressInit(g_zbdc);
-    ZBUFF_decompressContinue(g_zbdc, dst, &dstWritten, buff2, &srcRead);
-    return dstWritten;
+    { size_t const initResult = ZSTD_initDStream(g_dstream); if (ZSTD_isError(initResult)) return initResult; }
+    buffOut.dst = dst;
+    buffOut.size = dstCapacity;
+    buffOut.pos = 0;
+    buffIn.src = buff2;
+    buffIn.size = g_cSize;
+    buffIn.pos = 0;
+    { size_t const dResult = ZSTD_decompressStream(g_dstream, &buffOut, &buffIn); if (ZSTD_isError(dResult)) return dResult; }   /* return the error code so the caller's ZSTD_isError() check can fire */
+    return buffOut.pos;   /* number of bytes written into dst */
 }
 
 static ZSTD_CCtx* g_zcc = NULL;
@@ -220,10 +229,10 @@
         benchFunction = local_ZSTD_decodeSeqHeaders; benchName = "ZSTD_decodeSeqHeaders";
         break;
     case 41:
-        benchFunction = local_ZBUFF_compress; benchName = "ZBUFF_compressContinue";
+        benchFunction = local_ZSTD_compressStream; benchName = "ZSTD_compressStream";
         break;
     case 42:
-        benchFunction = local_ZBUFF_decompress; benchName = "ZBUFF_decompressContinue";
+        benchFunction = local_ZSTD_decompressStream; benchName = "ZSTD_decompressStream";
         break;
     default :
         return 0;
@@ -296,10 +305,10 @@
             break;
         }
     case 41 :
-        if (g_zbcc==NULL) g_zbcc = ZBUFF_createCCtx();
+        if (g_cstream==NULL) g_cstream = ZSTD_createCStream();
         break;
     case 42 :
-        if (g_zbdc==NULL) g_zbdc = ZBUFF_createDCtx();
+        if (g_dstream==NULL) g_dstream = ZSTD_createDStream();
         g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, 1);
         break;
 
@@ -311,27 +320,27 @@
 
     { size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; }     /* warming up memory */
 
-    { U32 loopNb;
-    for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
-        clock_t const timeLoop = TIMELOOP_S * CLOCKS_PER_SEC;
-        clock_t clockStart;
-        U32 nbRounds;
-        size_t benchResult=0;
-        double averageTime;
+    {   U32 loopNb;
+        for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
+            clock_t const timeLoop = TIMELOOP_S * CLOCKS_PER_SEC;
+            clock_t clockStart;
+            U32 nbRounds;
+            size_t benchResult=0;
+            double averageTime;
 
-        DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
+            DISPLAY("%2i- %-30.30s : \r", loopNb, benchName);
 
-        clockStart = clock();
-        while (clock() == clockStart);
-        clockStart = clock();
-        for (nbRounds=0; BMK_clockSpan(clockStart) < timeLoop; nbRounds++) {
-            benchResult = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
-            if (ZSTD_isError(benchResult)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(benchResult)); exit(1); }
-        }
-        averageTime = (((double)BMK_clockSpan(clockStart)) / CLOCKS_PER_SEC) / nbRounds;
-        if (averageTime < bestTime) bestTime = averageTime;
-        DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / (1 MB) / bestTime, (U32)benchResult);
-    }}
+            clockStart = clock();
+            while (clock() == clockStart);
+            clockStart = clock();
+            for (nbRounds=0; BMK_clockSpan(clockStart) < timeLoop; nbRounds++) {
+                benchResult = benchFunction(dstBuff, dstBuffSize, buff2, src, srcSize);
+                if (ZSTD_isError(benchResult)) { DISPLAY("ERROR ! %s() => %s !! \n", benchName, ZSTD_getErrorName(benchResult)); exit(1); }
+            }
+            averageTime = (((double)BMK_clockSpan(clockStart)) / CLOCKS_PER_SEC) / nbRounds;
+            if (averageTime < bestTime) bestTime = averageTime;
+            DISPLAY("%2i- %-30.30s : %7.1f MB/s  (%9u)\r", loopNb, benchName, (double)srcSize / (1 MB) / bestTime, (U32)benchResult);
+    }   }
     DISPLAY("%2u\n", benchNb);
 
 _cleanOut:
@@ -466,7 +475,7 @@
                 switch(argument[0])
                 {
                     /* Display help on usage */
-                case 'h' :
+                case 'h':
                 case 'H': return usage_advanced(exename);
 
                     /* Pause at the end (hidden option) */
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index ae8450e..22f034d 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -27,7 +27,7 @@
 #include <time.h>         /* clock_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue, ZSTD_compressBlock */
 #include "zstd.h"         /* ZSTD_VERSION_STRING */
-#include "error_public.h" /* ZSTD_getErrorCode */
+#include "zstd_errors.h" /* ZSTD_getErrorCode */
 #include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "datagen.h"      /* RDG_genBuffer */
 #include "mem.h"
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index f253fa3..5eabcba 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -302,7 +302,8 @@
                               "ZSTD_lazy   ",
                               "ZSTD_lazy2  ",
                               "ZSTD_btlazy2",
-                              "ZSTD_btopt  " };
+                              "ZSTD_btopt  ",
+                              "ZSTD_btopt2 "};
 
 static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize)
 {
@@ -442,7 +443,7 @@
         g_params.chainLog = 0, g_params.searchLog = 0;
     if (params.strategy == ZSTD_dfast)
         g_params.searchLog = 0;
-    if (params.strategy != ZSTD_btopt )
+    if (params.strategy != ZSTD_btopt && params.strategy != ZSTD_btopt2)
         g_params.targetLength = 0;
     return &g_params;
 }
@@ -548,7 +549,7 @@
         p.windowLog  = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN;
         p.searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN;
         p.targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN;
-        p.strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btopt +1));
+        p.strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btopt2 +1));
         validated = !ZSTD_isError(ZSTD_checkCParams(p));
     }
     return p;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index d94d8fa..ad70538 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -81,6 +81,11 @@
 $ZSTD -dc - < tmp.zst > $INTOVOID
 $ZSTD -d    < tmp.zst > $INTOVOID   # implicit stdout when stdin is used
 $ZSTD -d  - < tmp.zst > $INTOVOID
+$ECHO "test : impose memory limitation (must fail)"
+$ZSTD -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed"
+$ZSTD -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+$ZSTD -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+$ZSTD -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
 $ECHO "test : overwrite protection"
 $ZSTD -q tmp && die "overwrite check failed!"
 $ECHO "test : force overwrite"
@@ -253,6 +258,17 @@
 ls -ls tmp1.zst  # check file is still present
 
 
+$ECHO "\n**** benchmark mode tests **** "
+
+$ECHO "bench one file"
+./datagen > tmp1
+$ZSTD -bi1 tmp1
+$ECHO "bench multiple levels"
+$ZSTD -i1b1e3 tmp1
+$ECHO "with recursive and quiet modes"
+$ZSTD -rqi1b1e3 tmp1
+
+
 $ECHO "\n**** zstd round-trip tests **** "
 
 roundTripTest
diff --git a/tests/test-zstd-speed.py b/tests/test-zstd-speed.py
index 56b4d46..23d4f47 100755
--- a/tests/test-zstd-speed.py
+++ b/tests/test-zstd-speed.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
 #
 # Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
@@ -21,7 +21,7 @@
 import traceback
 import hashlib
 
-script_version = 'v1.0.1 (2016-09-15)'
+script_version = 'v1.1.1 (2016-10-28)'
 default_repo_url = 'https://github.com/facebook/zstd.git'
 working_dir_name = 'speedTest'
 working_path = os.getcwd() + '/' + working_dir_name     # /path/to/zstd/tests/speedTest
@@ -31,7 +31,7 @@
 verbose = False
 clang_version = "unknown"
 gcc_version = "unknown"
-
+args = None
 
 
 def hashfile(hasher, fname, blocksize=65536):
@@ -48,17 +48,20 @@
 def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
     if print_command:
         log("> " + command)
-    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                             shell=param_shell, cwd=execute.cwd)
-    stdout = popen.communicate()[0]
-    stdout_lines = stdout.splitlines()
+    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd)
+    stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout)
+    stderr_lines = stderr_lines.decode("utf-8")
+    stdout_lines = stdout_lines.decode("utf-8")
     if print_output:
-        print('\n'.join(stdout_lines))
+        if stdout_lines:
+            print(stdout_lines)
+        if stderr_lines:
+            print(stderr_lines)
     if popen.returncode is not None and popen.returncode != 0:
-        if not print_output and print_error:
-            print('\n'.join(stdout_lines))
-        raise RuntimeError('\n'.join(stdout_lines))
-    return stdout_lines
+        if stderr_lines and not print_output and print_error:
+            print(stderr_lines)
+        raise RuntimeError(stdout_lines + stderr_lines)
+    return (stdout_lines + stderr_lines).splitlines()
 execute.cwd = None
 
 
@@ -183,8 +186,10 @@
     last_commit = None
     commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
     if os.path.isfile(commitFileName):
-        last_commit = file(commitFileName, 'r').read()
-    file(commitFileName, 'w').write(commit)
+        with open(commitFileName, 'r') as infile:
+            last_commit = infile.read()
+    with open(commitFileName, 'w') as outfile:
+        outfile.write(commit)
     return last_commit
 
 
@@ -199,7 +204,7 @@
 
 
 def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
-    local_branch = string.split(branch, '/')[1]
+    local_branch = branch.split('/')[1]
     version = local_branch.rpartition('-')[2] + '_' + commit
     if not args.dry_run:
         execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version +
@@ -255,6 +260,7 @@
     parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
     parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
     parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
+    parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
     parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
     parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
     args = parser.parse_args()
@@ -301,6 +307,7 @@
         print("ratioLimit=%s" % args.ratioLimit)
         print("lastCLevel=%s" % args.lastCLevel)
         print("sleepTime=%s" % args.sleepTime)
+        print("timeout=%s" % args.timeout)
         print("dry_run=%s" % args.dry_run)
         print("verbose=%s" % args.verbose)
         print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))
@@ -323,10 +330,18 @@
         exit(1)
 
     send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
-    file(pidfile, 'w').write(pid)
+    with open(pidfile, 'w') as the_file:
+        the_file.write(pid)
 
+    branch = ""
+    commit = ""
+    first_time = True
     while True:
         try:
+            if first_time:
+                first_time = False
+            else:
+                time.sleep(args.sleepTime)
             loadavg = os.getloadavg()[0]
             if (loadavg <= args.maxLoadAvg):
                 branches = git_get_branches()
@@ -344,13 +359,11 @@
                 log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
             if verbose:
                 log("sleep for %s seconds" % args.sleepTime)
-            time.sleep(args.sleepTime)
         except Exception as e:
             stack = traceback.format_exc()
             email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
             send_email(args.emails, email_topic, stack, have_mutt, have_mail)
             print(stack)
-            time.sleep(args.sleepTime)
         except KeyboardInterrupt:
             os.unlink(pidfile)
             send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 8486013..38b6a76 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -77,7 +77,7 @@
 /*! FUZ_rand() :
     @return : a 27 bits random value, from a 32-bits `seed`.
     `seed` is also modified */
-#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
 unsigned int FUZ_rand(unsigned int* seedPtr)
 {
     U32 rand32 = *seedPtr;
@@ -281,12 +281,53 @@
     }
     DISPLAYLEVEL(4, "OK \n");
 
+    /* CDict scenario */
+    DISPLAYLEVEL(4, "test%3i : digested dictionary : ", testNb++);
+    {   ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 128 KB, 1);
+        size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict);
+        if (ZSTD_isError(initError)) goto _output_error;
+        cSize = 0;
+        outBuff.dst = compressedBuffer;
+        outBuff.size = compressedBufferSize;
+        outBuff.pos = 0;
+        inBuff.src = CNBuffer;
+        inBuff.size = CNBufferSize;
+        inBuff.pos = 0;
+        { size_t const r = ZSTD_compressStream(zc, &outBuff, &inBuff);
+          if (ZSTD_isError(r)) goto _output_error; }
+        if (inBuff.pos != inBuff.size) goto _output_error;   /* entire input should be consumed */
+        { size_t const r = ZSTD_endStream(zc, &outBuff);
+          if (r != 0) goto _output_error; }  /* error, or some data not flushed */
+        cSize = outBuff.pos;
+        ZSTD_freeCDict(cdict);
+        DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100);
+    }
+
     DISPLAYLEVEL(4, "test%3i : check CStream size : ", testNb++);
     { size_t const s = ZSTD_sizeof_CStream(zc);
       if (ZSTD_isError(s)) goto _output_error;
       DISPLAYLEVEL(4, "OK (%u bytes) \n", (U32)s);
     }
 
+    /* DDict scenario */
+    DISPLAYLEVEL(4, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize);
+    {   ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, 128 KB);
+        size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict);
+        if (ZSTD_isError(initError)) goto _output_error;
+        inBuff.src = compressedBuffer;
+        inBuff.size = cSize;
+        inBuff.pos = 0;
+        outBuff.dst = decodedBuffer;
+        outBuff.size = CNBufferSize;
+        outBuff.pos = 0;
+        { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff);
+          if (r != 0) goto _output_error; }  /* should reach end of frame == 0; otherwise, some data left, or an error */
+        if (outBuff.pos != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
+        if (inBuff.pos != inBuff.size) goto _output_error;   /* should have read the entire frame */
+        ZSTD_freeDDict(ddict);
+        DISPLAYLEVEL(4, "OK \n");
+    }
+
     /* test ZSTD_setDStreamParameter() resilience */
     DISPLAYLEVEL(4, "test%3i : wrong parameter for ZSTD_setDStreamParameter(): ", testNb++);
     { size_t const r = ZSTD_setDStreamParameter(zd, (ZSTD_DStreamParameter_e)999, 1);  /* large limit */
@@ -511,7 +552,7 @@
 
         /* multi - fragments decompression test */
         if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) {
-            CHECK( ZSTD_isError(ZSTD_resetDStream(zd)), "ZSTD_resetDStream failed");
+            CHECK (ZSTD_isError(ZSTD_resetDStream(zd)), "ZSTD_resetDStream failed");
         } else
             ZSTD_initDStream_usingDict(zd, dict, dictSize);
         {   size_t decompressionResult = 1;
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index 69c976f..0e4ca9e 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -14,6 +14,7 @@
 ZLIBWRAPPER_PATH = .
 EXAMPLE_PATH = examples
 PROGRAMS_PATH = ../programs
+TEST_FILE = ../doc/zstd_compression_format.md
 CC ?= gcc
 CFLAGS ?= -O3 
 CFLAGS += $(LOC) -I$(PROGRAMS_PATH) -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH) -std=gnu99
@@ -27,11 +28,11 @@
 test: example fitblk example_zstd fitblk_zstd zwrapbench
 	./example
 	./example_zstd
-	./fitblk 10240 <../zstd_compression_format.md
-	./fitblk 40960 <../zstd_compression_format.md
-	./fitblk_zstd 10240 <../zstd_compression_format.md
-	./fitblk_zstd 40960 <../zstd_compression_format.md
-	./zwrapbench -qb3B1K ../zstd_compression_format.md
+	./fitblk 10240 <$(TEST_FILE)
+	./fitblk 40960 <$(TEST_FILE)
+	./fitblk_zstd 10240 <$(TEST_FILE)
+	./fitblk_zstd 40960 <$(TEST_FILE)
+	./zwrapbench -qb3B1K $(TEST_FILE)
 	./zwrapbench -rqb1e5 ../lib ../programs ../tests
 
 #valgrindTest: ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.so
@@ -40,11 +41,11 @@
 	@echo "\n ---- valgrind tests ----"
 	$(VALGRIND) ./example
 	$(VALGRIND) ./example_zstd
-	$(VALGRIND) ./fitblk 10240 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk 40960 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk_zstd 10240 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk_zstd 40960 <../zstd_compression_format.md
-	$(VALGRIND) ./zwrapbench -qb3B1K ../zstd_compression_format.md
+	$(VALGRIND) ./fitblk 10240 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk 40960 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk_zstd 10240 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk_zstd 40960 <$(TEST_FILE)
+	$(VALGRIND) ./zwrapbench -qb3B1K $(TEST_FILE)
 	$(VALGRIND) ./zwrapbench -rqb1e5 ../lib ../programs ../tests
 
 .c.o: