Merge pull request #2348 from dscheg/dev

Fix dll path in case of cross-compilation
diff --git a/.github/workflows/generic-release.yml b/.github/workflows/generic-release.yml
index d9e7109..de4a1cb 100644
--- a/.github/workflows/generic-release.yml
+++ b/.github/workflows/generic-release.yml
@@ -3,17 +3,17 @@
 on:
   pull_request:
     # This will eventually only be for pushes to master
-    # but for dogfooding purposes, I'm running it even 
+    # but for dogfooding purposes, I'm running it even
     # on dev pushes
     branches: [ dev, master, actionsTest ]
 
 jobs:
   # missing jobs
-  # 
+  #
   # ppc64le + fuzz test
   # Qemu PPC64 + Fuzz test
   # Qemu aarch64 + Fuzz Test (on Xenial)
-  # versions comp   
+  # versions comp
   # meson test
 
   osx:
@@ -24,7 +24,7 @@
       run: |
         make test
         # make -c lib all (need to fix. not working right now)
-  
+
   zbuff:
     runs-on: ubuntu-16.04
     steps:
@@ -32,7 +32,7 @@
     - name: zbuff test
       run: |
         make -C tests test-zbuff
-        
+
   tsan:
     runs-on: ubuntu-16.04
     steps:
@@ -53,7 +53,7 @@
         make gpp6install valgrindinstall
         make -C zlibWrapper test
         make -C zlibWrapper valgrindTest
-  
+
   lz4-threadpool-partial-libs:
     runs-on: ubuntu-16.04
     steps:
@@ -62,6 +62,7 @@
       run: |
         make lz4install
         make -C tests test-lz4
+        make check < /dev/null | tee   # mess with lz4 console detection
         make clean
         make -C tests test-pool
         make clean
diff --git a/.travis.yml b/.travis.yml
index fd9a435..226d4c0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -76,14 +76,14 @@
 
     - name: Minimal Decompressor Macros    # ~5mn
       script:
-        - make clean
-        - make -j all check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
-        - make clean
-        - make -j all check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
-        - make clean
-        - make -j all check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
-        - make clean
-        - make -j all check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
+        - make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
+        - make clean && make check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
+        - make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
+        - make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
+        - make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
+        - make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
+        - make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
+        - make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
 
     - name: cmake build and test check    # ~6mn
       script:
@@ -216,11 +216,12 @@
         - make -C zlibWrapper test
         - make -C zlibWrapper valgrindTest
 
-    - name: LZ4, thread pool, and partial libs tests    # ~2mn
+    - name: LZ4, thread pool, and partial libs tests    # ~4mn
       if: branch = master
       script:
         - make lz4install
         - make -C tests test-lz4
+        - make check < /dev/null | tee    # mess with lz4 console detection
         - make clean
         - make -C tests test-pool
         - make clean
diff --git a/Makefile b/Makefile
index 1735ab8..9be0557 100644
--- a/Makefile
+++ b/Makefile
@@ -31,9 +31,9 @@
 TARGET_SYSTEM ?= $(OS)
 
 ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))
-EXT =.exe
+  EXT =.exe
 else
-EXT =
+  EXT =
 endif
 
 ## default: Build lib-release and zstd-release
@@ -65,7 +65,7 @@
 .PHONY: zstd zstd-release
 zstd zstd-release:
 	$(Q)$(MAKE) -C $(PRGDIR) $@
-	$(Q)cp $(PRGDIR)/zstd$(EXT) .
+	$(Q)ln -sf $(PRGDIR)/zstd$(EXT) zstd$(EXT)
 
 .PHONY: zstdmt
 zstdmt:
@@ -79,9 +79,9 @@
 ## test: run long-duration tests
 .PHONY: test
 DEBUGLEVEL ?= 1
-test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror
+test: MOREFLAGS += -g -Werror
 test:
-	MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
+	DEBUGLEVEL=$(DEBUGLEVEL) MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
 	$(MAKE) -C $(TESTDIR) $@
 	ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder $@
 
diff --git a/appveyor.yml b/appveyor.yml
index 5d77b31..6b5b976 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -194,7 +194,7 @@
     - COMPILER: "gcc"
       HOST:     "mingw"
       PLATFORM: "x64"
-      SCRIPT:   "CPPFLAGS=-DDEBUGLEVEL=2 CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"
+      SCRIPT:   "CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"
     - COMPILER: "gcc"
       HOST:     "mingw"
       PLATFORM: "x86"
@@ -285,5 +285,6 @@
   - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
   - if [%HOST%]==[mingw] (
       set "CC=%COMPILER%" &&
+      make clean &&
       make check
     )
diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt
index 9b5d7ef..a050577 100644
--- a/build/cmake/CMakeLists.txt
+++ b/build/cmake/CMakeLists.txt
@@ -7,7 +7,7 @@
 # in the COPYING file in the root directory of this source tree).
 # ################################################################
 
-cmake_minimum_required(VERSION 2.8.9 FATAL_ERROR)
+cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
   
 # As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies. 
 # Set and use the newest cmake policies that are validated to work 
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index fe58f78..77d4b8e 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1158,6 +1158,12 @@
   note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef 
 </p></pre><BR>
 
+<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+</b><p>  Provides the dictID of the dictionary loaded into `cdict`.
+  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
+</p></pre><BR>
+
 <pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 </b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  `estimatedSrcSize` value is optional, select 0 if not known 
diff --git a/lib/Makefile b/lib/Makefile
index c1650b9..7a736d3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,15 +8,16 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
-Q = $(if $(filter 1,$(V) $(VERBOSE)),,@)
+.PHONY: default
+default: lib-release
 
-# When cross-compiling from linux to windows, you might
-# need to specify this as "Windows." Fedora build fails
-# without it.
-#
-# Note: mingw-w64 build from linux to windows does not
-# fail on other tested distros (ubuntu, debian) even
-# without manually specifying the TARGET_SYSTEM.
+# define silent mode as default (verbose mode with V=1 or VERBOSE=1)
+$(V)$(VERBOSE).SILENT:
+
+# When cross-compiling from linux to windows,
+# one might need to specify TARGET_SYSTEM as "Windows."
+# Building from Fedora fails without it.
+# (but Ubuntu and Debian don't need to set anything)
 TARGET_SYSTEM ?= $(OS)
 
 # Version numbers
@@ -31,45 +32,46 @@
 VERSION?= $(LIBVER)
 CCVER := $(shell $(CC) --version)
 
-# This is a helper variable that configures a bunch of other variables to new,
-# space-optimized defaults.
+# ZSTD_LIB_MINIFY is a helper variable that
+# configures a bunch of other variables to space-optimized defaults.
 ZSTD_LIB_MINIFY ?= 0
 ifneq ($(ZSTD_LIB_MINIFY), 0)
-	HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
-	ZSTD_LEGACY_SUPPORT ?= 0
-	ZSTD_LIB_DEPRECATED ?= 0
-	HUF_FORCE_DECOMPRESS_X1 ?= 1
-	ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
-	ZSTD_NO_INLINE ?= 1
-	ZSTD_STRIP_ERROR_STRINGS ?= 1
-	ifneq ($(HAVE_CC_OZ), 0)
-		# Some compilers (clang) support an even more space-optimized setting.
-		CFLAGS += -Oz
-	else
-		CFLAGS += -Os
-	endif
-	CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
-	          -DDYNAMIC_BMI2=0 -DNDEBUG
+  HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
+  ZSTD_LEGACY_SUPPORT ?= 0
+  ZSTD_LIB_DEPRECATED ?= 0
+  HUF_FORCE_DECOMPRESS_X1 ?= 1
+  ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
+  ZSTD_NO_INLINE ?= 1
+  ZSTD_STRIP_ERROR_STRINGS ?= 1
+ifneq ($(HAVE_CC_OZ), 0)
+    # Some compilers (clang) support an even more space-optimized setting.
+    CFLAGS += -Oz
 else
-	CFLAGS += -O3
+    CFLAGS += -Os
+endif
+  CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
+            -DDYNAMIC_BMI2=0 -DNDEBUG
+else
+  CFLAGS += -O3
 endif
 
-CPPFLAGS+= -DXXH_NAMESPACE=ZSTD_
+DEBUGLEVEL ?= 0
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
 ifeq ($(TARGET_SYSTEM),Windows_NT)   # MinGW assumed
-CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
+  CPPFLAGS += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
 endif
 DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls -Wmissing-prototypes -Wc++-compat
-CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
-FLAGS    = $(CPPFLAGS) $(CFLAGS)
+CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
+FLAGS     = $(CPPFLAGS) $(CFLAGS)
 
 HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 GREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
-GREP_OPTIONS += --color=never
+  GREP_OPTIONS += --color=never
 endif
 GREP = grep $(GREP_OPTIONS)
 SED_ERE_OPT ?= -E
@@ -82,7 +84,7 @@
 ZSTD_FILES := $(ZSTDCOMMON_FILES)
 
 ifeq ($(findstring GCC,$(CCVER)),GCC)
-decompress/zstd_decompress_block.o :	CFLAGS+=-fno-tree-vectorize
+decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
 endif
 
 # Modules
@@ -104,96 +106,146 @@
 ZSTD_STRIP_ERROR_STRINGS ?= 0
 
 ifeq ($(ZSTD_LIB_COMPRESSION), 0)
-	ZSTD_LIB_DICTBUILDER = 0
-	ZSTD_LIB_DEPRECATED = 0
+  ZSTD_LIB_DICTBUILDER = 0
+  ZSTD_LIB_DEPRECATED = 0
 endif
 
 ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
-	ZSTD_LEGACY_SUPPORT = 0
-	ZSTD_LIB_DEPRECATED = 0
+  ZSTD_LEGACY_SUPPORT = 0
+  ZSTD_LIB_DEPRECATED = 0
 endif
 
 ifneq ($(ZSTD_LIB_COMPRESSION), 0)
-	ZSTD_FILES += $(ZSTDCOMP_FILES)
+  ZSTD_FILES += $(ZSTDCOMP_FILES)
 endif
 
 ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
-	ZSTD_FILES += $(ZSTDDECOMP_FILES)
+  ZSTD_FILES += $(ZSTDDECOMP_FILES)
 endif
 
 ifneq ($(ZSTD_LIB_DEPRECATED), 0)
-	ZSTD_FILES += $(ZDEPR_FILES)
+  ZSTD_FILES += $(ZDEPR_FILES)
 endif
 
 ifneq ($(ZSTD_LIB_DICTBUILDER), 0)
-	ZSTD_FILES += $(ZDICT_FILES)
+  ZSTD_FILES += $(ZDICT_FILES)
 endif
 
 ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0)
-	CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
+  CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
 endif
 
 ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0)
-	CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
+  CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
 endif
 
 ifneq ($(ZSTD_FORCE_DECOMPRESS_SHORT), 0)
-	CFLAGS += -DZSTD_FORCE_DECOMPRESS_SHORT
+  CFLAGS += -DZSTD_FORCE_DECOMPRESS_SHORT
 endif
 
 ifneq ($(ZSTD_FORCE_DECOMPRESS_LONG), 0)
-	CFLAGS += -DZSTD_FORCE_DECOMPRESS_LONG
+  CFLAGS += -DZSTD_FORCE_DECOMPRESS_LONG
 endif
 
 ifneq ($(ZSTD_NO_INLINE), 0)
-	CFLAGS += -DZSTD_NO_INLINE
+  CFLAGS += -DZSTD_NO_INLINE
 endif
 
 ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
-	CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
+  CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
 endif
 
 ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
-	CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
+  CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
 endif
 
 ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
 ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
-	ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
+  ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
 endif
 endif
 CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
 
-ZSTD_OBJ   := $(patsubst %.c,%.o,$(ZSTD_FILES))
+ZSTD_LOCAL_SRC := $(notdir $(ZSTD_FILES))
+ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_SRC:.c=.o)
+
+ZSTD_SUBDIR := common compress decompress dictBuilder legacy deprecated
+vpath %.c $(ZSTD_SUBDIR)
+
+UNAME := $(shell uname)
+
+ifndef BUILD_DIR
+ifeq ($(UNAME), Darwin)
+  HASH ?= md5
+else ifeq ($(UNAME), FreeBSD)
+  HASH ?= gmd5sum
+else ifeq ($(UNAME), OpenBSD)
+  HASH ?= md5
+endif
+HASH ?= md5sum
+
+HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " " )
+HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
+ifeq ($(HAVE_HASH),0)
+  $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
+  BUILD_DIR := obj/generic_noconf
+endif
+endif # BUILD_DIR
+
 
 # macOS linker doesn't support -soname, and use different extension
 # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
-ifeq ($(shell uname), Darwin)
-	SHARED_EXT = dylib
-	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
-	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
-	SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+ifeq ($(UNAME), Darwin)
+  SHARED_EXT = dylib
+  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
 else
-	SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
-	SHARED_EXT = so
-	SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
-	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
+  SHARED_EXT = so
+  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
 endif
 
+SET_CACHE_DIRECTORY = \
+	$(MAKE) --no-print-directory $@ \
+    BUILD_DIR=obj/$(HASH_DIR) \
+    CPPFLAGS="$(CPPFLAGS)" \
+    CFLAGS="$(CFLAGS)" \
+    LDFLAGS="$(LDFLAGS)"
 
-.PHONY: default lib-all all clean install uninstall
 
-default: lib-release
+.PHONY: lib-all all clean install uninstall
 
 # alias
 lib-all: all
 
 all: lib
 
-libzstd.a: ARFLAGS = rcs
-libzstd.a: $(ZSTD_OBJ)
+.PHONY: libzstd.a  # must be run every time
+
+ifndef BUILD_DIR
+# determine BUILD_DIR from compilation flags
+
+libzstd.a:
+	$(SET_CACHE_DIRECTORY)
+
+else
+# BUILD_DIR is defined
+
+ZSTD_STATLIB_DIR := $(BUILD_DIR)/static
+ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a
+ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
+$(ZSTD_STATLIB): ARFLAGS = rcs
+$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
+$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
 	@echo compiling static library
-	$(Q)$(AR) $(ARFLAGS) $@ $^
+	$(AR) $(ARFLAGS) $@ $^
+
+libzstd.a: $(ZSTD_STATLIB)
+	ln -sf $< $@
+
+endif
 
 ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))
 
@@ -202,18 +254,39 @@
 	@echo compiling dynamic library $(LIBVER)
 	$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll/libzstd.dll.a -shared $^ -o $@
 
-else
+else  # not Windows
 
 LIBZSTD = libzstd.$(SHARED_EXT_VER)
-$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
-$(LIBZSTD): $(ZSTD_FILES)
-	@echo compiling dynamic library $(LIBVER)
-	$(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
-	@echo creating versioned links
-	$(Q)ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
-	$(Q)ln -sf $@ libzstd.$(SHARED_EXT)
+.PHONY: $(LIBZSTD)  # must be run every time
+$(LIBZSTD): CFLAGS += -fPIC
+$(LIBZSTD): LDFLAGS += -shared -fvisibility=hidden
 
-endif
+ifndef BUILD_DIR
+# determine BUILD_DIR from compilation flags
+
+$(LIBZSTD):
+	$(SET_CACHE_DIRECTORY)
+
+else
+# BUILD_DIR is defined
+
+ZSTD_DYNLIB_DIR := $(BUILD_DIR)/dynamic
+ZSTD_DYNLIB := $(ZSTD_DYNLIB_DIR)/$(LIBZSTD)
+ZSTD_DYNLIB_OBJ := $(addprefix $(ZSTD_DYNLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
+
+$(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
+$(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
+	@echo compiling dynamic library $(LIBVER)
+	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
+	@echo creating versioned links
+	ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
+	ln -sf $@ libzstd.$(SHARED_EXT)
+
+$(LIBZSTD): $(ZSTD_DYNLIB)
+	ln -sf $< $@
+
+endif  # ifndef BUILD_DIR
+endif  # if windows
 
 .PHONY: libzstd
 libzstd : $(LIBZSTD)
@@ -221,6 +294,7 @@
 .PHONY: lib
 lib : libzstd.a libzstd
 
+
 # note : do not define lib-mt or lib-release as .PHONY
 # make does not consider implicit pattern rule for .PHONY target
 
@@ -234,6 +308,28 @@
 	@echo release build completed
 
 
+# Generate .h dependencies automatically
+
+DEPFLAGS = -MT $@ -MMD -MP -MF
+
+$(ZSTD_DYNLIB_DIR)/%.o : %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR)
+	@echo CC $@
+	$(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
+
+$(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
+	@echo CC $@
+	$(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $<
+
+MKDIR ?= mkdir
+$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
+	$(MKDIR) -p $@
+
+DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATLIB_OBJ:.o=.d)
+$(DEPFILES):
+
+include $(wildcard $(DEPFILES))
+
+
 # Special case : building library in single-thread mode _and_ without zstdmt_compress.c
 ZSTDMT_FILES = compress/zstdmt_compress.c
 ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
@@ -241,19 +337,19 @@
 libzstd-nomt: $(ZSTD_NOMT_FILES)
 	@echo compiling single-thread dynamic library $(LIBVER)
 	@echo files : $(ZSTD_NOMT_FILES)
-	$(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
+	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
 
 clean:
-	$(Q)$(RM) -r *.dSYM   # macOS-specific
-	$(Q)$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
-	$(Q)$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
-	$(Q)$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o
+	$(RM) -r *.dSYM   # macOS-specific
+	$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+	$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
+	$(RM) -r obj/*
 	@echo Cleaning library completed
 
 #-----------------------------------------------------------------------------
 # make install is validated only for below listed environments
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
 
 all: libzstd.pc
 
@@ -277,7 +373,7 @@
 # Additional prefix check is required, since the empty string is technically a
 # valid PCLIBDIR
 ifeq (,$(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p"))
-$(error configured libdir ($(LIBDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file)
+$(error configured libdir ($(LIBDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
 endif
 endif
 
@@ -285,20 +381,20 @@
 # Additional prefix check is required, since the empty string is technically a
 # valid PCINCDIR
 ifeq (,$(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p"))
-$(error configured includedir ($(INCLUDEDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
+$(error configured includedir ($(INCLUDEDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file)
 endif
 endif
 
-ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
-PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
+ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
+  PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
 else
-PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
+  PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
 endif
 
-ifneq (,$(filter $(shell uname),SunOS))
-INSTALL ?= ginstall
+ifneq (,$(filter $(UNAME),SunOS))
+  INSTALL ?= ginstall
 else
-INSTALL ?= install
+  INSTALL ?= install
 endif
 
 INSTALL_PROGRAM ?= $(INSTALL)
@@ -308,7 +404,7 @@
 libzstd.pc:
 libzstd.pc: libzstd.pc.in
 	@echo creating pkgconfig
-	$(Q)@sed $(SED_ERE_OPT) -e 's|@PREFIX@|$(PREFIX)|' \
+	@sed $(SED_ERE_OPT) -e 's|@PREFIX@|$(PREFIX)|' \
           -e 's|@LIBDIR@|$(PCLIBDIR)|' \
           -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \
           -e 's|@VERSION@|$(VERSION)|' \
@@ -318,37 +414,41 @@
 	@echo zstd static and shared library installed
 
 install-pc: libzstd.pc
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
-	$(Q)$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
+	[ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
+	$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
 
-install-static: libzstd.a
+install-static:
+	# only generate libzstd.a if it's not already present
+	[ -e libzstd.a ] || $(MAKE) libzstd.a-release
+	[ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
 	@echo Installing static library
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
-	$(Q)$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
+	$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
 
-install-shared: libzstd
+install-shared:
+	# only generate libzstd.so if it's not already present
+	[ -e $(LIBZSTD) ] || $(MAKE) libzstd-release
+	[ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
 	@echo Installing shared library
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
-	$(Q)$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
-	$(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
-	$(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+	$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
+	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
 
 install-includes:
+	[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
 	@echo Installing includes
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
-	$(Q)$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
-	$(Q)$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
-	$(Q)$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
 
 uninstall:
-	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
-	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
-	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
-	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
-	$(Q)$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
-	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
-	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
-	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
+	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
+	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+	$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
+	$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
+	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
+	$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd libraries successfully uninstalled
 
 endif
diff --git a/lib/README.md b/lib/README.md
index ff62b66..db9170a 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -168,6 +168,26 @@
 The compiled executable will require ZSTD DLL which is available at `dll\libzstd.dll`.
 
 
+#### Advanced Build options
+
+The build system requires a hash function in order to
+separate object files created with different compilation flags.
+By default, it tries to use `md5sum` or equivalent.
+The hash function can be manually selected by setting the `HASH` variable,
+for example: `make HASH=xxhsum`.
+The hash function must output at least 64 bits, in hexadecimal format.
+When no hash function is found,
+the Makefile just generates all object files into the same default directory,
+irrespective of compilation flags.
+This functionality only matters if `libzstd` is compiled multiple times
+with different build flags.
+
+The build directory, where object files are stored,
+can also be manually controlled using the variable `BUILD_DIR`,
+for example `make BUILD_DIR=objectDir/v1`.
+In that case, the hash function doesn't matter.
+
+
 #### Deprecated API
 
 Obsolete API on their way out are stored in directory `lib/deprecated`.
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index cd43752..45bba53 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -48,6 +48,7 @@
     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }
diff --git a/lib/common/mem.h b/lib/common/mem.h
index c8361ab..4728ef7 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -45,7 +45,11 @@
 *  Basic Types
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+#  if defined(_AIX)
+#    include <inttypes.h>
+#  else
+#    include <stdint.h> /* intptr_t */
+#  endif
   typedef   uint8_t BYTE;
   typedef  uint16_t U16;
   typedef   int16_t S16;
diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h
index 998398e..6d0d003 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/common/zstd_errors.h
@@ -77,6 +77,7 @@
   ZSTD_error_frameIndex_tooLarge = 100,
   ZSTD_error_seekableIO          = 102,
   ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;
 
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index f57f8f8..0991f20 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -19,7 +19,7 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
 #include <arm_neon.h>
 #endif
 #include "compiler.h"
@@ -242,7 +242,7 @@
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) {
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
     vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
 #else
     ZSTD_memcpy(dst, src, 8);
@@ -251,7 +251,7 @@
 
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
 static void ZSTD_copy16(void* dst, const void* src) {
-#if !defined(ZSTD_NO_INTRINSICS) && defined(__aarch64__)
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
     vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
 #else
     ZSTD_memcpy(dst, src, 16);
@@ -336,28 +336,39 @@
  * In which case, resize it down to free some memory */
 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
 
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
+    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
 
 /*-*******************************************
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;
+    U32 offset;         /* Offset code of the sequence */
     U16 litLength;
     U16 matchLength;
 } seqDef;
 
 typedef struct {
     seqDef* sequencesStart;
-    seqDef* sequences;
+    seqDef* sequences;      /* ptr to end of sequences */
     BYTE* litStart;
-    BYTE* lit;
+    BYTE* lit;              /* ptr to end of literals */
     BYTE* llCode;
     BYTE* mlCode;
     BYTE* ofCode;
     size_t maxNbSeq;
     size_t maxNbLit;
-    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
-    U32   longLengthPos;
+
+    /* longLengthPos and longLengthID allow us to represent either a single litLength or matchLength
+     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+     * the existing value of the litLength or matchLength by 0x10000.
+     */
+    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
+    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index ea4b04a..92f0308 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -64,6 +64,7 @@
 struct ZSTD_CDict_s {
     const void* dictContent;
     size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
     U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
     ZSTD_cwksp workspace;
     ZSTD_matchState_t matchState;
@@ -201,6 +202,14 @@
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
+/* Tells whether long distance matching (LDM) should be enabled for @cParams.
+ * Returns 1 when cParams->strategy >= ZSTD_btopt and cParams->windowLog >= 27,
+ * and 0 otherwise.
+ */
+static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+}
+
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -208,6 +217,16 @@
     /* should not matter, as all cParams are presumed properly defined */
     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
     cctxParams.cParams = cParams;
+
+    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
+        cctxParams.ldmParams.enableLdm = 1;
+        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
+        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
+        assert(cctxParams.ldmParams.hashRateLog < 32);
+    }
+
     assert(!ZSTD_checkCParams(cParams));
     return cctxParams;
 }
@@ -433,6 +452,22 @@
         bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
         return bounds;
 
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+        bounds.lowerBound = (int)ZSTD_bm_buffered;
+        bounds.upperBound = (int)ZSTD_bm_stable;
+        return bounds;
+    
+    case ZSTD_c_blockDelimiters:
+        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
+        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
+        return bounds;
+    
+    case ZSTD_c_validateSequences:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:
         bounds.error = ERROR(parameter_unsupported);
         return bounds;
@@ -490,6 +525,10 @@
     case ZSTD_c_literalCompressionMode:
     case ZSTD_c_targetCBlockSize:
     case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
     default:
         return 0;
     }
@@ -538,6 +577,10 @@
     case ZSTD_c_ldmBucketSizeLog:
     case ZSTD_c_targetCBlockSize:
     case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -729,6 +772,26 @@
         CCtxParams->srcSizeHint = value;
         return CCtxParams->srcSizeHint;
 
+    case ZSTD_c_stableInBuffer:
+        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
+        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->inBufferMode;
+
+    case ZSTD_c_stableOutBuffer:
+        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
+        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->outBufferMode;
+    
+    case ZSTD_c_blockDelimiters:
+        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
+        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
+        return CCtxParams->blockDelimiters;
+    
+    case ZSTD_c_validateSequences:
+        BOUNDCHECK(ZSTD_c_validateSequences, value);
+        CCtxParams->validateSequences = value;
+        return CCtxParams->validateSequences;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -840,6 +903,18 @@
     case ZSTD_c_srcSizeHint :
         *value = (int)CCtxParams->srcSizeHint;
         break;
+    case ZSTD_c_stableInBuffer :
+        *value = (int)CCtxParams->inBufferMode;
+        break;
+    case ZSTD_c_stableOutBuffer :
+        *value = (int)CCtxParams->outBufferMode;
+        break;
+    case ZSTD_c_blockDelimiters :
+        *value = (int)CCtxParams->blockDelimiters;
+        break;
+    case ZSTD_c_validateSequences :
+        *value = (int)CCtxParams->validateSequences;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -877,7 +952,6 @@
 
 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
         int const compressionLevel,
-        unsigned long long srcSizeHint,
         size_t const dictSize);
 static int ZSTD_dedicatedDictSearch_isSupported(
         const ZSTD_compressionParameters* cParams);
@@ -1063,24 +1137,73 @@
     return hashLog - btScale;
 }
 
+/** ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and chainLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    U64 const windowCap = 1ULL << ZSTD_WINDOWLOG_MAX;
+    U64 windowBytes;
+    U64 combinedBytes;
+
+    /* Without a dictionary there is nothing to widen. */
+    if (dictSize == 0) return windowLog;
+
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    windowBytes = 1ULL << windowLog;
+    combinedBytes = dictSize + windowBytes;
+
+    /* Window already holds dictionary + source : keep windowLog as is. */
+    if (windowBytes >= dictSize + srcSize) return windowLog;
+
+    /* Dictionary + window reaches the maximum window size : clamp to the maximum log. */
+    if (combinedBytes >= windowCap) return ZSTD_WINDOWLOG_MAX;
+
+    /* Otherwise derive the log from dictSize + windowSize, rounding up
+     * (presumably ZSTD_highbit32() yields the index of the highest set bit - confirm).
+     * combinedBytes < windowCap here, so the U32 cast is lossless assuming ZSTD_WINDOWLOG_MAX <= 32. */
+    return ZSTD_highbit32((U32)combinedBytes - 1) + 1;
+}
+
 /** ZSTD_adjustCParams_internal() :
  *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
  *  mostly downsize to reduce memory consumption and initialization latency.
  * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
  *  note : `srcSize==0` means 0!
  *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
 static ZSTD_compressionParameters
 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                             unsigned long long srcSize,
-                            size_t dictSize)
+                            size_t dictSize,
+                            ZSTD_cParamMode_e mode)
 {
-    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
-    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
     assert(ZSTD_checkCParams(cPar)==0);
 
     if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
         srcSize = minSrcSize;
 
+    switch (mode) {
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_createCDict:
+        break;
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+
     /* resize windowLog if input is small enough, to use less memory */
     if ( (srcSize < maxWindowResize)
       && (dictSize < maxWindowResize) )  {
@@ -1090,10 +1213,11 @@
                             ZSTD_highbit32(tSize-1) + 1;
         if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }
-    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
-    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
-        if (cycleLog > cPar.windowLog)
-            cPar.chainLog -= (cycleLog - cPar.windowLog);
+    {   U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
+        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
+        if (cycleLog > dictAndWindowLog)
+            cPar.chainLog -= (cycleLog - dictAndWindowLog);
     }
 
     if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
@@ -1109,11 +1233,11 @@
 {
     cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
     if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
 }
 
-static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
-static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
 
 static void ZSTD_overrideCParams(
               ZSTD_compressionParameters* cParams,
@@ -1129,18 +1253,18 @@
 }
 
 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
 {
     ZSTD_compressionParameters cParams;
     if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
       srcSizeHint = CCtxParams->srcSizeHint;
     }
-    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);   /* level-based starting point, looked up with the caller's `mode` */
     if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
     ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
     assert(!ZSTD_checkCParams(cParams));
     /* srcSizeHint == 0 means 0 */
-    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);   /* final adjustment receives the same `mode` as the lookup above */
 }
 
 static size_t
@@ -1218,7 +1342,7 @@
 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 {
     ZSTD_compressionParameters const cParams =
-                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
 
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     /* estimateCCtxSize is for one-shot compression. So no buffers should
@@ -1236,7 +1360,7 @@
 
 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
     return ZSTD_estimateCCtxSize_usingCParams(cParams);
 }
 
@@ -1255,10 +1379,14 @@
 {
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     {   ZSTD_compressionParameters const cParams =
-                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
-        size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
-        size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
             &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
@@ -1274,7 +1402,7 @@
 
 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
     return ZSTD_estimateCStreamSize_usingCParams(cParams);
 }
 
@@ -1368,16 +1496,6 @@
 }
 
 /**
- * Indicates whether this compression proceeds directly from user-provided
- * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
- * whether the context needs to buffer the input/output (ZSTDb_buffered).
- */
-typedef enum {
-    ZSTDb_not_buffered,
-    ZSTDb_buffered
-} ZSTD_buffered_policy_e;
-
-/**
  * Controls, for this matchState reset, whether the tables need to be cleared /
  * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
  * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
@@ -1504,8 +1622,12 @@
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
         U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
-        size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
         size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
 
         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
@@ -1580,6 +1702,7 @@
         zc->seqStore.maxNbLit = blockSize;
 
         /* buffers */
+        zc->bufferedPolicy = zbuff;
         zc->inBuffSize = buffInSize;
         zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
         zc->outBuffSize = buffOutSize;
@@ -1699,7 +1822,8 @@
             ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
         }
 
-        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 0);
+        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
+                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
         params.cParams.windowLog = windowLog;
         FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                  ZSTDcrp_makeClean, zbuff), "");
@@ -1896,7 +2020,7 @@
 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
 {
     ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
+    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
     ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
     if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
     fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
@@ -2020,10 +2144,10 @@
     return (cctxParams->targetCBlockSize != 0);
 }
 
-/* ZSTD_compressSequences_internal():
+/* ZSTD_entropyCompressSequences_internal():
  * actually compresses both literals and sequences */
 MEM_STATIC size_t
-ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
                           const ZSTD_entropyCTables_t* prevEntropy,
                                 ZSTD_entropyCTables_t* nextEntropy,
                           const ZSTD_CCtx_params* cctxParams,
@@ -2052,7 +2176,7 @@
     entropyWorkspace = count + (MaxSeq + 1);
     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
 
-    DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
 
@@ -2214,7 +2338,7 @@
 }
 
 MEM_STATIC size_t
-ZSTD_compressSequences(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
                        const ZSTD_entropyCTables_t* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
                        const ZSTD_CCtx_params* cctxParams,
@@ -2223,7 +2347,7 @@
                              void* entropyWorkspace, size_t entropyWkspSize,
                              int bmi2)
 {
-    size_t const cSize = ZSTD_compressSequences_internal(
+    size_t const cSize = ZSTD_entropyCompressSequences_internal(
                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                             dst, dstCapacity,
                             entropyWorkspace, entropyWkspSize, bmi2);
@@ -2233,13 +2357,13 @@
      */
     if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
         return 0;  /* block not compressed */
-    FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed");
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
 
     /* Check compressibility */
     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
         if (cSize >= maxCSize) return 0;  /* block not compressed */
     }
-
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
     return cSize;
 }
 
@@ -2323,7 +2447,11 @@
     /* Assert that we have correctly flushed the ctx params into the ms's copy */
     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
-        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
+            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
+        } else {
+            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        }
         return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
     }
     ZSTD_resetSeqStore(&(zc->seqStore));
@@ -2362,7 +2490,7 @@
                                        src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
         } else if (zc->appliedParams.ldmParams.enableLdm) {
-            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
 
             ldmSeqStore.seq = zc->ldmSequences;
             ldmSeqStore.capacity = zc->maxNbLdmSequences;
@@ -2379,6 +2507,7 @@
             assert(ldmSeqStore.pos == ldmSeqStore.size);
         } else {   /* not long range mode */
             ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
@@ -2390,17 +2519,25 @@
 static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
 {
     const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
-    const seqDef* seqs = seqStore->sequencesStart;
-    size_t seqsSize = seqStore->sequences - seqs;
+    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
+    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
+    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
+    size_t literalsRead = 0;
+    size_t lastLLSize;
 
     ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
-    size_t i; size_t position; int repIdx;
+    size_t i;
+    repcodes_t updatedRepcodes;
 
     assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
-    for (i = 0, position = 0; i < seqsSize; ++i) {
-        outSeqs[i].offset = seqs[i].offset;
-        outSeqs[i].litLength = seqs[i].litLength;
-        outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
+    /* Ensure we have enough space for last literals "sequence" */
+    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
+    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (i = 0; i < seqStoreSeqSize; ++i) {
+        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
+        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].rep = 0;
 
         if (i == seqStore->longLengthPos) {
             if (seqStore->longLengthID == 1) {
@@ -2410,36 +2547,41 @@
             }
         }
 
-        if (outSeqs[i].offset <= ZSTD_REP_NUM) {
-            outSeqs[i].rep = outSeqs[i].offset;
-            repIdx = (unsigned int)i - outSeqs[i].offset;
-
-            if (outSeqs[i].litLength == 0) {
-                if (outSeqs[i].offset < 3) {
-                    --repIdx;
+        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+            /* Derive the correct offset corresponding to a repcode */
+            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            if (outSeqs[i].litLength != 0) {
+                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+            } else {
+                if (outSeqs[i].rep == 3) {
+                    rawOffset = updatedRepcodes.rep[0] - 1;
                 } else {
-                    repIdx = (unsigned int)i - 1;
+                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                 }
-                ++outSeqs[i].rep;
             }
-            assert(repIdx >= -3);
-            outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
-            if (outSeqs[i].rep == 4) {
-                --outSeqs[i].offset;
-            }
-        } else {
-            outSeqs[i].offset -= ZSTD_REP_NUM;
         }
-
-        position += outSeqs[i].litLength;
-        outSeqs[i].matchPos = (unsigned int)position;
-        position += outSeqs[i].matchLength;
+        outSeqs[i].offset = rawOffset;
+        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
+           so we provide seqStoreSeqs[i].offset - 1 */
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
+                                         seqStoreSeqs[i].offset - 1,
+                                         seqStoreSeqs[i].litLength == 0);
+        literalsRead += outSeqs[i].litLength;
     }
-    zc->seqCollector.seqIndex += seqsSize;
+    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
+     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
+     * for the block boundary, according to the API.
+     */
+    assert(seqStoreLiteralsSize >= literalsRead);
+    lastLLSize = seqStoreLiteralsSize - literalsRead;
+    outSeqs[i].litLength = (U32)lastLLSize;
+    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+    seqStoreSeqSize++;
+    zc->seqCollector.seqIndex += seqStoreSeqSize;
 }
 
-size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-    size_t outSeqsSize, const void* src, size_t srcSize)
+size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                              size_t outSeqsSize, const void* src, size_t srcSize)
 {
     const size_t dstCapacity = ZSTD_compressBound(srcSize);
     void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
@@ -2458,12 +2600,43 @@
     return zc->seqCollector.seqIndex;
 }
 
-/* Returns true if the given block is a RLE block */
-static int ZSTD_isRLE(const BYTE *ip, size_t length) {
+/* Removes, in place, every entry with offset == 0 and matchLength == 0; a removed entry's litLength is added to the entry that follows it. Returns the number of entries kept. */
+size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
+    size_t kept = 0;
+    size_t idx;
+    for (idx = 0; idx < seqsSize; ++idx) {
+        int const isDelimiter = (sequences[idx].offset == 0) && (sequences[idx].matchLength == 0);
+        if (!isDelimiter) {
+            sequences[kept++] = sequences[idx];   /* compact kept entries toward the front */
+        } else if (idx + 1 != seqsSize) {
+            /* the last entry has no successor, so its literals are not carried over */
+            sequences[idx+1].litLength += sequences[idx].litLength;
+        }
+    }
+    return kept;
+}
+
+/* ZSTD_isRLE() : returns 1 if `src` is RLE (one byte value repeated `length` times; always 1 when length < 2), 0 if not. Checks the (length % unrollSize)-byte prefix first, then reads four size_t words per iteration. */
+static int ZSTD_isRLE(const BYTE* src, size_t length) {
+    const BYTE* ip = src;
+    const size_t unrollSize = sizeof(size_t) * 4;
+    const size_t unrollMask = unrollSize - 1;
+    const size_t prefixLength = length & unrollMask;
+    size_t valueST;   /* first byte replicated into every byte of a size_t */
     size_t i;
-    if (length < 2) return 1;
-    for (i = 1; i < length; ++i) {
-        if (ip[0] != ip[i]) return 0;
+    size_t u;
+    if (length < 2) return 1;   /* guard before any read : src[0] does not exist when length == 0 */
+    valueST = (size_t)((U64)ip[0] * 0x0101010101010101ULL);
+    /* Check if prefix is RLE first before using unrolled loop */
+    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+        return 0;
+    }
+    for (i = prefixLength; i != length; i += unrollSize) {   /* length - prefixLength is a multiple of unrollSize, so i lands exactly on length */
+        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+            if (MEM_readST(ip + i + u) != valueST) {
+                return 0;
+            }
+        }
     }
     return 1;
 }
@@ -2510,11 +2683,12 @@
 
     if (zc->seqCollector.collectSequences) {
         ZSTD_copyBlockSequences(zc);
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
         return 0;
     }
 
     /* encode sequences and literals */
-    cSize = ZSTD_compressSequences(&zc->seqStore,
+    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
             dst, dstCapacity,
@@ -2522,6 +2696,12 @@
             zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
             zc->bmi2);
 
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        return 0;
+    }
+
+
     if (frame &&
         /* We don't want to emit our first block as a RLE even if it qualifies because
          * doing so will cause the decoder (cli only) to throw a "should consume all input error."
@@ -2669,7 +2849,7 @@
 
     assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
 
-    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
+    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
     if (cctx->appliedParams.fParams.checksumFlag && srcSize)
         XXH64_update(&cctx->xxhState, src, srcSize);
 
@@ -2749,7 +2929,6 @@
                     "dst buf is too small to fit worst-case frame header size.");
     DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
-
     if (params->format == ZSTD_f_zstd1) {
         MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
         pos = 4;
@@ -2801,6 +2980,7 @@
     cctx->externSeqStore.size = nbSeq;
     cctx->externSeqStore.capacity = nbSeq;
     cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
     return 0;
 }
 
@@ -3188,7 +3368,7 @@
                 ZSTD_compress_insertDictionary(
                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                         &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
-                        cdict->dictContentSize, dictContentType, dtlm,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                         cctx->entropyWorkspace)
               : ZSTD_compress_insertDictionary(
                         cctx->blockState.prevCBlock, &cctx->blockState.matchState,
@@ -3235,7 +3415,7 @@
 
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
     ZSTD_CCtx_params const cctxParams =
             ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
@@ -3316,7 +3496,6 @@
     return cSize + endResult;
 }
 
-
 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
@@ -3369,7 +3548,7 @@
                          const void* dict, size_t dictSize,
                                int compressionLevel)
 {
-    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
+    ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
     ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
     DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
     assert(params.fParams.contentSizeFlag == 1);
@@ -3424,7 +3603,7 @@
 
 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
 }
 
@@ -3460,6 +3639,7 @@
         ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
     }
     cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
 
     cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
 
@@ -3557,18 +3737,18 @@
 
     if (cctxParams.enableDedicatedDictSearch) {
         cParams = ZSTD_dedicatedDictSearch_getCParams(
-            cctxParams.compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+            cctxParams.compressionLevel, dictSize);
         ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
     } else {
         cParams = ZSTD_getCParamsFromCCtxParams(
-            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
     if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
         /* Fall back to non-DDSS params */
         cctxParams.enableDedicatedDictSearch = 0;
         cParams = ZSTD_getCParamsFromCCtxParams(
-            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
     cctxParams.cParams = cParams;
@@ -3590,7 +3770,7 @@
 
 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                                   ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                                   cParams, ZSTD_defaultCMem);
@@ -3601,7 +3781,7 @@
 
 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                      ZSTD_dlm_byRef, ZSTD_dct_auto,
                                      cParams, ZSTD_defaultCMem);
@@ -3684,6 +3864,17 @@
     return cdict->matchState.cParams;
 }
 
+/*! ZSTD_getDictID_fromCDict() :
+ *  Reports the dictID stored in `cdict`.
+ *  @return : 0 when `cdict` is NULL, otherwise cdict->dictID. A dictID of 0 means the dictionary
+ *  is empty or not conformant to the Zstandard specification; non-conformant dictionaries can
+ *  still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
+{
+    return (cdict == NULL) ? 0 : cdict->dictID;
+}
+
+
 /* ZSTD_compressBegin_usingCDict_advanced() :
  * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict_advanced(
@@ -3791,32 +3982,12 @@
     return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
 }
 
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
-                    const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
-                    const ZSTD_CDict* const cdict,
-                    ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
+static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTD_resetCStream_internal");
-    /* Finalize the compression parameters */
-    params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
-    /* params are supposed to be fully validated at this point */
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
-    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-
-    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
-                                         dict, dictSize, dictContentType, ZSTD_dtlm_fast,
-                                         cdict,
-                                         &params, pledgedSrcSize,
-                                         ZSTDb_buffered) , "");
-
-    cctx->inToCompress = 0;
-    cctx->inBuffPos = 0;
-    cctx->inBuffTarget = cctx->blockSize
-                      + (cctx->blockSize == pledgedSrcSize);   /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
-    cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
-    cctx->streamStage = zcss_load;
-    cctx->frameEnded = 0;
-    return 0;   /* ready to go */
+    /* attachDict mode is selected only when a cdict is supplied and ZSTD_shouldAttachDict() agrees */
+    int const attach = (cdict != NULL) && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize);
+    if (attach) return ZSTD_cpm_attachDict;
+    return ZSTD_cpm_noAttachDict;
 }
 
 /* ZSTD_resetCStream():
@@ -3966,10 +4137,14 @@
 
     /* check expectations */
     DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
-    assert(zcs->inBuff != NULL);
-    assert(zcs->inBuffSize > 0);
-    assert(zcs->outBuff !=  NULL);
-    assert(zcs->outBuffSize > 0);
+    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->inBuff != NULL);
+        assert(zcs->inBuffSize > 0);
+    }
+    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->outBuff !=  NULL);
+        assert(zcs->outBuffSize > 0);
+    }
     assert(output->pos <= output->size);
     assert(input->pos <= input->size);
     assert((U32)flushMode <= (U32)ZSTD_e_end);
@@ -3982,7 +4157,8 @@
 
         case zcss_load:
             if ( (flushMode == ZSTD_e_end)
-              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
+              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
+                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
               && (zcs->inBuffPos == 0) ) {
                 /* shortcut to compression pass directly into output buffer */
                 size_t const cSize = ZSTD_compressEnd(zcs,
@@ -3995,8 +4171,9 @@
                 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                 someMoreWork = 0; break;
             }
-            /* complete loading into inBuffer */
-            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+            /* complete loading into inBuffer in buffered mode */
+            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                 size_t const loaded = ZSTD_limitCopy(
                                         zcs->inBuff + zcs->inBuffPos, toLoad,
                                         ip, iend-ip);
@@ -4016,31 +4193,49 @@
             }
             /* compress current block (note : this stage cannot be stopped in the middle) */
             DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
-            {   void* cDst;
+            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
+                void* cDst;
                 size_t cSize;
-                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                 size_t oSize = oend-op;
-                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
-                if (oSize >= ZSTD_compressBound(iSize))
+                size_t const iSize = inputBuffered
+                    ? zcs->inBuffPos - zcs->inToCompress
+                    : MIN((size_t)(iend - ip), zcs->blockSize);
+                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
                     cDst = op;   /* compress into output buffer, to skip flush stage */
                 else
                     cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-                cSize = lastBlock ?
-                        ZSTD_compressEnd(zcs, cDst, oSize,
-                                    zcs->inBuff + zcs->inToCompress, iSize) :
-                        ZSTD_compressContinue(zcs, cDst, oSize,
-                                    zcs->inBuff + zcs->inToCompress, iSize);
-                FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
-                zcs->frameEnded = lastBlock;
-                /* prepare next block */
-                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
-                if (zcs->inBuffTarget > zcs->inBuffSize)
-                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
-                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
-                         (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
-                if (!lastBlock)
-                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
-                zcs->inToCompress = zcs->inBuffPos;
+                if (inputBuffered) {
+                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize);
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    /* prepare next block */
+                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+                    if (zcs->inBuffTarget > zcs->inBuffSize)
+                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                            (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
+                    if (!lastBlock)
+                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                    zcs->inToCompress = zcs->inBuffPos;
+                } else {
+                    unsigned const lastBlock = (ip + iSize == iend);
+                    assert(flushMode == ZSTD_e_end /* Already validated */);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+                    /* Consume the input prior to error checking to mirror buffered mode. */
+                    if (iSize > 0)
+                        ip += iSize;
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    if (lastBlock)
+                        assert(ip == iend);
+                }
                 if (cDst == op) {  /* no need to flush */
                     op += cSize;
                     if (zcs->frameEnded) {
@@ -4057,6 +4252,7 @@
 	    /* fall-through */
         case zcss_flush:
             DEBUGLOG(5, "flush stage");
+            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                             zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
@@ -4111,6 +4307,116 @@
     return ZSTD_nextInputSizeHint_MTorST(zcs);
 }
 
+/* ZSTD_setBufferExpectations() :
+ * Called after a compression call to record the input/output buffers expected on the next call,
+ * where they get validated. Only the sides configured as ZSTD_bm_stable are recorded. */
+static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
+{
+    int const stableIn  = (cctx->appliedParams.inBufferMode  == ZSTD_bm_stable);
+    int const stableOut = (cctx->appliedParams.outBufferMode == ZSTD_bm_stable);
+    /* stable input : remember the whole ZSTD_inBuffer descriptor */
+    if (stableIn) cctx->expectedInBuffer = *input;
+    /* stable output : remember only the remaining capacity */
+    if (stableOut) cctx->expectedOutBufferSize = output->size - output->pos;
+}
+
+/* ZSTD_checkBufferStability() : checks the buffers against the expectations recorded by
+ * ZSTD_setBufferExpectations(). @return : 0 when they match, a ZSTD error code otherwise. */
+static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+                                        ZSTD_outBuffer const* output,
+                                        ZSTD_inBuffer const* input,
+                                        ZSTD_EndDirective endOp)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        ZSTD_inBuffer const* const expected = &cctx->expectedInBuffer;
+        int const sameInput = (expected->src == input->src) && (expected->pos == input->pos) && (expected->size == input->size);
+        if (!sameInput)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+        if (endOp != ZSTD_e_end)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        size_t const remainingOut = output->size - output->pos;
+        if (remainingOut != cctx->expectedOutBufferSize)
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+    }
+    return 0;
+}
+
+static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,           /* context to initialize for a new frame */
+                                             ZSTD_EndDirective endOp,   /* ZSTD_e_end => inSize becomes the pledged source size */
+                                             size_t inSize) {           /* @return : 0 on success, or a forwarded ZSTD error code */
+    ZSTD_CCtx_params params = cctx->requestedParams;   /* local working copy of the requested parameters */
+    ZSTD_prefixDict const prefixDict = cctx->prefixDict;   /* snapshot taken before the prefix is cleared below */
+    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+    if (cctx->cdict)
+        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+    {   /* resolve cParams from the pledged size, the dictionary size and the cdict attach mode */
+        size_t const dictSize = prefixDict.dict
+                ? prefixDict.dictSize
+                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &params, cctx->pledgedSrcSizePlusOne-1,
+                dictSize, mode);
+    }
+
+    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
+        /* Enable LDM by default for optimal parser and window size >= 128MB */
+        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
+        params.ldmParams.enableLdm = 1;
+    }
+
+#ifdef ZSTD_MULTITHREAD
+    if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
+        params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
+    }
+    if (params.nbWorkers > 0) {
+        /* mt context creation */
+        if (cctx->mtctx == NULL) {   /* created on first use only */
+            DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
+                        params.nbWorkers);
+            cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
+            RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
+        }
+        /* mt compression */
+        DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
+        FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
+                    cctx->mtctx,
+                    prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+                    cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
+        cctx->streamStage = zcss_load;
+        cctx->appliedParams = params;   /* record the parameters handed to the MT context */
+    } else
+#endif
+    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;   /* path taken when nbWorkers == 0 */
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+            /* for small input: avoid automatic flush on reaching end of block, since
+             * it would require adding a 3-byte null block to end the frame
+             */
+            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        } else {
+            cctx->inBuffTarget = 0;   /* input is not buffered in this mode : no fill target */
+        }
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+    return 0;
+}
 
 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                              ZSTD_outBuffer* output,
@@ -4126,49 +4432,12 @@
 
     /* transparent initialization stage */
     if (cctx->streamStage == zcss_init) {
-        ZSTD_CCtx_params params = cctx->requestedParams;
-        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
-        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
-        ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
-        assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-        if (cctx->cdict)
-            params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
-        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
-        if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1;  /* auto-fix pledgedSrcSize */
-        params.cParams = ZSTD_getCParamsFromCCtxParams(&params, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
-
-
-#ifdef ZSTD_MULTITHREAD
-        if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
-            params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
-        }
-        if (params.nbWorkers > 0) {
-            /* mt context creation */
-            if (cctx->mtctx == NULL) {
-                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
-                            params.nbWorkers);
-                cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
-                RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
-            }
-            /* mt compression */
-            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
-            FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
-                        cctx->mtctx,
-                        prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
-                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
-            cctx->streamStage = zcss_load;
-            cctx->appliedParams.nbWorkers = params.nbWorkers;
-        } else
-#endif
-        {   FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
-                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
-                            cctx->cdict,
-                            params, cctx->pledgedSrcSizePlusOne-1) , "");
-            assert(cctx->streamStage == zcss_load);
-            assert(cctx->appliedParams.nbWorkers == 0);
-    }   }
+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
+    }
     /* end of transparent initialization stage */
 
+    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
     /* compression stage */
 #ifdef ZSTD_MULTITHREAD
     if (cctx->appliedParams.nbWorkers > 0) {
@@ -4192,11 +4461,13 @@
          * flush, or we are out of output space.
          */
         assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
+        ZSTD_setBufferExpectations(cctx, output, input);
         return flushMin;
     }
 #endif
     FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
     DEBUGLOG(5, "completed ZSTD_compressStream2");
+    ZSTD_setBufferExpectations(cctx, output, input);
     return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
 }
 
@@ -4219,14 +4490,22 @@
                       void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
 {
+    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
+    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
     DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    /* Enable stable input/output buffers. */
+    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
+    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
     {   size_t oPos = 0;
         size_t iPos = 0;
         size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
                                         dst, dstCapacity, &oPos,
                                         src, srcSize, &iPos,
                                         ZSTD_e_end);
+        /* Reset to the original values. */
+        cctx->requestedParams.inBufferMode = originalInBufferMode;
+        cctx->requestedParams.outBufferMode = originalOutBufferMode;
         FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
         if (result != 0) {  /* compression not completed, due to lack of output space */
             assert(oPos == dstCapacity);
@@ -4237,6 +4516,406 @@
     }
 }
 
+typedef struct {         /* Cursor into an array of ZSTD_Sequence, carried from one block to the next */
+    U32 idx;             /* Index in array of ZSTD_Sequence */
+    U32 posInSequence;   /* Position within sequence at idx, in bytes (literals first, then match) */
+    size_t posInSrc;     /* Source bytes accounted for so far: last literals always, sequences only when validateSequences is set */
+} ZSTD_sequencePosition;
+
+/* ZSTD_validateSequence() : checks one sequence against the offset limit and the minimum match length.
+ * `posInSrc` is the amount of data the decoder would have decoded up to this point.
+ * @return : 0 if the sequence is valid, a ZSTD error code (corruption_detected) otherwise. */
+static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
+    /* unsigned shift : `1 << windowLog` is a signed shift, which is undefined when windowLog is 31 */
+    U32 const windowSize = 1u << windowLog;
+    /* As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again. */
+    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    return 0;
+}
+
+/* ZSTD_finalizeOffCode() :
+ * Translates a sequence's raw offset into an offset code.
+ * `rep` is the ongoing repcode array, `ll0` is non-zero when litLength == 0.
+ * @return : (repcode index - 1) when rawOffset can be expressed as a repcode,
+ *           rawOffset + ZSTD_REP_MOVE otherwise. */
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
+    U32 repIndex;   /* 1-based repcode index, 0 == not a repcode */
+
+    /* When litLength == 0, rep[0] itself is not a candidate : the indices of rep[1] and rep[2]
+     * shift down by one, and index 3 designates rep[0] - 1. */
+    if (!ll0 && rawOffset == rep[0])          repIndex = 1;
+    else if (rawOffset == rep[1])             repIndex = 2 - ll0;
+    else if (rawOffset == rep[2])             repIndex = 3 - ll0;
+    else if (ll0 && rawOffset == rep[0] - 1)  repIndex = 3;
+    else                                      repIndex = 0;
+
+    /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
+    if (repIndex != 0) return repIndex - 1;
+    return rawOffset + ZSTD_REP_MOVE;
+}
+
+/* ZSTD_copySequencesToSeqStoreExplicitBlockDelim() :
+ * Scans inSeqs from seqPos->idx, storing each sequence into cctx->seqStore, until it reaches a
+ * block delimiter (matchLength == 0 and offset == 0), whose litLength gives the last literals.
+ * @return : 0 on success, or a ZSTD error code if a sequence fails validation, the delimiter
+ *           is missing, or the sequences do not add up to exactly blockSize bytes. */
+static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                             const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    BYTE const* ip = (BYTE const*)(src);
+    const BYTE* const iend = ip + blockSize;
+    repcodes_t updatedRepcodes;
+    U32 dictSize;
+
+    if (cctx->cdict) {
+        dictSize = (U32)cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = (U32)cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    /* Bounds check comes first : inSeqs[idx] must not be read once idx reaches inSeqsSize */
+    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
+        U32 const litLength = inSeqs[idx].litLength;
+        U32 const matchLength = inSeqs[idx].matchLength;
+        U32 const ll0 = (litLength == 0);
+        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                cctx->appliedParams.cParams.windowLog, dictSize,
+                                                cctx->appliedParams.cParams.minMatch),
+                                                "Sequence validation failed");
+        }
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    /* Ran off the end of the array : there is no delimiter entry to read the last literals from */
+    RETURN_ERROR_IF(idx == inSeqsSize, corruption_detected, "Block delimiter not found!");
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    if (inSeqs[idx].litLength) {
+        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
+        ip += inSeqs[idx].litLength;
+        seqPos->posInSrc += inSeqs[idx].litLength;
+    }
+    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    seqPos->idx = idx+1;
+    return 0;
+}
+
+/* ZSTD_copySequencesToSeqStoreNoBlockDelim() :
+ * Scans through up to blockSize bytes represented by the sequences in inSeqs, starting at
+ * seqPos, and stores the resulting (possibly partial) sequences into cctx->seqStore.
+ * seqPos is updated so that the next call resumes where this one stopped.
+ *
+ * The end of the block may be moved backwards, either to avoid splitting a match, or to avoid
+ * splitting it in a way that would leave a piece smaller than minMatch.
+ *
+ * @return : the number of bytes of this block that were left unread because of such an
+ *           adjustment (0 if none), or a ZSTD error code if sequence validation fails.
+ */
+static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                       const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    U32 startPosInSequence = seqPos->posInSequence;
+    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;   /* end of this block, counted from the start of sequence idx */
+    size_t dictSize;
+    BYTE const* ip = (BYTE const*)(src);
+    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
+    repcodes_t updatedRepcodes;
+    U32 bytesAdjustment = 0;   /* bytes of this block left unread; this is the return value */
+    U32 finalMatchSplit = 0;   /* set once the last match has been split at the block boundary */
+    U32 litLength;
+    U32 matchLength;
+    U32 rawOffset;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
+    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+        const ZSTD_Sequence currSeq = inSeqs[idx];
+        litLength = currSeq.litLength;
+        matchLength = currSeq.matchLength;
+        rawOffset = currSeq.offset;
+
+        /* Modify the sequence depending on where endPosInSequence lies */
+        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
+            /* The block covers the rest of this sequence : drop the part already consumed by a previous block */
+            if (startPosInSequence >= litLength) {
+                startPosInSequence -= litLength;
+                litLength = 0;
+                matchLength -= startPosInSequence;
+            } else {
+                litLength -= startPosInSequence;
+            }
+            /* Move to the next sequence */
+            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
+            startPosInSequence = 0;
+            idx++;
+        } else {
+            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
+               does not reach the end of the match. So, we have to split the sequence */
+            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
+                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+            if (endPosInSequence > litLength) {
+                U32 firstHalfMatchLength;
+                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
+                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
+                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
+                    /* Only ever split the match if it is larger than the block size */
+                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
+                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
+                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
+                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        firstHalfMatchLength -= bytesAdjustment;
+                    }
+                    matchLength = firstHalfMatchLength;
+                    /* Flag that we split the last match - after storing the sequence, exit the loop,
+                       but keep the value of endPosInSequence */
+                    finalMatchSplit = 1;
+                } else {
+                    /* Move the position in sequence backwards so that we don't split match, and break to store
+                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
+                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
+                     * would cause the first half of the match to be too small
+                     */
+                    bytesAdjustment = endPosInSequence - currSeq.litLength;
+                    endPosInSequence = currSeq.litLength;
+                    break;
+                }
+            } else {
+                /* This sequence ends inside the literals, break to store the last literals */
+                break;
+            }
+        }
+        /* Check if this offset can be represented with a repcode */
+        {   U32 ll0 = (litLength == 0);
+            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
+            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        }
+
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                   cctx->appliedParams.cParams.windowLog, dictSize,
+                                                   cctx->appliedParams.cParams.minMatch),
+                                                   "Sequence validation failed");
+        }
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    /* NOTE(review): the log statement below reads inSeqs[idx] even when idx == inSeqsSize, a case the assert after it allows - confirm this is acceptable */
+    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+    seqPos->idx = idx;
+    seqPos->posInSequence = endPosInSequence;
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    iend -= bytesAdjustment;   /* the block ends earlier by the number of unread bytes */
+    if (ip != iend) {
+        /* Store any last literals */
+        U32 lastLLSize = (U32)(iend - ip);
+        assert(ip <= iend);
+        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
+        seqPos->posInSrc += lastLLSize;
+    }
+
+    return bytesAdjustment;
+}
+
+typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                       const void* src, size_t blockSize);
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+    ZSTD_sequenceCopier selected;   /* copier matching the requested sequence format */
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
+    /* one copier per supported format; NULL for anything else */
+    selected = (mode == ZSTD_sf_explicitBlockDelimiters) ? ZSTD_copySequencesToSeqStoreExplicitBlockDelim
+             : (mode == ZSTD_sf_noBlockDelimiters)       ? ZSTD_copySequencesToSeqStoreNoBlockDelim
+             : NULL;
+    /* an unsupported mode trips the assert in debug builds, and yields NULL otherwise */
+    assert(selected != NULL);
+    return selected;
+}
+
+/* Compress, block-by-block, all of the sequences given: each block is emitted as raw, RLE or compressed.
+ * `inSeqs` is consumed through the copier selected from cctx->appliedParams.blockDelimiters.
+ * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
+ */
+static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                              const void* src, size_t srcSize) {
+    size_t cSize = 0;
+    U32 lastBlock;
+    size_t blockSize;
+    size_t compressedSeqsSize;
+    size_t remaining = srcSize;
+    ZSTD_sequencePosition seqPos = {0, 0, 0};
+    BYTE const* ip = (BYTE const*)src;
+    BYTE* op = (BYTE*)dst;
+    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+
+    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
+    /* Special case: empty frame */
+    if (remaining == 0) {
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+        cSize += ZSTD_blockHeaderSize;
+    }
+
+    while (remaining) {
+        size_t cBlockSize;
+        size_t additionalByteAdjustment;
+        lastBlock = remaining <= cctx->blockSize;
+        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
+        ZSTD_resetSeqStore(&cctx->seqStore);
+        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
+
+        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
+        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
+        blockSize -= additionalByteAdjustment;
+
+        /* If blocks are too small, emit as a nocompress block */
+        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
+            cSize += cBlockSize;
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            continue;
+        }
+
+        /* The payload is written after the block header: without this check the subtraction below could wrap around */
+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
+        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                                &cctx->appliedParams,
+                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
+                                blockSize,
+                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                                cctx->bmi2);
+        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
+
+        if (!cctx->isFirstBlock &&
+            ZSTD_maybeRLE(&cctx->seqStore) &&
+            ZSTD_isRLE(ip, blockSize)) {   /* test the current block only, not the whole input */
+            /* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            compressedSeqsSize = 1;
+        }
+
+        if (compressedSeqsSize == 0) {
+            /* ZSTD_noCompressBlock writes the block header as well */
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
+        } else if (compressedSeqsSize == 1) {
+            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
+            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
+        } else {
+            U32 cBlockHeader;
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+            /* Write block header into beginning of block*/
+            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
+            MEM_writeLE24(op, cBlockHeader);
+            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
+            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
+        }
+
+        cSize += cBlockSize;
+        DEBUGLOG(4, "cSize running total: %zu", cSize);
+        if (lastBlock) {
+            break;
+        } else {
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            cctx->isFirstBlock = 0;
+        }
+    }
+
+    return cSize;
+}
+
+/* Compresses `src` into one frame (header, blocks, optional checksum) from the given sequences; returns the frame size or a ZSTD error. */
+size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
+                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const void* src, size_t srcSize) {
+    BYTE* op = (BYTE*)dst;
+    size_t cSize = 0;
+    size_t compressedBlocksSize = 0;
+    size_t frameHeaderSize = 0;
+
+    /* Transparent initialization stage, same as compressStream2() */
+    DEBUGLOG(3, "ZSTD_compressSequences()");
+    assert(cctx != NULL);
+    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+    RETURN_ERROR_IF(inSeqsSize > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+    /* Begin writing output, starting with frame header; its result must be checked before it is used as a size */
+    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
+    FORWARD_IF_ERROR(frameHeaderSize, "Writing frame header failed");
+    op += frameHeaderSize;
+    dstCapacity -= frameHeaderSize;
+    cSize += frameHeaderSize;
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
+        XXH64_update(&cctx->xxhState, src, srcSize);
+    }
+    /* cSize includes block header size and compressed sequences size */
+    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
+                                                           op, dstCapacity,
+                                                           inSeqs, inSeqsSize,
+                                                           src, srcSize);
+    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
+    cSize += compressedBlocksSize;
+    dstCapacity -= compressedBlocksSize;
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32((char*)dst + cSize, checksum);
+        cSize += 4;
+    }
+    DEBUGLOG(3, "Final compressed size: %zu", cSize);
+    return cSize;
+}
+
 /*======   Finalize   ======*/
 
 /*! ZSTD_flushStream() :
@@ -4377,9 +5056,9 @@
 },
 };
 
-static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize)
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
 {
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
     switch (cParams.strategy) {
         case ZSTD_fast:
         case ZSTD_dfast:
@@ -4428,15 +5107,34 @@
     }
 }
 
+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{   /* Returns the size estimate that ZSTD_getCParams_internal() compares against its table thresholds. */
+    switch (mode) {
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;   /* the dictionary size is left out of the estimate in this mode */
+        break;
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_createCDict:
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    /* Known size: plain sum. Unknown size: stays unknown without a dictionary, else 500 more is added to the (wrapping) sum. */
+    if (srcSizeHint != ZSTD_CONTENTSIZE_UNKNOWN) return srcSizeHint + dictSize;
+    if (dictSize == 0) return ZSTD_CONTENTSIZE_UNKNOWN;
+    return srcSizeHint + dictSize + 500;
+}
+
 /*! ZSTD_getCParams_internal() :
  * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
  *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
- *        Use dictSize == 0 for unknown or unused. */
-static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+ *        Use dictSize == 0 for unknown or unused.
+ *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
 {
-    int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
-    size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
-    U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
     U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
     int row = compressionLevel;
     DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
@@ -4446,7 +5144,7 @@
     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
         if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel);   /* acceleration factor */
         /* refine parameters based on srcSize & dictSize */
-        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
     }
 }
 
@@ -4456,16 +5154,16 @@
 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
 {
     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
 }
 
 /*! ZSTD_getParams() :
  *  same idea as ZSTD_getCParams()
  * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
  *  Fields of `ZSTD_frameParameters` are set to default values */
-static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
     ZSTD_parameters params;
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
     DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
     ZSTD_memset(&params, 0, sizeof(params));
     params.cParams = cParams;
@@ -4479,5 +5177,5 @@
  *  Fields of `ZSTD_frameParameters` are set to default values */
 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
     if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
+    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
 }
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 49ad87b..ee05234 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -82,11 +82,28 @@
 } ZSTD_entropyCTables_t;
 
 typedef struct {
-    U32 off;
-    U32 len;
+    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 len;            /* Raw length of match */
 } ZSTD_match_t;
 
+typedef struct {
+    U32 offset;         /* Raw offset of the sequence's match (not an offset code) */
+    U32 litLength;      /* Length of literals prior to match */
+    U32 matchLength;    /* Raw length of match */
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;          /* The start of the sequences */
+  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
+  size_t posInSequence; /* The position, in bytes, within the sequence at seq[pos] where reading
+                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
+  size_t size;          /* The number of sequences. <= capacity. */
+  size_t capacity;      /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
+typedef struct {
     int price;
     U32 off;
     U32 mlen;
@@ -152,6 +169,7 @@
     optState_t opt;         /* optimal parser state */
     const ZSTD_matchState_t* dictMatchState;
     ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
 };
 
 typedef struct {
@@ -184,19 +202,6 @@
 } ldmParams_t;
 
 typedef struct {
-    U32 offset;
-    U32 litLength;
-    U32 matchLength;
-} rawSeq;
-
-typedef struct {
-  rawSeq* seq;     /* The start of the sequences */
-  size_t pos;      /* The position where reading stopped. <= size. */
-  size_t size;     /* The number of sequences. <= capacity. */
-  size_t capacity; /* The capacity starting from `seq` pointer */
-} rawSeqStore_t;
-
-typedef struct {
     int collectSequences;
     ZSTD_Sequence* seqStart;
     size_t seqIndex;
@@ -233,6 +238,14 @@
     /* Dedicated dict search algorithm trigger */
     int enableDedicatedDictSearch;
 
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@@ -240,6 +253,16 @@
 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
 
+/**
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -269,6 +292,9 @@
     ZSTD_blockState_t blockState;
     U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
 
+    /* Whether we are streaming or not */
+    ZSTD_buffered_policy_e bufferedPolicy;
+
     /* streaming */
     char*  inBuff;
     size_t inBuffSize;
@@ -282,6 +308,10 @@
     ZSTD_cStreamStage streamStage;
     U32    frameEnded;
 
+    /* Stable in/out buffer verification */
+    ZSTD_inBuffer expectedInBuffer;
+    size_t expectedOutBufferSize;
+
     /* Dictionary */
     ZSTD_localDict localDict;
     const ZSTD_CDict* cdict;
@@ -302,6 +332,25 @@
     ZSTD_dedicatedDictSearch = 3
 } ZSTD_dictMode_e;
 
+typedef enum {
+    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
+                                 * In this mode we use both the srcSize and the dictSize
+                                 * when selecting and adjusting parameters.
+                                 */
+    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
+                                 * In this mode we only take the srcSize into account when selecting
+                                 * and adjusting parameters.
+                                 */
+    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
+                                 * In this mode we take both the source size and the dictionary size
+                                 * into account when selecting and adjusting the parameters.
+                                 */
+    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustCParams.
+                                 * We don't know what these parameters are for. We default to the legacy
+                                 * behavior of taking both the source size and the dict size into account
+                                 * when selecting and adjusting parameters.
+                                 */
+} ZSTD_cParamMode_e;
 
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -1090,7 +1139,7 @@
  * Note: srcSizeHint == 0 means 0!
  */
 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
 
 /*! ZSTD_initCStream_internal() :
  *  Private use only. Init streaming operation.
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index 77b91a9..5d07352 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -178,6 +178,8 @@
  * else is though.
  */
 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+    if (size == 0)
+        return 0;
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #else
@@ -228,6 +230,9 @@
     ZSTD_cwksp_internal_advance_phase(ws, phase);
     alloc = (BYTE *)ws->allocStart - bytes;
 
+    if (bytes == 0)
+        return NULL;
+
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* over-reserve space */
     alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index dbfce3d..3f3d7c4 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -27,13 +27,6 @@
     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
     if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
     if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    if (cParams->strategy >= ZSTD_btopt) {
-      /* Get out of the way of the optimal parser */
-      U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
-      assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
-      assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
-      params->minMatchLength = minMatch;
-    }
     if (params->hashLog == 0) {
         params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -150,10 +143,10 @@
  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
 static size_t ZSTD_ldm_countBackwardsMatch(
             const BYTE* pIn, const BYTE* pAnchor,
-            const BYTE* pMatch, const BYTE* pBase)
+            const BYTE* pMatch, const BYTE* pMatchBase)
 {
     size_t matchLength = 0;
-    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
         pIn--;
         pMatch--;
         matchLength++;
@@ -161,6 +154,27 @@
     return matchLength;
 }
 
+/** ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, counting resumes backwards from `pExtDictEnd`. */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t backLen = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    /* Continue into the second segment only if the first count stopped exactly at pMatchBase
+     * and pMatchBase is not already the start of the extDict */
+    if (pMatch - backLen == pMatchBase && pMatchBase != pExtDictStart) {
+        DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", backLen);
+        backLen += ZSTD_ldm_countBackwardsMatch(pIn - backLen, pAnchor, pExtDictEnd, pExtDictStart);
+        DEBUGLOG(7, "final backwards match length = %zu", backLen);
+    }
+    return backLen;
+}
+
 /** ZSTD_ldm_fillFastTables() :
  *
  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@@ -336,8 +350,9 @@
                         continue;
                     }
                     curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
-                                                     lowMatchPtr);
+                        ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
+                                                               pMatch, lowMatchPtr,
+                                                               dictStart, dictEnd);
                     curTotalMatchLength = curForwardMatchLength +
                                           curBackwardMatchLength;
                 } else { /* !extDict */
@@ -562,6 +577,23 @@
     return sequence;
 }
 
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 bytesLeft = (U32)(rawSeqStore->posInSequence + nbBytes);   /* distance to cover, measured from the start of seq[pos] */
+    while (bytesLeft && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq const* const seq = &rawSeqStore->seq[rawSeqStore->pos];
+        U32 const seqLen = seq->litLength + seq->matchLength;
+        if (bytesLeft < seqLen) {
+            rawSeqStore->posInSequence = bytesLeft;   /* stop part-way through this sequence */
+            break;
+        }
+        bytesLeft -= seqLen;   /* whole sequence skipped */
+        rawSeqStore->pos++;
+    }
+    if (bytesLeft == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
     void const* src, size_t srcSize)
@@ -577,6 +609,15 @@
     BYTE const* ip = istart;
 
     DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
+    if (cParams->strategy >= ZSTD_btopt) {
+        size_t lastLLSize;
+        ms->ldmSeqStore = rawSeqStore;
+        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
+        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
+        return lastLLSize;
+    }
+
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
     /* Loop through each sequence and apply the block compressor to the lits */
diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h
index 229ea05..6561024 100644
--- a/lib/compress/zstd_ldm.h
+++ b/lib/compress/zstd_ldm.h
@@ -78,6 +78,12 @@
 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
     U32 const minMatch);
 
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
 
 /** ZSTD_ldm_getTableSize() :
  *  Estimate the space needed for long distance matching tables or 0 if LDM is
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 5acc9e0..e55c459 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -764,6 +764,140 @@
     }
 }
 
+/*************************
+*  LDM helper functions  *
+*************************/
+
+/* Struct containing info needed to make decision about ldm inclusion */
+typedef struct {
+    rawSeqStore_t seqStore;         /* External match candidates store for this block */
+    U32 startPosInBlock;            /* Start position of the current match candidate, UINT_MAX when there is none */
+    U32 endPosInBlock;              /* End position (exclusive) of the current match candidate, UINT_MAX when there is none */
+    U32 offset;                     /* Raw offset of the match candidate (ZSTD_REP_MOVE is added to form its offCode) */
+} ZSTD_optLdm_t;
+
+/* ZSTD_optLdm_skipRawSeqStoreBytes():
+ * Advances rawSeqStore by nbBytes, updating its fields 'pos' and 'posInSequence' accordingly.
+ */
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 bytesLeft = (U32)(rawSeqStore->posInSequence + nbBytes);   /* distance to cover, measured from the start of seq[pos] */
+    while (bytesLeft && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq const* const seq = &rawSeqStore->seq[rawSeqStore->pos];
+        U32 const seqLen = seq->litLength + seq->matchLength;
+        if (bytesLeft < seqLen) {
+            rawSeqStore->posInSequence = bytesLeft;   /* stop part-way through this sequence */
+            break;
+        }
+        bytesLeft -= seqLen;   /* whole sequence skipped */
+        rawSeqStore->pos++;
+    }
+    if (bytesLeft == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
+ * Sets optLdm's startPosInBlock, endPosInBlock and offset from the sequence at seqStore.pos
+ * (both positions become UINT_MAX when no match is usable), then advances 'pos' and 'posInSequence' of optLdm->seqStore.
+ */
+static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                                   U32 blockBytesRemaining) {
+    rawSeq currSeq;
+    U32 currBlockEndPos;
+    U32 literalsBytesRemaining;
+    U32 matchBytesRemaining;
+
+    /* Store is empty or exhausted: set both match positions to MAX so that no LDM is used from here on */
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        return;
+    }
+    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
+       based on ldmSeqStore->posInSequence */
+    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
+    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
+    currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
+            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
+            0;
+    matchBytesRemaining = (literalsBytesRemaining == 0) ?
+            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
+            currSeq.matchLength;
+
+    /* If the remaining literals cover the whole rest of the block, no ldm is possible */
+    if (literalsBytesRemaining >= blockBytesRemaining) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
+        return;
+    }
+
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
+    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
+    optLdm->offset = currSeq.offset;
+
+    if (optLdm->endPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match: clamp it and consume the rest of the block */
+        optLdm->endPosInBlock = currBlockEndPos;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
+    } else {
+        /* Consume nb of bytes equal to size of sequence left */
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
+    }
+}
+
+/* ZSTD_optLdm_maybeAddMatch():
+ * Appends the current ldm candidate to 'matches' when currPosInBlock lies inside
+ * [startPosInBlock, endPosInBlock) and at least MINMATCH bytes of the candidate remain.
+ */
+static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
+    U32 const count = *nbMatches;
+    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
+    U32 const ldmLen = optLdm->endPosInBlock - currPosInBlock;   /* bytes of the candidate from currPosInBlock to its end */
+    U32 const ldmOffCode = optLdm->offset + ZSTD_REP_MOVE;
+
+    /* Ensure that current block position is not outside of the match,
+     * and that what is left of the match is long enough */
+    if (currPosInBlock < optLdm->startPosInBlock) return;
+    if (currPosInBlock >= optLdm->endPosInBlock) return;
+    if (ldmLen < MINMATCH) return;
+
+    /* Append when the table is empty, or when the candidate is longer than the last entry and room is left */
+    if (count == 0 || ((ldmLen > matches[count-1].len) && count < ZSTD_OPT_NUM)) {
+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+                 ldmOffCode, ldmLen, currPosInBlock);
+        matches[count].len = ldmLen;
+        matches[count].off = ldmOffCode;
+        *nbMatches = count + 1;
+    }
+}
+
+/* ZSTD_optLdm_processMatchCandidate():
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
+ */
+static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
+                                              U32 currPosInBlock, U32 remainingBytes) {
+    rawSeqStore_t* const store = &optLdm->seqStore;
+
+    /* Nothing to do when the store is empty or fully consumed */
+    if (store->size == 0 || store->pos >= store->size) return;
+
+    if (currPosInBlock >= optLdm->endPosInBlock) {
+        U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+        /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
+         * at the end of a match from the ldm seq store, and will often be some bytes
+         * beyond endPosInBlock. Such "overshoots" are skipped in the store
+         * before the next candidate is fetched. */
+        if (posOvershoot > 0) ZSTD_optLdm_skipRawSeqStoreBytes(store, posOvershoot);
+        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
+    }
+    /* Offer the current candidate to the match table; it is rejected there if it does not cover this position */
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+}
 
 /*-*******************************
 *  Optimal parser
@@ -817,6 +951,11 @@
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
     ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
 
     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -832,7 +971,9 @@
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -945,8 +1086,12 @@
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
+
+                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                                  (U32)(inr-istart), (U32)(iend-inr));
+
                 if (!nbMatches) {
                     DEBUGLOG(7, "rPos:%u : no match found", cur);
                     continue;
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index b809d60..50454a5 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -266,8 +266,6 @@
 
 /* =====   Seq Pool Wrapper   ====== */
 
-static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
-
 typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
 
 static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
@@ -277,7 +275,7 @@
 
 static rawSeqStore_t bufferToSeq(buffer_t buffer)
 {
-    rawSeqStore_t seq = {NULL, 0, 0, 0};
+    rawSeqStore_t seq = kNullRawSeqStore;
     seq.seq = (rawSeq*)buffer.start;
     seq.capacity = buffer.capacity / sizeof(rawSeq);
     return seq;
@@ -819,7 +817,6 @@
     roundBuff_t roundBuff;
     serialState_t serial;
     rsyncState_t rsync;
-    unsigned singleBlockingThread;
     unsigned jobIDMask;
     unsigned doneJobID;
     unsigned nextJobID;
@@ -885,7 +882,7 @@
 
 /* ZSTDMT_CCtxParam_setNbWorkers():
  * Internal use only */
-size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
+static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
 {
     return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
 }
@@ -944,11 +941,6 @@
 #endif
 }
 
-ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
-{
-    return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem, NULL);
-}
-
 
 /* ZSTDMT_releaseAllJobResources() :
  * note : ensure all workers are killed first ! */
@@ -1020,65 +1012,6 @@
             + mtctx->roundBuff.capacity;
 }
 
-/* Internal only */
-size_t
-ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
-                                   ZSTDMT_parameter parameter,
-                                   int value)
-{
-    DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
-    switch(parameter)
-    {
-    case ZSTDMT_p_jobSize :
-        DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
-    case ZSTDMT_p_overlapLog :
-        DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
-    case ZSTDMT_p_rsyncable :
-        DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
-        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
-    default :
-        return ERROR(parameter_unsupported);
-    }
-}
-
-size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
-{
-    DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
-    return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
-}
-
-size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
-{
-    switch (parameter) {
-    case ZSTDMT_p_jobSize:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
-    case ZSTDMT_p_overlapLog:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
-    case ZSTDMT_p_rsyncable:
-        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
-    default:
-        return ERROR(parameter_unsupported);
-    }
-}
-
-/* Sets parameters relevant to the compression job,
- * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
-{
-    ZSTD_CCtx_params jobParams = *params;
-    /* Clear parameters related to multithreading */
-    jobParams.forceWindow = 0;
-    jobParams.nbWorkers = 0;
-    jobParams.jobSize = 0;
-    jobParams.overlapLog = 0;
-    jobParams.rsyncable = 0;
-    ZSTD_memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
-    ZSTD_memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
-    return jobParams;
-}
-
 
 /* ZSTDMT_resize() :
  * @return : error code if fails, 0 on success */
@@ -1107,7 +1040,7 @@
     DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
                 compressionLevel);
     mtctx->params.compressionLevel = compressionLevel;
-    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
         cParams.windowLog = saved_wlog;
         mtctx->params.cParams = cParams;
     }
@@ -1194,8 +1127,8 @@
     if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
-         * based on chainLog instead. */
-        jobLog = MAX(21, params->cParams.chainLog + 4);
+         * based on cycleLog instead. */
+        jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
     } else {
         jobLog = MAX(20, params->cParams.windowLog + 2);
     }
@@ -1249,174 +1182,6 @@
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }
 
-static unsigned
-ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
-{
-    assert(nbWorkers>0);
-    {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
-        size_t const jobMaxSize = jobSizeTarget << 2;
-        size_t const passSizeMax = jobMaxSize * nbWorkers;
-        unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
-        unsigned const nbJobsLarge = multiplier * nbWorkers;
-        unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
-        unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
-        return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
-}   }
-
-/* ZSTDMT_compress_advanced_internal() :
- * This is a blocking function : it will only give back control to caller after finishing its compression job.
- */
-static size_t
-ZSTDMT_compress_advanced_internal(
-                ZSTDMT_CCtx* mtctx,
-                void* dst, size_t dstCapacity,
-          const void* src, size_t srcSize,
-          const ZSTD_CDict* cdict,
-                ZSTD_CCtx_params params)
-{
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
-    size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
-    size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
-    const char* const srcStart = (const char*)src;
-    size_t remainingSrcSize = srcSize;
-    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize));  /* presumes avgJobSize >= 256 KB, which should be the case */
-    size_t frameStartPos = 0, dstBufferPos = 0;
-    assert(jobParams.nbWorkers == 0);
-    assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
-
-    params.jobSize = (U32)avgJobSize;
-    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
-                nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
-
-    if ((nbJobs==1) | (params.nbWorkers<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
-        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
-        DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
-        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
-    }
-
-    assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
-    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-    /* LDM doesn't even try to load the dictionary in single-ingestion mode */
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
-        return ERROR(memory_allocation);
-
-    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , "");  /* only expands if necessary */
-
-    {   unsigned u;
-        for (u=0; u<nbJobs; u++) {
-            size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
-            size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
-            buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
-            buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
-            size_t dictSize = u ? overlapSize : 0;
-
-            mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
-            mtctx->jobs[u].prefix.size = dictSize;
-            mtctx->jobs[u].src.start = srcStart + frameStartPos;
-            mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0);  /* avoid job.src.size == 0 */
-            mtctx->jobs[u].consumed = 0;
-            mtctx->jobs[u].cSize = 0;
-            mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
-            mtctx->jobs[u].fullFrameSize = srcSize;
-            mtctx->jobs[u].params = jobParams;
-            /* do not calculate checksum within sections, but write it in header for first section */
-            mtctx->jobs[u].dstBuff = dstBuffer;
-            mtctx->jobs[u].cctxPool = mtctx->cctxPool;
-            mtctx->jobs[u].bufPool = mtctx->bufPool;
-            mtctx->jobs[u].seqPool = mtctx->seqPool;
-            mtctx->jobs[u].serial = &mtctx->serial;
-            mtctx->jobs[u].jobID = u;
-            mtctx->jobs[u].firstJob = (u==0);
-            mtctx->jobs[u].lastJob = (u==nbJobs-1);
-
-            DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u  (%u bytes)", u, (U32)jobSize);
-            DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
-            POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
-
-            frameStartPos += jobSize;
-            dstBufferPos += dstBufferCapacity;
-            remainingSrcSize -= jobSize;
-    }   }
-
-    /* collect result */
-    {   size_t error = 0, dstPos = 0;
-        unsigned jobID;
-        for (jobID=0; jobID<nbJobs; jobID++) {
-            DEBUGLOG(5, "waiting for job %u ", jobID);
-            ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
-            while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
-                DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
-                ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
-            }
-            ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
-            DEBUGLOG(5, "ready to write job %u ", jobID);
-
-            {   size_t const cSize = mtctx->jobs[jobID].cSize;
-                if (ZSTD_isError(cSize)) error = cSize;
-                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
-                if (jobID) {   /* note : job 0 is written directly at dst, which is correct position */
-                    if (!error)
-                        ZSTD_memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize);  /* may overlap when job compressed within dst */
-                    if (jobID >= compressWithinDst) {  /* job compressed into its own buffer, which must be released */
-                        DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
-                        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-                }   }
-                mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-                mtctx->jobs[jobID].cSize = 0;
-                dstPos += cSize ;
-            }
-        }  /* for (jobID=0; jobID<nbJobs; jobID++) */
-
-        DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
-        if (params.fParams.checksumFlag) {
-            U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
-            if (dstPos + 4 > dstCapacity) {
-                error = ERROR(dstSize_tooSmall);
-            } else {
-                DEBUGLOG(4, "writing checksum : %08X \n", checksum);
-                MEM_writeLE32((char*)dst + dstPos, checksum);
-                dstPos += 4;
-        }   }
-
-        if (!error) DEBUGLOG(4, "compressed size : %u  ", (U32)dstPos);
-        return error ? error : dstPos;
-    }
-}
-
-size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                                void* dst, size_t dstCapacity,
-                          const void* src, size_t srcSize,
-                          const ZSTD_CDict* cdict,
-                                ZSTD_parameters params,
-                                int overlapLog)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
-    cctxParams.overlapLog = overlapLog;
-    return ZSTDMT_compress_advanced_internal(mtctx,
-                                             dst, dstCapacity,
-                                             src, srcSize,
-                                             cdict, cctxParams);
-}
-
-
-size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
-                           void* dst, size_t dstCapacity,
-                     const void* src, size_t srcSize,
-                           int compressionLevel)
-{
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
-    int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
-    params.fParams.contentSizeFlag = 1;
-    return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
-}
-
-
 /* ====================================== */
 /* =======      Streaming API     ======= */
 /* ====================================== */
@@ -1441,16 +1206,6 @@
     if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
     if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
 
-    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
-    if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
-        DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
-        assert(singleThreadParams.nbWorkers == 0);
-        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
-                                         dict, dictSize, cdict,
-                                         &singleThreadParams, pledgedSrcSize);
-    }
-
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
 
     if (mtctx->allJobsCompleted == 0) {   /* previous compression not correctly finished */
@@ -1539,53 +1294,6 @@
     return 0;
 }
 
-size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-                             const void* dict, size_t dictSize,
-                                   ZSTD_parameters params,
-                                   unsigned long long pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;  /* retrieve sticky params */
-    DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
-                                       cctxParams, pledgedSrcSize);
-}
-
-size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
-                               const ZSTD_CDict* cdict,
-                                     ZSTD_frameParameters fParams,
-                                     unsigned long long pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams = mtctx->params;
-    if (cdict==NULL) return ERROR(dictionary_wrong);   /* method incompatible with NULL cdict */
-    cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
-    cctxParams.fParams = fParams;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
-                                       cctxParams, pledgedSrcSize);
-}
-
-
-/* ZSTDMT_resetCStream() :
- * pledgedSrcSize can be zero == unknown (for the time being)
- * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
- * as `0` might mean "empty" in the future */
-size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
-{
-    if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
-                                       pledgedSrcSize);
-}
-
-size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
-    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
-    ZSTD_CCtx_params cctxParams = mtctx->params;   /* retrieve sticky params */
-    DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
-    cctxParams.cParams = params.cParams;
-    cctxParams.fParams = params.fParams;
-    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
 
 /* ZSTDMT_writeLastEmptyBlock()
  * Write a single empty block with an end-of-frame to finish a frame.
@@ -1977,6 +1685,16 @@
         pos = 0;
         prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
         hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+        if ((hash & hitMask) == hitMask) {
+            /* We're already at a sync point so don't load any more until
+             * we're able to flush this sync point.
+             * This likely happened because the job table was full so we
+             * couldn't add our job.
+             */
+            syncPoint.toLoad = 0;
+            syncPoint.flush = 1;
+            return syncPoint;
+        }
     } else {
         /* We don't have enough bytes buffered to initialize the hash, but
          * we know we have at least RSYNC_LENGTH bytes total.
@@ -2031,34 +1749,11 @@
     assert(output->pos <= output->size);
     assert(input->pos  <= input->size);
 
-    if (mtctx->singleBlockingThread) {  /* delegate to single-thread (synchronous) */
-        return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
-    }
-
     if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
         /* current frame being ended. Only flush/end are allowed */
         return ERROR(stage_wrong);
     }
 
-    /* single-pass shortcut (note : synchronous-mode) */
-    if ( (!mtctx->params.rsyncable)   /* rsyncable mode is disabled */
-      && (mtctx->nextJobID == 0)      /* just started */
-      && (mtctx->inBuff.filled == 0)  /* nothing buffered */
-      && (!mtctx->jobReady)           /* no job already created */
-      && (endOp == ZSTD_e_end)        /* end order */
-      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
-        size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
-                (char*)output->dst + output->pos, output->size - output->pos,
-                (const char*)input->src + input->pos, input->size - input->pos,
-                mtctx->cdict, mtctx->params);
-        if (ZSTD_isError(cSize)) return cSize;
-        input->pos = input->size;
-        output->pos += cSize;
-        mtctx->allJobsCompleted = 1;
-        mtctx->frameEnded = 1;
-        return 0;
-    }
-
     /* fill input buffer */
     if ( (!mtctx->jobReady)
       && (input->size > input->pos) ) {   /* support NULL input */
@@ -2086,8 +1781,16 @@
             mtctx->inBuff.filled += syncPoint.toLoad;
             forwardInputProgress = syncPoint.toLoad>0;
         }
-        if ((input->pos < input->size) && (endOp == ZSTD_e_end))
-            endOp = ZSTD_e_flush;   /* can't end now : not all input consumed */
+    }
+    if ((input->pos < input->size) && (endOp == ZSTD_e_end)) {
+        /* Can't end yet because the input is not fully consumed.
+            * We are in one of these cases:
+            * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job.
+            * - We filled the input buffer: flush this job but don't end the frame.
+            * - We hit a synchronization point: flush this job but don't end the frame.
+            */
+        assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable);
+        endOp = ZSTD_e_flush;
     }
 
     if ( (mtctx->jobReady)
@@ -2106,47 +1809,3 @@
         return remainingToFlush;
     }
 }
-
-
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
-{
-    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
-
-    /* recommended next input size : fill current input buffer */
-    return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
-}
-
-
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
-{
-    size_t const srcSize = mtctx->inBuff.filled;
-    DEBUGLOG(5, "ZSTDMT_flushStream_internal");
-
-    if ( mtctx->jobReady     /* one job ready for a worker to pick up */
-      || (srcSize > 0)       /* still some data within input buffer */
-      || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
-           DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
-                        (U32)srcSize, (U32)endFrame);
-        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
-    }
-
-    /* check if there is any data available to flush */
-    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
-}
-
-
-size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
-{
-    DEBUGLOG(5, "ZSTDMT_flushStream");
-    if (mtctx->singleBlockingThread)
-        return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
-}
-
-size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
-{
-    DEBUGLOG(4, "ZSTDMT_endStream");
-    if (mtctx->singleBlockingThread)
-        return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
-}
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index 8aad78c..0a9e551 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -19,24 +19,12 @@
 /* Note : This is an internal API.
  *        These APIs used to be exposed with ZSTDLIB_API,
  *        because it used to be the only way to invoke MT compression.
- *        Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
- *        instead.
- *
- *        If you depend on these APIs and can't switch, then define
- *        ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
- *        However, we may completely remove these functions in a future
- *        release, so please switch soon.
+ *        Now, you must use ZSTD_compress2() and ZSTD_compressStream2() instead.
  *
  *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
  *        otherwise ZSTDMT_createCCtx*() will fail.
  */
 
-#ifdef ZSTD_LEGACY_MULTITHREADED_API
-#  define ZSTDMT_API ZSTDLIB_API
-#else
-#  define ZSTDMT_API
-#endif
-
 /* ===   Dependencies   === */
 #include "../common/zstd_deps.h"   /* size_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
@@ -54,79 +42,34 @@
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
 
 
+/* ========================================================
+ * ===  Private interface, for use by ZSTD_compress.c   ===
+ * ===  Not exposed in libzstd. Never invoke directly   ===
+ * ======================================================== */
+
 /* ===   Memory management   === */
 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
 /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
-ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
-/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
-ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
-                                                    ZSTD_customMem cMem,
-                                                    ZSTD_threadPool *pool);
-ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+                                        ZSTD_customMem cMem,
+                                        ZSTD_threadPool *pool);
+size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
 
-ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
-
-
-/* ===   Simple one-pass compression function   === */
-
-ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
-                                       void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize,
-                                       int compressionLevel);
-
-
+size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
 
 /* ===   Streaming functions   === */
 
-ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
-ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
+size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
 
-ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
-ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-
-ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
-
-
-/* ===   Advanced functions and parameters  === */
-
-ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                                          void* dst, size_t dstCapacity,
-                                    const void* src, size_t srcSize,
-                                    const ZSTD_CDict* cdict,
-                                          ZSTD_parameters params,
-                                          int overlapLog);
-
-ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
-                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
-                                        ZSTD_parameters params,
-                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */
-
-ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
-                                        const ZSTD_CDict* cdict,
-                                        ZSTD_frameParameters fparams,
-                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */
-
-/* ZSTDMT_parameter :
- * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
-typedef enum {
-    ZSTDMT_p_jobSize,     /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
-    ZSTDMT_p_overlapLog,  /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
-    ZSTDMT_p_rsyncable    /* Enables rsyncable mode. */
-} ZSTDMT_parameter;
-
-/* ZSTDMT_setMTCtxParameter() :
- * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
- * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
- * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
-
-/* ZSTDMT_getMTCtxParameter() :
- * Query the ZSTDMT_CCtx for a parameter value.
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
-
+/*! ZSTDMT_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+                    const ZSTD_CDict* cdict,
+                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
 
 /*! ZSTDMT_compressStream_generic() :
  *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
@@ -135,16 +78,10 @@
  *           0 if fully flushed
  *           or an error code
  *  note : needs to be init using any ZSTD_initCStream*() variant */
-ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
-                                                ZSTD_outBuffer* output,
-                                                ZSTD_inBuffer* input,
-                                                ZSTD_EndDirective endOp);
-
-
-/* ========================================================
- * ===  Private interface, for use by ZSTD_compress.c   ===
- * ===  Not exposed in libzstd. Never invoke directly   ===
- * ======================================================== */
+size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                     ZSTD_outBuffer* output,
+                                     ZSTD_inBuffer* input,
+                                     ZSTD_EndDirective endOp);
 
  /*! ZSTDMT_toFlushNow()
   *  Tell how many bytes are ready to be flushed immediately.
@@ -154,15 +91,6 @@
   *  therefore flushing is limited by speed of oldest job. */
 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
 
-/*! ZSTDMT_CCtxParam_setMTCtxParameter()
- *  like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
-size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
-
-/*! ZSTDMT_CCtxParam_setNbWorkers()
- *  Set nbWorkers, and clamp it.
- *  Also reset jobSize and overlapLog */
-size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
-
 /*! ZSTDMT_updateCParams_whileCompressing() :
  *  Updates only a selected set of compression parameters, to remain compatible with current frame.
  *  New parameters will be applied to next compression job. */
@@ -175,17 +103,6 @@
 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
 
 
-/*! ZSTDMT_initCStream_internal() :
- *  Private use only. Init streaming operation.
- *  expects params to be valid.
- *  must receive dict, or cdict, or none, but not both.
- *  @return : 0, or an error code */
-size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
-                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
-                    const ZSTD_CDict* cdict,
-                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
-
-
 #if defined (__cplusplus)
 }
 #endif
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index fcf1767..21f846b 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -99,7 +99,7 @@
     assert(dctx->streamStage == zdss_init);
     dctx->format = ZSTD_f_zstd1;
     dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-    dctx->outBufferMode = ZSTD_obm_buffered;
+    dctx->outBufferMode = ZSTD_bm_buffered;
     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
 }
 
@@ -525,7 +525,7 @@
             ip += 4;
         }
 
-        frameSizeInfo.compressedSize = ip - ipstart;
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
         frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
                                         ? zfh.frameContentSize
                                         : nbBlocks * zfh.blockSizeMax;
@@ -656,13 +656,13 @@
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
             break;
         case bt_raw :
-            decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
+            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
             break;
         case bt_rle :
-            decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
             break;
         case bt_reserved :
         default:
@@ -699,7 +699,7 @@
     /* Allow caller to get size read */
     *srcPtr = ip;
     *srcSizePtr = remainingSrcSize;
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
@@ -732,7 +732,7 @@
             decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
             if (ZSTD_isError(decodedSize)) return decodedSize;
 
-            assert(decodedSize <=- dstCapacity);
+            assert(decodedSize <= dstCapacity);
             dst = (BYTE*)dst + decodedSize;
             dstCapacity -= decodedSize;
 
@@ -772,15 +772,13 @@
                 (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
              && (moreThan1Frame==1),
                 srcSize_wrong,
-                "at least one frame successfully completed, but following "
-                "bytes are garbage: it's more likely to be a srcSize error, "
-                "specifying more bytes than compressed size of frame(s). This "
-                "error message replaces ERROR(prefix_unknown), which would be "
-                "confusing, as the first header is actually correct. Note that "
-                "one could be unlucky, it might be a corruption error instead, "
-                "happening right at the place where we expect zstd magic "
-                "bytes. But this is _much_ less likely than a srcSize field "
-                "error.");
+                "At least one frame successfully completed, "
+                "but following bytes are garbage: "
+                "it's more likely to be a srcSize error, "
+                "specifying more input bytes than size of frame(s). "
+                "Note: one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic bytes. "
+                "But this is _much_ less likely than a srcSize field error.");
             if (ZSTD_isError(res)) return res;
             assert(res <= dstCapacity);
             if (res != 0)
@@ -792,7 +790,7 @@
 
     RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
 
-    return (BYTE*)dst - (BYTE*)dststart;
+    return (size_t)((BYTE*)dst - (BYTE*)dststart);
 }
 
 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
@@ -1089,7 +1087,7 @@
                                                 workspace, workspaceSize);
 #else
         size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
-                                                dictPtr, dictEnd - dictPtr,
+                                                dictPtr, (size_t)(dictEnd - dictPtr),
                                                 workspace, workspaceSize);
 #endif
         RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
@@ -1098,7 +1096,7 @@
 
     {   short offcodeNCount[MaxOff+1];
         unsigned offcodeMaxValue = MaxOff, offcodeLog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
         RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
@@ -1113,7 +1111,7 @@
 
     {   short matchlengthNCount[MaxML+1];
         unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
         RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
@@ -1128,7 +1126,7 @@
 
     {   short litlengthNCount[MaxLL+1];
         unsigned litlengthMaxValue = MaxLL, litlengthLog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
         RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
         RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
         RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
@@ -1151,7 +1149,7 @@
             entropy->rep[i] = rep;
     }   }
 
-    return dictPtr - (const BYTE*)dict;
+    return (size_t)(dictPtr - (const BYTE*)dict);
 }
 
 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
@@ -1414,7 +1412,7 @@
 
 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
 {
-    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
 }
 
 ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
@@ -1431,8 +1429,8 @@
             ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
             return bounds;
         case ZSTD_d_stableOutBuffer:
-            bounds.lowerBound = (int)ZSTD_obm_buffered;
-            bounds.upperBound = (int)ZSTD_obm_stable;
+            bounds.lowerBound = (int)ZSTD_bm_buffered;
+            bounds.upperBound = (int)ZSTD_bm_stable;
             return bounds;
         case ZSTD_d_forceIgnoreChecksum:
             bounds.lowerBound = (int)ZSTD_d_validateChecksum;
@@ -1464,16 +1462,16 @@
 {
     switch (param) {
         case ZSTD_d_windowLogMax:
-            *value = ZSTD_highbit32((U32)dctx->maxWindowSize);
+            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
             return 0;
         case ZSTD_d_format:
-            *value = dctx->format;
+            *value = (int)dctx->format;
             return 0;
         case ZSTD_d_stableOutBuffer:
-            *value = dctx->outBufferMode;
+            *value = (int)dctx->outBufferMode;
             return 0;
         case ZSTD_d_forceIgnoreChecksum:
-            *value = dctx->forceIgnoreChecksum;
+            *value = (int)dctx->forceIgnoreChecksum;
             return 0;
         default:;
     }
@@ -1495,7 +1493,7 @@
             return 0;
         case ZSTD_d_stableOutBuffer:
             CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
-            dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
+            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
             return 0;
         case ZSTD_d_forceIgnoreChecksum:
             CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
@@ -1571,7 +1569,7 @@
 {
     if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
         zds->oversizedDuration++;
-    else 
+    else
         zds->oversizedDuration = 0;
 }
 
@@ -1585,7 +1583,7 @@
 {
     ZSTD_outBuffer const expect = zds->expectedOutBuffer;
     /* No requirement when ZSTD_obm_stable is not enabled. */
-    if (zds->outBufferMode != ZSTD_obm_stable)
+    if (zds->outBufferMode != ZSTD_bm_stable)
         return 0;
     /* Any buffer is allowed in zdss_init, this must be the same for every other call until
      * the context is reset.
@@ -1595,7 +1593,7 @@
     /* The buffer must match our expectation exactly. */
     if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
         return 0;
-    RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
+    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
 }
 
 /* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
@@ -1607,7 +1605,7 @@
             ZSTD_DStream* zds, char** op, char* oend,
             void const* src, size_t srcSize) {
     int const isSkipFrame = ZSTD_isSkipFrame(zds);
-    if (zds->outBufferMode == ZSTD_obm_buffered) {
+    if (zds->outBufferMode == ZSTD_bm_buffered) {
         size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
         size_t const decodedSize = ZSTD_decompressContinue(zds,
                 zds->outBuff + zds->outStart, dstSize, src, srcSize);
@@ -1620,14 +1618,14 @@
         }
     } else {
         /* Write directly into the output buffer */
-        size_t const dstSize = isSkipFrame ? 0 : oend - *op;
+        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
         size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
         FORWARD_IF_ERROR(decodedSize, "");
         *op += decodedSize;
         /* Flushing is not needed. */
         zds->streamStage = zdss_read;
         assert(*op <= oend);
-        assert(zds->outBufferMode == ZSTD_obm_stable);
+        assert(zds->outBufferMode == ZSTD_bm_stable);
     }
     return 0;
 }
@@ -1725,10 +1723,10 @@
             if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && zds->fParams.frameType != ZSTD_skippableFrame
                 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
-                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
                 if (cSize <= (size_t)(iend-istart)) {
                     /* shortcut : using single-pass mode */
-                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
                     DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
                     ip = istart + cSize;
@@ -1740,7 +1738,7 @@
             }   }
 
             /* Check output buffer is large enough for ZSTD_odm_stable. */
-            if (zds->outBufferMode == ZSTD_obm_stable
+            if (zds->outBufferMode == ZSTD_bm_stable
                 && zds->fParams.frameType != ZSTD_skippableFrame
                 && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
@@ -1770,7 +1768,7 @@
 
             /* Adapt buffer sizes to frame header instructions */
             {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
-                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
                         ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
                         : 0;
 
@@ -1778,7 +1776,7 @@
 
                 {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
                     int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
-                    
+
                     if (tooSmall || tooLarge) {
                         size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
                         DEBUGLOG(4, "inBuff  : from %u to %u",
@@ -1807,7 +1805,7 @@
 
         case zdss_read:
             DEBUGLOG(5, "stage zdss_read");
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
                 DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
                 if (neededInSize==0) {  /* end of frame */
                     zds->streamStage = zdss_init;
@@ -1837,7 +1835,7 @@
                     RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
                                     corruption_detected,
                                     "should never happen");
-                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
                 }
                 ip += loadedSize;
                 zds->inPos += loadedSize;
@@ -1851,7 +1849,7 @@
             }
         case zdss_flush:
             {   size_t const toFlushSize = zds->outEnd - zds->outStart;
-                size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
+                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
                 op += flushedSize;
                 zds->outStart += flushedSize;
                 if (flushedSize == toFlushSize) {  /* flush completed */
diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h
index 3c8e955..f80b471 100644
--- a/lib/decompress/zstd_decompress_internal.h
+++ b/lib/decompress/zstd_decompress_internal.h
@@ -99,11 +99,6 @@
     ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
 } ZSTD_dictUses_e;
 
-typedef enum {
-    ZSTD_obm_buffered = 0,  /* Buffer the output */
-    ZSTD_obm_stable = 1     /* ZSTD_outBuffer is stable */
-} ZSTD_outBufferMode_e;
-
 struct ZSTD_DCtx_s
 {
     const ZSTD_seqSymbol* LLTptr;
@@ -158,7 +153,7 @@
     U32 legacyVersion;
     U32 hostageByte;
     int noForwardProgress;
-    ZSTD_outBufferMode_e outBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
     ZSTD_outBuffer expectedOutBuffer;
 
     /* workspace */
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index bd9dd86..c78af13 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -955,7 +955,7 @@
   free(selection.dictContent);
 }
 
-COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
         size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
         size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
 
@@ -963,8 +963,8 @@
   size_t largestCompressed = 0;
   BYTE* customDictContentEnd = customDictContent + dictContentSize;
 
-  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
-  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
   double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
 
   if (!largestDictbuffer || !candidateDictBuffer) {
@@ -976,7 +976,7 @@
   /* Initial dictionary size and compressed size */
   memcpy(largestDictbuffer, customDictContent, dictContentSize);
   dictContentSize = ZDICT_finalizeDictionary(
-    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+    largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
     samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
 
   if (ZDICT_isError(dictContentSize)) {
@@ -1010,7 +1010,7 @@
   while (dictContentSize < largestDict) {
     memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
     dictContentSize = ZDICT_finalizeDictionary(
-      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+      candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
       samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
 
     if (ZDICT_isError(dictContentSize)) {
@@ -1088,7 +1088,7 @@
   {
     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                               dictBufferCapacity, parameters);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
         ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
         totalCompressedSize);
 
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index f2aa0e3..9f1cb5f 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -152,6 +152,6 @@
  * smallest dictionary within a specified regression of the compressed size
  * from the largest dictionary.
  */
- COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
                        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
                        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 8265e2c..5e60f24 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -487,7 +487,7 @@
                                                     parameters, segmentFreqs);
 
     const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
          ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
          totalCompressedSize);
 
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index ff2e77f..b782993 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -279,7 +279,7 @@
 #  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
-#  elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
+#  elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
 #  elif (ZDICT_GCC_VERSION >= 301)
 #    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index eb23628..13115be 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1280,7 +1280,11 @@
 *  Basic Types
 *********************************************************/
 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
 typedef  uint8_t BYTE;
 typedef uint16_t U16;
 typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 32d45a6..9abb6d0 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -89,7 +89,11 @@
 *  Basic Types
 *****************************************************************/
 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index b541eae..a19cb20 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -90,7 +90,11 @@
 *  Basic Types
 *****************************************************************/
 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 19fda89..77d5255 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -52,7 +52,11 @@
 *  Basic Types
 *****************************************************************/
 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 243d222..ca8d5c9 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -80,7 +80,11 @@
 *  Basic Types
 *****************************************************************/
 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index c56f582..c4ac7db 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -82,7 +82,11 @@
 *  Basic Types
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 9f3a597..049ba47 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -242,7 +242,11 @@
 *  Basic Types
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
diff --git a/lib/zstd.h b/lib/zstd.h
index 75d5bb2..b99ce97 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -339,7 +339,9 @@
                                      * for large inputs, by finding large matches at long distance.
                                      * It increases memory usage and window size.
                                      * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
-                                     * except when expressly set to a different value. */
+                                     * except when expressly set to a different value.
+                                     * Note: will be enabled by default if the window size is >= 128 MB
+                                     * (ZSTD_c_windowLog >= 27) and compression strategy >= ZSTD_btopt (== compression level 16+) */
     ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
                               * Larger values increase memory usage and compression ratio,
                               * but decrease compression speed.
@@ -413,6 +415,10 @@
      * ZSTD_c_targetCBlockSize
      * ZSTD_c_srcSizeHint
      * ZSTD_c_enableDedicatedDictSearch
+     * ZSTD_c_stableInBuffer
+     * ZSTD_c_stableOutBuffer
+     * ZSTD_c_blockDelimiters
+     * ZSTD_c_validateSequences
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -424,7 +430,11 @@
      ZSTD_c_experimentalParam5=1002,
      ZSTD_c_experimentalParam6=1003,
      ZSTD_c_experimentalParam7=1004,
-     ZSTD_c_experimentalParam8=1005
+     ZSTD_c_experimentalParam8=1005,
+     ZSTD_c_experimentalParam9=1006,
+     ZSTD_c_experimentalParam10=1007,
+     ZSTD_c_experimentalParam11=1008,
+     ZSTD_c_experimentalParam12=1009
 } ZSTD_cParameter;
 
 typedef struct {
@@ -1114,21 +1124,40 @@
 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
 
 typedef struct {
-    unsigned int matchPos; /* Match pos in dst */
-    /* If seqDef.offset > 3, then this is seqDef.offset - 3
-     * If seqDef.offset < 3, then this is the corresponding repeat offset
-     * But if seqDef.offset < 3 and litLength == 0, this is the
-     *   repeat offset before the corresponding repeat offset
-     * And if seqDef.offset == 3 and litLength == 0, this is the
-     *   most recent repeat offset - 1
-     */
-    unsigned int offset;
-    unsigned int litLength; /* Literal length */
-    unsigned int matchLength; /* Match length */
-    /* 0 when seq not rep and seqDef.offset otherwise
-     * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
-     */
-    unsigned int rep;
+    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
+                               * If offset == 0 and matchLength == 0, this sequence represents the last
+                               * literals in the block of litLength size.
+                               */
+
+    unsigned int litLength;   /* Literal length of the sequence. */
+    unsigned int matchLength; /* Match length of the sequence. */
+
+                              /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+                               * In this case, we will treat the sequence as a marker for a block boundary.
+                               */
+    
+    unsigned int rep;         /* Indicates which repeat offset, if any, the field 'offset' refers to.
+                               * Takes a value in the range [0, 3].
+                               * 
+                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
+                               * recency order. For more detail, see doc/zstd_compression_format.md
+                               * 
+                               * If rep == 0, then 'offset' does not contain a repeat offset.
+                               * If rep > 0:
+                               *  If litLength != 0:
+                               *      rep == 1 --> offset == repeat_offset_1
+                               *      rep == 2 --> offset == repeat_offset_2
+                               *      rep == 3 --> offset == repeat_offset_3
+                               *  If litLength == 0:
+                               *      rep == 1 --> offset == repeat_offset_2
+                               *      rep == 2 --> offset == repeat_offset_3
+                               *      rep == 3 --> offset == repeat_offset_1 - 1
+                               * 
+                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+                               * use this 'rep' field at all (as of now).
+                               */
 } ZSTD_Sequence;
 
 typedef struct {
@@ -1273,14 +1302,74 @@
  *           or an error code (if srcSize is too small) */
 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 
-/*! ZSTD_getSequences() :
- * Extract sequences from the sequence store
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ * 
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
+ * 
  * zc can be used to insert custom compression params.
  * This function invokes ZSTD_compress2
- * @return : number of sequences extracted
+ * 
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
  */
-ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-    size_t outSeqsSize, const void* src, size_t srcSize);
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ * 
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
+ * 
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame. 
+ * 
+ * The compression behavior changes based on cctx params. In particular:
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ *    the block size derived from the cctx, and sequences may be split. This is the default setting.
+ * 
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ * 
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1, then if a sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
+ * 
+ *    In addition to the two adjustable experimental params, there are other important cctx params.
+ *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ * 
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                  const void* src, size_t srcSize);
 
 
 /***************************************
@@ -1451,6 +1540,12 @@
  *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
 /*! ZSTD_getCParams() :
  * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  * `estimatedSrcSize` value is optional, select 0 if not known */
@@ -1621,6 +1716,87 @@
  */
 #define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
 
+/* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the compressor, and
+ * compression will fail if it ever changes. This means the only flush
+ * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+ * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ * 
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * 
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ * 
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ * 
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ * 
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ * 
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
diff --git a/programs/Makefile b/programs/Makefile
index 6cb4a57..e525d8e 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -15,7 +15,14 @@
 # zstd-decompress : decompressor-only version of zstd
 # ##########################################################################
 
-ZSTDDIR = ../lib
+.PHONY: default
+default: zstd-release
+
+# silent mode by default; verbose can be triggered by V=1 or VERBOSE=1
+$(V)$(VERBOSE).SILENT:
+
+
+ZSTDDIR := ../lib
 
 # Version numbers
 LIBVER_SRC := $(ZSTDDIR)/zstd.h
@@ -33,64 +40,95 @@
 HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 GREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
-GREP_OPTIONS += --color=never
+  GREP_OPTIONS += --color=never
 endif
 GREP = grep $(GREP_OPTIONS)
 
 ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1)
-ALIGN_LOOP = -falign-loops=32
+  ALIGN_LOOP = -falign-loops=32
 else
-ALIGN_LOOP =
+  ALIGN_LOOP =
 endif
 
-CPPFLAGS+= -DXXH_NAMESPACE=ZSTD_
+DEBUGLEVEL ?= 0
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
 ifeq ($(OS),Windows_NT)   # MinGW assumed
-CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
+  CPPFLAGS += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
 endif
-CFLAGS  ?= -O3
+CFLAGS   ?= -O3
 DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls -Wmissing-prototypes -Wc++-compat
-CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
-FLAGS    = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
+FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 
+ZSTDLIB_COMMON := $(ZSTDDIR)/common
+ZSTDLIB_COMPRESS := $(ZSTDDIR)/compress
+ZSTDLIB_DECOMPRESS := $(ZSTDDIR)/decompress
+ZDICT_DIR := $(ZSTDDIR)/dictBuilder
+ZSTDLEGACY_DIR := $(ZSTDDIR)/legacy
 
-ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
-ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c
-ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
-ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
-ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c
-ZSTDDECOMP_O = $(ZSTDDIR)/decompress/zstd_decompress.o
+vpath %.c $(ZSTDLIB_COMMON) $(ZSTDLIB_COMPRESS) $(ZSTDLIB_DECOMPRESS) $(ZDICT_DIR) $(ZSTDLEGACY_DIR)
+
+ZSTDLIB_COMMON_C := $(wildcard $(ZSTDLIB_COMMON)/*.c)
+ZSTDLIB_COMPRESS_C := $(wildcard $(ZSTDLIB_COMPRESS)/*.c)
+ZSTDLIB_DECOMPRESS_C := $(wildcard $(ZSTDLIB_DECOMPRESS)/*.c)
+ZSTDLIB_CORE_SRC := $(ZSTDLIB_DECOMPRESS_C) $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C)
+ZDICT_SRC := $(wildcard $(ZDICT_DIR)/*.c)
 
 ZSTD_LEGACY_SUPPORT ?= 5
-ZSTDLEGACY_FILES :=
+ZSTDLEGACY_SRC :=
 ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
 ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
-	ZSTDLEGACY_FILES += $(shell ls $(ZSTDDIR)/legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
+  ZSTDLEGACY_SRC += $(shell ls $(ZSTDLEGACY_DIR)/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
 endif
-else
 endif
 
 # Sort files in alphabetical order for reproducible builds
-ZSTDLIB_FILES := $(sort $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES)))
+ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) $(ZDICT_SRC))
+ZSTDLIB_LOCAL_SRC := $(notdir $(ZSTDLIB_FULL_SRC))
+ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_SRC:.c=.o)
 
-ZSTD_CLI_FILES := $(wildcard *.c)
-ZSTD_CLI_OBJ := $(patsubst %.c,%.o,$(ZSTD_CLI_FILES))
+ZSTD_CLI_SRC := $(wildcard *.c)
+ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o)
+
+ZSTD_ALL_SRC := $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC)
+ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o)
+
+UNAME := $(shell uname)
+
+ifndef BUILD_DIR
+ifeq ($(UNAME), Darwin)
+  HASH ?= md5
+else ifeq ($(UNAME), FreeBSD)
+  HASH ?= gmd5sum
+else ifeq ($(UNAME), OpenBSD)
+  HASH ?= md5
+endif
+HASH ?= md5sum
+# HASH_DIR: object sub-directory name, hashed from compiler, flags and core library sources (evaluated lazily, on use)
+HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTDLIB_CORE_SRC) | $(HASH) | cut -f 1 -d " ")
+HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
+ifeq ($(HAVE_HASH),0)
+  $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
+  BUILD_DIR := obj/generic_noconf
+endif
+endif # BUILD_DIR
 
 # Define *.exe as extension for Windows systems
 ifneq (,$(filter Windows%,$(OS)))
-EXT =.exe
-RES64_FILE = windres/zstd64.res
-RES32_FILE = windres/zstd32.res
+  EXT =.exe
+  RES64_FILE = windres/zstd64.res
+  RES32_FILE = windres/zstd32.res
 ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine)))
     RES_FILE = $(RES64_FILE)
 else
     RES_FILE = $(RES32_FILE)
 endif
 else
-EXT =
+  EXT =
 endif
 
 VOID = /dev/null
@@ -103,60 +141,64 @@
 HAVE_PTHREAD := $(shell printf '$(NUM_SYMBOL)include <pthread.h>\nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c)
 HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0)
 ifeq ($(HAVE_THREAD), 1)
-THREAD_MSG := ==> building with threading support
-THREAD_CPP := -DZSTD_MULTITHREAD
-THREAD_LD := -pthread
+  THREAD_MSG := ==> building with threading support
+  THREAD_CPP := -DZSTD_MULTITHREAD
+  THREAD_LD := -pthread
 else
-THREAD_MSG := $(NO_THREAD_MSG)
+  THREAD_MSG := $(NO_THREAD_MSG)
 endif
 
 # zlib detection
 NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support
 HAVE_ZLIB := $(shell printf '$(NUM_SYMBOL)include <zlib.h>\nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c)
 ifeq ($(HAVE_ZLIB), 1)
-ZLIB_MSG := ==> building zstd with .gz compression support
-ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
-ZLIBLD = -lz
+  ZLIB_MSG := ==> building zstd with .gz compression support
+  ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
+  ZLIBLD = -lz
 else
-ZLIB_MSG := $(NO_ZLIB_MSG)
+  ZLIB_MSG := $(NO_ZLIB_MSG)
 endif
 
 # lzma detection
 NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support
 HAVE_LZMA := $(shell printf '$(NUM_SYMBOL)include <lzma.h>\nint main(void) { return 0; }' > have_lzma.c && $(CC) $(FLAGS) -o have_lzma$(EXT) have_lzma.c -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0; rm have_lzma.c)
 ifeq ($(HAVE_LZMA), 1)
-LZMA_MSG := ==> building zstd with .xz/.lzma compression support
-LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS
-LZMALD = -llzma
+  LZMA_MSG := ==> building zstd with .xz/.lzma compression support
+  LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS
+  LZMALD = -llzma
 else
-LZMA_MSG := $(NO_LZMA_MSG)
+  LZMA_MSG := $(NO_LZMA_MSG)
 endif
 
 # lz4 detection
 NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support
 HAVE_LZ4 := $(shell printf '$(NUM_SYMBOL)include <lz4frame.h>\n$(NUM_SYMBOL)include <lz4.h>\nint main(void) { return 0; }' > have_lz4.c && $(CC) $(FLAGS) -o have_lz4$(EXT) have_lz4.c -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0; rm have_lz4.c)
 ifeq ($(HAVE_LZ4), 1)
-LZ4_MSG := ==> building zstd with .lz4 compression support
-LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
-LZ4LD = -llz4
+  LZ4_MSG := ==> building zstd with .lz4 compression support
+  LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
+  LZ4LD = -llz4
 else
-LZ4_MSG := $(NO_LZ4_MSG)
+  LZ4_MSG := $(NO_LZ4_MSG)
 endif
 
 # explicit backtrace enable/disable for Linux & Darwin
 ifeq ($(BACKTRACE), 0)
-DEBUGFLAGS += -DBACKTRACE_ENABLE=0
+  DEBUGFLAGS += -DBACKTRACE_ENABLE=0
 endif
 ifeq (,$(filter Windows%, $(OS)))
 ifeq ($(BACKTRACE), 1)
-DEBUGFLAGS += -DBACKTRACE_ENABLE=1
-DEBUGFLAGS_LD += -rdynamic
+  DEBUGFLAGS += -DBACKTRACE_ENABLE=1
+  DEBUGFLAGS_LD += -rdynamic
 endif
 endif
 
+SET_CACHE_DIRECTORY = \
+	$(MAKE) --no-print-directory $@ \
+    BUILD_DIR=obj/$(HASH_DIR) \
+    CPPFLAGS="$(CPPFLAGS)" \
+    CFLAGS="$(CFLAGS)" \
+    LDFLAGS="$(LDFLAGS)"
 
-.PHONY: default
-default: zstd-release
 
 .PHONY: all
 all: zstd
@@ -164,21 +206,39 @@
 .PHONY: allVariants
 allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy zstd-dictBuilder
 
-$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
-
+.PHONY: zstd  # must always be run
 zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
 zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
 zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
 ifneq (,$(filter Windows%,$(OS)))
 zstd : $(RES_FILE)
 endif
-zstd : $(ZSTDLIB_FILES) $(ZSTD_CLI_OBJ)
+
+ifndef BUILD_DIR
+# generate BUILD_DIR from flags
+
+zstd:
+	$(SET_CACHE_DIRECTORY)
+
+else
+# BUILD_DIR is defined
+
+ZSTD_OBJ := $(addprefix $(BUILD_DIR)/, $(ZSTD_ALL_OBJ))
+$(BUILD_DIR)/zstd : $(ZSTD_OBJ)
 	@echo "$(THREAD_MSG)"
 	@echo "$(ZLIB_MSG)"
 	@echo "$(LZMA_MSG)"
 	@echo "$(LZ4_MSG)"
+	@echo LINK $@
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
+zstd : $(BUILD_DIR)/zstd
+	ln -sf $< $@
+	@echo zstd build completed
+
+endif  # BUILD_DIR
+
+
 .PHONY: zstd-release
 zstd-release: DEBUGFLAGS := -DBACKTRACE_ENABLE=0
 zstd-release: DEBUGFLAGS_LD :=
@@ -190,12 +250,12 @@
 ifneq (,$(filter Windows%,$(OS)))
 zstd32 : $(RES32_FILE)
 endif
-zstd32 : $(ZSTDLIB_FILES) $(ZSTD_CLI_FILES)
+zstd32 : $(ZSTDLIB_FULL_SRC) $(ZSTD_CLI_SRC)
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
 ## zstd-nolegacy: same scope as zstd, with just support of legacy formats removed
 zstd-nolegacy : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
-zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) $(ZSTD_CLI_OBJ)
+zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ)
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
 zstd-nomt : THREAD_CPP :=
@@ -222,7 +282,7 @@
 #        It's unclear at this stage if this is a scenario that must be supported
 .PHONY: zstd-dll
 zstd-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-zstd-dll : ZSTDLIB_FILES =
+zstd-dll : ZSTDLIB_FULL_SRC =
 zstd-dll : $(ZSTD_CLI_OBJ)
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
@@ -237,24 +297,24 @@
 	./zstd -b $(PROFILE_WITH)
 	./zstd -b7i2 $(PROFILE_WITH)
 	./zstd -b5 $(PROFILE_WITH)
-	$(RM) zstd *.o $(ZSTDDECOMP_O) $(ZSTDDIR)/compress/*.o
+	$(RM) zstd *.o
 	case $(CC) in *clang*) if ! [ -e default.profdata ]; then llvm-profdata merge -output=default.profdata default*.profraw; fi ;; esac
 	$(MAKE) zstd MOREFLAGS=-fprofile-use
 
 ## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format.
 zstd-small: CFLAGS = -Os -s
-zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c timefn.c fileio.c
+zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
 
-zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn.c fileio.c
+zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
 
-zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c
+zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
 
 ## zstd-dictBuilder: executable supporting dictionary creation and compression (only)
 zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS
-zstd-dictBuilder: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) $(ZDICT_FILES) zstdcli.c util.c timefn.c fileio.c dibio.c
+zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 zstdmt: zstd
@@ -274,12 +334,11 @@
 
 .PHONY: clean
 clean:
-	$(MAKE) -C $(ZSTDDIR) clean
-	@$(RM) $(ZSTDDIR)/decompress/*.o $(ZSTDDIR)/decompress/zstd_decompress.gcda
-	@$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
+	$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
         zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \
         zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \
         zstd-dictBuilder$(EXT) *.gcda default*.profraw default.profdata have_zlib$(EXT)
+	$(RM) -r obj/*
 	@echo Cleaning completed
 
 MD2ROFF = ronn
@@ -309,15 +368,34 @@
 	man ./zstdgrep.1
 	man ./zstdless.1
 
+
+# Generate .h dependencies automatically
+
+DEPFLAGS = -MT $@ -MMD -MP -MF
+
+$(BUILD_DIR)/%.o : %.c $(BUILD_DIR)/%.d | $(BUILD_DIR)
+	@echo CC $@
+	$(COMPILE.c) $(DEPFLAGS) $(BUILD_DIR)/$*.d $(OUTPUT_OPTION) $<
+
+MKDIR ?= mkdir
+$(BUILD_DIR): ; $(MKDIR) -p $@
+
+DEPFILES := $(ZSTD_OBJ:.o=.d)
+$(DEPFILES):
+
+include $(wildcard $(DEPFILES))
+
+
+
 #-----------------------------------------------------------------------------
 # make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
 #-----------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
 
 HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 EGREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
-EGREP_OPTIONS += --color=never
+  EGREP_OPTIONS += --color=never
 endif
 EGREP = egrep $(EGREP_OPTIONS)
 AWK = awk
@@ -328,7 +406,7 @@
 ## list: Print all targets and their descriptions (if provided)
 .PHONY: list
 list:
-	@TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
+	TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
 		| $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \
 		| $(EGREP) -v  -e '^[^[:alnum:]]' | sort); \
 	{ \
@@ -355,17 +433,17 @@
 mandir      ?= $(datarootdir)/man
 man1dir     ?= $(mandir)/man1
 
-ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS))
-MANDIR  ?= $(PREFIX)/man
-MAN1DIR ?= $(MANDIR)/man1
+ifneq (,$(filter $(UNAME),OpenBSD FreeBSD NetBSD DragonFly SunOS))
+  MANDIR  ?= $(PREFIX)/man
+  MAN1DIR ?= $(MANDIR)/man1
 else
-MAN1DIR ?= $(man1dir)
+  MAN1DIR ?= $(man1dir)
 endif
 
-ifneq (,$(filter $(shell uname),SunOS))
-INSTALL ?= ginstall
+ifneq (,$(filter $(UNAME),SunOS))
+  INSTALL ?= ginstall
 else
-INSTALL ?= install
+  INSTALL ?= install
 endif
 
 INSTALL_PROGRAM ?= $(INSTALL)
@@ -374,36 +452,39 @@
 INSTALL_MAN     ?= $(INSTALL_DATA)
 
 .PHONY: install
-install: zstd
+install:
+	# generate zstd only if not already present
+	[ -e zstd ] || $(MAKE) zstd-release
+	[ -e $(DESTDIR)$(BINDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/
+	[ -e $(DESTDIR)$(MAN1DIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)/
 	@echo Installing binaries
-	@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MAN1DIR)/
-	@$(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
-	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT)
-	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT)
-	@ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT)
-	@$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless
-	@$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep
+	$(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
+	ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT)
+	ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT)
+	ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT)
+	$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless
+	$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep
 	@echo Installing man pages
-	@$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1
-	@ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1
-	@ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1
-	@$(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1
-	@$(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1
+	$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1
+	ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1
+	ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1
+	$(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1
+	$(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1
 	@echo zstd installation completed
 
 .PHONY: uninstall
 uninstall:
-	@$(RM) $(DESTDIR)$(BINDIR)/zstdgrep
-	@$(RM) $(DESTDIR)$(BINDIR)/zstdless
-	@$(RM) $(DESTDIR)$(BINDIR)/zstdcat
-	@$(RM) $(DESTDIR)$(BINDIR)/unzstd
-	@$(RM) $(DESTDIR)$(BINDIR)/zstdmt
-	@$(RM) $(DESTDIR)$(BINDIR)/zstd
-	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1
-	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1
-	@$(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1
-	@$(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1
-	@$(RM) $(DESTDIR)$(MAN1DIR)/zstd.1
+	$(RM) $(DESTDIR)$(BINDIR)/zstdgrep
+	$(RM) $(DESTDIR)$(BINDIR)/zstdless
+	$(RM) $(DESTDIR)$(BINDIR)/zstdcat
+	$(RM) $(DESTDIR)$(BINDIR)/unzstd
+	$(RM) $(DESTDIR)$(BINDIR)/zstdmt
+	$(RM) $(DESTDIR)$(BINDIR)/zstd
+	$(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1
+	$(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1
+	$(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1
+	$(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1
+	$(RM) $(DESTDIR)$(MAN1DIR)/zstd.1
 	@echo zstd programs successfully uninstalled
 
 endif
diff --git a/programs/README.md b/programs/README.md
index 53f716b..cf7f5ba 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -3,7 +3,7 @@
 
 Command Line Interface (CLI) can be created using the `make` command without any additional parameters.
 There are however other Makefile targets that create different variations of CLI:
-- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and support for decompression of legacy zstd formats
+- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and supports decompression of legacy zstd formats
 - `zstd_nolegacy` : Same as `zstd` but without support for legacy zstd formats
 - `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats
 - `zstd-compress` : version of CLI which can only compress into zstd format
@@ -147,76 +147,91 @@
 Arguments :
  -#     : # compression level (1-19, default: 3)
  -d     : decompression
- -D file: use `file` as Dictionary
- -o file: result stored into `file` (only if 1 input file)
- -f     : overwrite output without prompting and (de)compress links
+ -D DICT: use DICT as Dictionary for compression or decompression
+ -o file: result stored into `file` (only 1 output file)
+ -f     : overwrite output without prompting, also (de)compress links
 --rm    : remove source file(s) after successful de/compression
  -k     : preserve source file(s) (default)
  -h/-H  : display help/long help and exit
 
 Advanced arguments :
  -V     : display Version number and exit
+ -c     : force write to standard output, even if it is the console
  -v     : verbose mode; specify multiple times to increase verbosity
  -q     : suppress warnings; specify twice to suppress errors too
- -c     : force write to standard output, even if it is the console
- -l     : print information about zstd compressed files
---exclude-compressed:  only compress files that are not previously compressed
+--no-progress : do not display the progress counter
+ -r     : operate recursively on directories
+--filelist FILE : read list of files to operate upon from FILE
+--output-dir-flat DIR : processed files are stored into DIR
+--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure
+--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).
+--      : All arguments after "--" are treated as files
+
+Advanced compression arguments :
 --ultra : enable levels beyond 19, up to 22 (requires more memory)
 --long[=#]: enable long distance matching with given window log (default: 27)
 --fast[=#]: switch to very fast compression levels (default: 1)
 --adapt : dynamically adapt compression level to I/O conditions
---stream-size=# : optimize compression parameters for streaming input of given number of bytes
---size-hint=# optimize compression parameters for streaming input of approximately this size
---target-compressed-block-size=# : make compressed block near targeted size
  -T#    : spawns # compression threads (default: 1, 0==# cores)
  -B#    : select size of each job (default: 0==automatic)
+--single-thread : use a single thread for both I/O and compression (result slightly different than -T1)
 --rsyncable : compress using a rsync-friendly method (-B sets block size)
---no-dictID : don't write dictID into header (dictionary compression)
---[no-]check : integrity check (default: enabled)
+--exclude-compressed: only compress files that are not already compressed
+--stream-size=# : specify size of streaming input from `stdin`
+--size-hint=# optimize compression parameters for streaming input of approximately this size
+--target-compressed-block-size=# : generate compressed block of approximately targeted size
+--no-dictID : don't write dictID into header (dictionary compression only)
 --[no-]compress-literals : force (un)compressed literals
- -r     : operate recursively on directories
---output-dir-flat[=directory]: all resulting files stored into `directory`.
 --format=zstd : compress files to the .zst format (default)
 --format=gzip : compress files to the .gz format
+--format=xz : compress files to the .xz format
+--format=lzma : compress files to the .lzma format
+--format=lz4 : compress files to the .lz4 format
+
+Advanced decompression arguments :
+ -l     : print information about zstd compressed files
 --test  : test compressed file integrity
---[no-]sparse : sparse mode (default: disabled)
  -M#    : Set a memory usage limit for decompression
---no-progress : do not display the progress bar
---      : All arguments after "--" are treated as files
+--[no-]sparse : sparse mode (default: disabled)
 
 Dictionary builder :
 --train ## : create a dictionary from a training set of files
 --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args
 --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args
 --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
- -o file : `file` is dictionary name (default: dictionary)
+ -o DICT : DICT is dictionary name (default: dictionary)
 --maxdict=# : limit dictionary to specified size (default: 112640)
 --dictID=# : force dictionary ID to specified value (default: random)
 
 Benchmark arguments :
  -b#    : benchmark file(s), using # compression level (default: 3)
- -e#    : test all compression levels from -bX to # (default: 1)
+ -e#    : test all compression levels successively from -b# to -e# (default: 1)
  -i#    : minimum evaluation time in seconds (default: 3s)
  -B#    : cut file into independent blocks of size # (default: no block)
+ -S     : output one benchmark result per input file (default: consolidated result)
 --priority=rt : set process priority to real-time
 ```
 
 ### Passing parameters through Environment Variables
+There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner.
+Using environment variables for this purpose has security implications.
+Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`.
+
 `ZSTD_CLEVEL` can be used to modify the default compression level of `zstd`
 (usually set to `3`) to another value between 1 and 19 (the "normal" range).
-`ZSTD_NBTHREADS` can be used to specify number of threads that `zstd` will use during compression, which by default is `1`.
+
+`ZSTD_NBTHREADS` can be used to specify a number of threads
+that `zstd` will use for compression, which by default is `1`.
 This functionality only exists when `zstd` is compiled with multithread support.
+`0` means "use as many threads as detected cpu cores on local system".
 The max # of threads is capped at: `ZSTDMT_NBWORKERS_MAX==200`.
 
 This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments.
 One such scenario is `tar --zstd`.
 As `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` only replace the default compression level
-and number of threads, respectively, they can both be overridden by corresponding command line arguments:
+and number of threads respectively, they can both be overridden by corresponding command line arguments:
 `-#` for compression level and `-T#` for number of threads.
 
-There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner.
-Using environment variables for this purpose has security implications.
-Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`.
 
 ### Long distance matching mode
 The long distance matching mode, enabled with `--long`, is designed to improve
diff --git a/programs/fileio.c b/programs/fileio.c
index e02c3f8..65f2d53 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -909,7 +909,7 @@
     if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
         DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
     comprParams->windowLog = MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog);
-    if (fileWindowLog > ZSTD_cycleLog(cParams.hashLog, cParams.strategy)) {
+    if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
         if (!prefs->ldmFlag)
             DISPLAYLEVEL(1, "long mode automatically triggered\n");
         FIO_setLdmFlag(prefs, 1);
@@ -1007,12 +1007,12 @@
     return ress;
 }
 
-static void FIO_freeCResources(cRess_t ress)
+static void FIO_freeCResources(const cRess_t* const ress)
 {
-    free(ress.srcBuffer);
-    free(ress.dstBuffer);
-    free(ress.dictBuffer);
-    ZSTD_freeCStream(ress.cctx);   /* never fails */
+    free(ress->srcBuffer);
+    free(ress->dstBuffer);
+    free(ress->dictBuffer);
+    ZSTD_freeCStream(ress->cctx);   /* never fails */
 }
 
 
@@ -1722,7 +1722,7 @@
 
 #define DISPLAY_LEVEL_DEFAULT 2
 
-    FIO_freeCResources(ress);
+    FIO_freeCResources(&ress);
     return result;
 }
 
@@ -1802,7 +1802,7 @@
     assert(outFileName != NULL || suffix != NULL);
     if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
-            FIO_freeCResources(ress);
+            FIO_freeCResources(&ress);
             return 1;
         }
         ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
@@ -1855,7 +1855,7 @@
                         fCtx->totalBytesInput, fCtx->totalBytesOutput);
     }
 
-    FIO_freeCResources(ress);
+    FIO_freeCResources(&ress);
     return error;
 }
 
diff --git a/programs/timefn.h b/programs/timefn.h
index eb3c130..5d2818e 100644
--- a/programs/timefn.h
+++ b/programs/timefn.h
@@ -28,7 +28,11 @@
 ******************************************/
 
 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h> /* intptr_t */
+# endif
   typedef uint64_t           PTime;  /* Precise Time */
 #else
   typedef unsigned long long PTime;  /* does not support compilers without long long support */
diff --git a/programs/util.c b/programs/util.c
index 980ab5a..5386d00 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -788,7 +788,7 @@
     outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2);
     CONTROL(outDirBuffer != NULL);
 
-    strncpy(outDirBuffer, dir1, dir1Size);
+    memcpy(outDirBuffer, dir1, dir1Size);
     outDirBuffer[dir1Size] = '\0';
 
     if (dir2[0] == '.')
@@ -800,7 +800,7 @@
         *buffer = PATH_SEP;
         buffer++;
     }
-    strncpy(buffer, dir2, dir2Size);
+    memcpy(buffer, dir2, dir2Size);
     buffer[dir2Size] = '\0';
 
     return outDirBuffer;
diff --git a/tests/Makefile b/tests/Makefile
index d347a94..7e7d553 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -94,23 +94,23 @@
 # note : broken : requires symbols unavailable from dynamic library
 dll: fuzzer-dll zstreamtest-dll
 
-PHONY: zstd zstd32 zstd-nolegacy  # must be phony, only external makefile knows how to build them, or if they need an update
+.PHONY: zstd zstd32 zstd-nolegacy  # phony: only external makefile knows how to build or update them
 zstd zstd32 zstd-nolegacy:
-	$(MAKE) -C $(PRGDIR) $@ MOREFLAGS+="$(DEBUGFLAGS)"
+	$(MAKE) -C $(PRGDIR) $@ MOREFLAGS+="$(DEBUGFLAGS)" DEBUGLEVEL=$(DEBUGLEVEL)
 
 gzstd:
-	$(MAKE) -C $(PRGDIR) $@ HAVE_ZLIB=1 MOREFLAGS+="$(DEBUGFLAGS)"
+	$(MAKE) -C $(PRGDIR) $@ HAVE_ZLIB=1 MOREFLAGS+="$(DEBUGFLAGS)" DEBUGLEVEL=$(DEBUGLEVEL)
 
 .PHONY: libzstd
 libzstd :
-	$(MAKE) -C $(ZSTDDIR) libzstd
+	$(MAKE) -C $(ZSTDDIR) libzstd DEBUGLEVEL=$(DEBUGLEVEL)
 
 %-dll : libzstd
 %-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
 
 .PHONY: zstd-staticLib
 zstd-staticLib :
-	$(MAKE) -C $(ZSTDDIR) libzstd.a
+	$(MAKE) -C $(ZSTDDIR) libzstd.a DEBUGLEVEL=$(DEBUGLEVEL)
 
 zstdm_%.o : $(ZSTDDIR)/common/%.c
 	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
@@ -255,7 +255,8 @@
 #----------------------------------------------------------------------------------
 # valgrind tests are validated only for some posix platforms
 #----------------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
+UNAME := $(shell uname)
+ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
 HOST_OS = POSIX
 
 valgrindTest: VALGRIND = valgrind --leak-check=full --show-leak-kinds=all --error-exitcode=1
@@ -275,8 +276,8 @@
 endif
 
 
-ifneq (,$(filter MINGW% MSYS%,$(shell uname)))
-HOST_OS = MSYS
+ifneq (,$(filter MINGW% MSYS%,$(UNAME)))
+  HOST_OS = MSYS
 endif
 
 
@@ -286,8 +287,8 @@
 ifneq (,$(filter $(HOST_OS),MSYS POSIX))
 
 DIFF:=diff
-ifneq (,$(filter $(shell uname),SunOS))
-DIFF:=gdiff
+ifneq (,$(filter $(UNAME),SunOS))
+  DIFF:=gdiff
 endif
 
 .PHONY: list
@@ -380,7 +381,6 @@
 
 test-zstream: zstreamtest
 	$(QEMU_SYS) ./zstreamtest -v $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
-	$(QEMU_SYS) ./zstreamtest --mt -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
 	$(QEMU_SYS) ./zstreamtest --newapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
 
 test-zstream32: zstreamtest32
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index b6fc6e5..9bd280c 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -13,7 +13,12 @@
 stream_decompress
 stream_round_trip
 zstd_frame_info
+decompress_dstSize_tooSmall
+fse_read_ncount
+sequence_compression_api
 fuzz-*.log
+rt_lib_*
+d_lib_*
 
 # misc
 trace
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index d88fae9..36232a8 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -97,9 +97,10 @@
 	raw_dictionary_round_trip \
 	dictionary_stream_round_trip \
 	decompress_dstSize_tooSmall \
-	fse_read_ncount
+	fse_read_ncount \
+	sequence_compression_api
 
-all: $(FUZZ_TARGETS)
+all: libregression.a $(FUZZ_TARGETS)
 
 rt_lib_common_%.o: $(ZSTDDIR)/common/%.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
@@ -188,6 +189,9 @@
 fse_read_ncount: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o $(LIB_FUZZING_ENGINE) -o $@
 
+sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
 
@@ -209,12 +213,10 @@
 	$(PYTHON) ./fuzz.py regression all
 
 clean:
-	@$(RM) *.a *.o
-	@$(RM) simple_round_trip stream_round_trip simple_decompress \
-           stream_decompress block_decompress block_round_trip \
-           simple_compress dictionary_round_trip dictionary_decompress \
-           zstd_frame_info
+	@$(RM) *.a *.o $(FUZZ_TARGETS)
+	@echo Cleaning completed
 
 cleanall:
 	@$(RM) -r Fuzzer
 	@$(RM) -r corpora
+	@echo Cleaning completed
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 24430a2..ef94a53 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -61,6 +61,7 @@
     'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
     'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
+    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
 }
 TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
new file mode 100644
index 0000000..e838687
--- /dev/null
+++ b/tests/fuzz/sequence_compression_api.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2016-2020, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test by generating an arbitrary
+ * array of sequences, generating the associated source buffer, calling
+ * ZSTD_compressSequences(), and then decompresses and compares the result with
+ * the original generated source buffer.
+ */
+
+#define ZSTD_STATIC_LINKING_ONLY
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+#include "fuzz_data_producer.h"
+
+static ZSTD_CCtx *cctx = NULL;
+static ZSTD_DCtx *dctx = NULL;
+static void* literalsBuffer = NULL;
+static void* generatedSrc = NULL;
+static ZSTD_Sequence* generatedSequences = NULL;
+
+#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
+#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
+#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */
+#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */
+#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
+
+/* Deterministic PRNG: multiply by prime1, xor with prime2, rotate left by 13; state lives in *src */
+#define FUZZ_RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+static uint32_t FUZZ_RDG_rand(uint32_t* src)
+{
+    static const uint32_t prime1 = 2654435761U;
+    static const uint32_t prime2 = 2246822519U;
+    uint32_t rand32 = *src;
+    rand32 *= prime1;
+    rand32 ^= prime2;
+    rand32  = FUZZ_RDG_rotl32(rand32, 13);
+    *src = rand32;        /* write the new state back for the next call */
+    return rand32 >> 5;   /* the returned value is the new state shifted right by 5 */
+}
+
+/* Fill `str` with `size` pseudorandom characters drawn from a fixed charset and return `str`.
+ * Kept local so the target does not take a dependency on datagen.h for RDG_genBuffer().
+ * The seed always starts at 0, so every call yields the same characters; no NUL is appended.
+ */
+static char *generatePseudoRandomString(char *str, size_t size) {
+    const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
+    uint32_t seed = 0;
+    size_t pos = 0;
+    while (pos < size) {
+        int const idx = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
+        str[pos++] = charset[idx];
+    }
+    return str;
+}
+
+/* Rebuilds into `dst` the source described by generatedSequences[0..nbSequences) and returns its size.
+ * Literals come from the file-scope literalsBuffer; a match may start in the tail of `dict`. */
+static size_t decodeSequences(void* dst, size_t nbSequences,
+                              size_t literalsSize, const void* dict, size_t dictSize) {
+    const uint8_t* litPtr = literalsBuffer;
+    const uint8_t* const litBegin = literalsBuffer;
+    const uint8_t* const litEnd = litBegin + literalsSize;   /* typed pointer: no void* arithmetic */
+    const uint8_t* dictPtr = dict;
+    uint8_t* op = dst;
+    const uint8_t* const oend = (const uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
+    size_t generatedSrcBufferSize = 0;
+    size_t bytesWritten = 0;
+    uint32_t lastLLSize;
+
+    for (size_t i = 0; i < nbSequences; ++i) {
+        FUZZ_ASSERT(generatedSequences[i].matchLength != 0);
+        FUZZ_ASSERT(generatedSequences[i].offset != 0);
+
+        if (litPtr + generatedSequences[i].litLength > litEnd) {
+            litPtr = litBegin;   /* not enough literals left: restart from the beginning */
+        }
+        ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength);
+        bytesWritten += generatedSequences[i].litLength;
+        op += generatedSequences[i].litLength;
+        litPtr += generatedSequences[i].litLength;
+
+        /* Copy over the match */
+        {   size_t matchLength = generatedSequences[i].matchLength;
+            size_t j = 0;
+            size_t k = 0;
+            if (dictSize != 0) {
+                if (generatedSequences[i].offset > bytesWritten) {
+                    /* Offset goes into the dictionary */
+                    size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten;
+                    for (; k < offsetFromEndOfDict && k < matchLength; ++k) {
+                        op[k] = dictPtr[dictSize - offsetFromEndOfDict + k];
+                    }
+                    matchLength -= k;
+                    op += k;
+                }
+            }
+            for (; j < matchLength; ++j) {
+                op[j] = op[j-(int)generatedSequences[i].offset];
+            }
+            op += j;
+            FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
+            bytesWritten += generatedSequences[i].matchLength;
+        }
+    }
+    generatedSrcBufferSize = bytesWritten;
+    FUZZ_ASSERT(litPtr <= litEnd);
+    lastLLSize = (uint32_t)(litEnd - litPtr);
+    if (op <= oend && lastLLSize <= (size_t)(oend - op)) {   /* append unread literals only if they fit */
+        ZSTD_memcpy(op, litPtr, lastLLSize);
+        generatedSrcBufferSize += lastLLSize;
+    }
+    return generatedSrcBufferSize;
+}
+
+/* Fills generatedSequences[] from `producer` and returns the number of sequences stored; stops at
+ * ZSTD_FUZZ_MAX_NBSEQ sequences, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE bytes, or an empty producer.
+ * TODO: Add repcode fuzzing once we support repcode match splits */
+static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
+                                      size_t literalsSizeLimit, size_t dictSize,
+                                      size_t windowLog) {
+    uint32_t bytesGenerated = 0;   /* running total of literal + match bytes */
+    uint32_t nbSeqGenerated = 0;
+    uint32_t litLength;
+    uint32_t matchLength;
+    uint32_t matchBound;
+    uint32_t offset;
+    uint32_t offsetBound;
+    uint32_t repCode = 0;          /* constant for now, see TODO above */
+    uint32_t isFirstSequence = 1;
+    uint32_t windowSize = 1 << windowLog;
+
+    while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ
+         && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
+         && !FUZZ_dataProducer_empty(producer)) {
+        matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;   /* default cap; may be lowered below when a dictionary is used */
+        litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit)
+                                                     : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit);
+        bytesGenerated += litLength;
+        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
+            break;   /* literals alone exceed the size budget: this sequence is not stored */
+        }
+        offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize;   /* dictSize only counts while <= windowSize bytes exist */
+        offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
+        if (dictSize > 0 && bytesGenerated <= windowSize) {
+            /* Prevent match length from being such that it would be associated with an offset too large
+             * from the decoder's perspective. If not possible (match would be too small),
+             * then reduce the offset if necessary.
+             */
+            size_t bytesToReachWindowSize = windowSize - bytesGenerated;
+            if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
+                uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
+                offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);   /* re-draw: consumes producer data a second time */
+            } else {
+                matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ?
+                             ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize;
+            }
+        }
+        matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
+        bytesGenerated += matchLength;
+        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
+            break;   /* match would exceed the size budget: this sequence is not stored */
+        }
+        ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
+        generatedSequences[nbSeqGenerated++] = seq;
+        isFirstSequence = 0;
+    }
+
+    return nbSeqGenerated;
+}
+
+/* Round-trips the generated data through zstd.
+ * Compresses generatedSrc[0..srcSize) with ZSTD_compressSequences(), feeding it the first
+ * `generatedSequencesSize` entries of generatedSequences, then decompresses into `result`.
+ * Uses the file-scope cctx/dctx; when `hasDict` is set, `dict` is loaded into both contexts.
+ * A zstd error from dictionary loading, compression or decompression trips FUZZ_ZASSERT.
+ * Returns the decompressed size.
+ */
+static size_t roundTripTest(void *result, size_t resultCapacity,
+                            void *compressed, size_t compressedCapacity,
+                            size_t srcSize,
+                            const void *dict, size_t dictSize,
+                            size_t generatedSequencesSize,
+                            size_t wLog, unsigned cLevel, unsigned hasDict)
+{
+    size_t cSize;
+    size_t dSize;
+
+    /* Start from a clean parameter set, then apply the window log and minimum match used for generation */
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, (int)cLevel);   /* caller's level is signed */
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+    /* TODO: Add block delim mode fuzzing */
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+    if (hasDict) {
+        FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
+        FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
+    }
+
+    cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
+                                   generatedSequences, generatedSequencesSize,
+                                   generatedSrc, srcSize);
+    FUZZ_ZASSERT(cSize);
+    dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
+    FUZZ_ZASSERT(dSize);
+
+    return dSize;
+}
+
+/* libFuzzer entry point: derives window log, level, optional dictionary and sequences from the
+ * input, builds the matching source, and asserts the compressSequences round trip reproduces it. */
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    void* rBuf;
+    size_t rBufSize;
+    void* cBuf;
+    size_t cBufSize;
+    size_t generatedSrcSize;
+    size_t nbSequences;
+    void* dictBuffer = NULL;   /* stays NULL without a dictionary; it is still passed on and freed */
+    size_t dictSize = 0;
+    unsigned hasDict;
+    unsigned wLog;
+    int cLevel;
+
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    if (literalsBuffer == NULL) {
+        literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
+        literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
+    }
+
+    hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
+    if (hasDict) {
+        dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
+        dictBuffer = FUZZ_malloc(dictSize);
+        dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
+    }
+    /* Generate window log first so we don't generate offsets too large */
+    wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
+    cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
+
+    if (!generatedSequences) {
+        generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
+    }
+    if (!generatedSrc) {
+        generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
+    }
+    nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog);
+    generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize);
+    cBufSize = ZSTD_compressBound(generatedSrcSize);
+    cBuf = FUZZ_malloc(cBufSize);
+
+    rBufSize = generatedSrcSize;
+    rBuf = FUZZ_malloc(rBufSize);
+
+    if (!cctx) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    size_t const result = roundTripTest(rBuf, rBufSize,
+                                        cBuf, cBufSize,
+                                        generatedSrcSize,
+                                        dictBuffer, dictSize,
+                                        nbSequences,
+                                        wLog, cLevel, hasDict);
+    FUZZ_ZASSERT(result);
+    FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size");
+    FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!");
+
+    free(rBuf);
+    free(cBuf);
+    FUZZ_dataProducer_free(producer);
+    free(dictBuffer);   /* NULL when hasDict == 0, which free() accepts */
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeCCtx(cctx); cctx = NULL;
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+    free(generatedSequences); generatedSequences = NULL;
+    free(generatedSrc); generatedSrc = NULL;
+    free(literalsBuffer); literalsBuffer = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 8b10078..2e5d70e 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -32,7 +32,6 @@
 #include "fse.h"
 #include "zstd.h"         /* ZSTD_VERSION_STRING */
 #include "zstd_errors.h"  /* ZSTD_getErrorCode */
-#include "zstdmt_compress.h"
 #define ZDICT_STATIC_LINKING_ONLY
 #include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "mem.h"
@@ -306,26 +305,34 @@
 
 #endif
 
-static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
+static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
+                                BYTE* src, size_t size, ZSTD_sequenceFormat_e format)
 {
     size_t i;
     size_t j;
-    for(i = 0; i < seqsSize - 1; ++i) {
-        assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size);
-        assert(src + seqs[i].litLength + seqs[i].matchLength < src + size);
+    for(i = 0; i < seqsSize; ++i) {
+        assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
+        assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
+        if (format == ZSTD_sf_noBlockDelimiters) {
+            assert(seqs[i].matchLength != 0 || seqs[i].offset != 0);
+        }
 
         memcpy(dst, src, seqs[i].litLength);
         dst += seqs[i].litLength;
         src += seqs[i].litLength;
         size -= seqs[i].litLength;
 
-        for (j = 0; j < seqs[i].matchLength; ++j)
-            dst[j] = dst[j - seqs[i].offset];
-        dst += seqs[i].matchLength;
-        src += seqs[i].matchLength;
-        size -= seqs[i].matchLength;
+        if (seqs[i].offset != 0) {
+            for (j = 0; j < seqs[i].matchLength; ++j)
+                dst[j] = dst[j - seqs[i].offset];
+            dst += seqs[i].matchLength;
+            src += seqs[i].matchLength;
+            size -= seqs[i].matchLength;
+        }
     }
-    memcpy(dst, src, size);
+    if (format == ZSTD_sf_noBlockDelimiters) {
+        memcpy(dst, src, size);
+    }
 }
 
 /*=============================================
@@ -372,6 +379,19 @@
         DISPLAYLEVEL(3, "%u (OK) \n", vn);
     }
 
+    DISPLAYLEVEL(3, "test%3u : ZSTD_adjustCParams : ", testNb++);
+    {
+        ZSTD_compressionParameters params;
+        memset(&params, 0, sizeof(params));
+        params.windowLog = 10;
+        params.hashLog = 19;
+        params.chainLog = 19;
+        params = ZSTD_adjustCParams(params, 1000, 100000);
+        if (params.hashLog != 18) goto _output_error;
+        if (params.chainLog != 17) goto _output_error;
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3u : compress %u bytes : ", testNb++, (unsigned)CNBuffSize);
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         if (cctx==NULL) goto _output_error;
@@ -705,6 +725,48 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : LDM + opt parser with small uncompressible block ", testNb++);
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        size_t const srcSize = 300 KB;
+        size_t const flushSize = 128 KB + 5;
+        size_t const dstSize = ZSTD_compressBound(srcSize);
+        char* src = (char*)CNBuffer;
+        char* dst = (char*)compressedBuffer;
+
+        ZSTD_outBuffer out = { dst, dstSize, 0 };
+        ZSTD_inBuffer in = { src, flushSize, 0 };
+
+        if (!cctx || !dctx) {
+            DISPLAY("Not enough memory, aborting\n");
+            testResult = 1;
+            goto _end;
+        }
+
+        RDG_genBuffer(src, srcSize, 0.5, 0.5, seed);
+        /* Force an LDM to exist that crosses block boundary into uncompressible block */
+        memcpy(src + 125 KB, src, 3 KB + 5);
+
+        /* Enable MT, LDM, and opt parser */
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
+
+        /* Flushes a block of 128 KB and block of 5 bytes */
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+
+        /* Compress the rest */
+        in.size = 300 KB;
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBuffSize, dst, out.pos));
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeDCtx(dctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : testing ldm dictionary gets invalidated : ", testNb++);
     {
         ZSTD_CCtx* const cctx = ZSTD_createCCtx();
@@ -1145,6 +1207,26 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3d : ldm conditionally enabled by default doesn't change cctx params: ", testNb++);
+    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_outBuffer out = {NULL, 0, 0};
+        ZSTD_inBuffer in = {NULL, 0, 0};
+        int value;
+
+        /* Even if LDM will be enabled by default in the applied params (since wlog >= 27 and strategy >= btopt),
+         * we should not modify the actual parameter specified by the user within the CCtx
+         */
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt));
+
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue));
+        CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_enableLongDistanceMatching, &value));
+        CHECK_EQ(value, 0);
+
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     /* this test is really too long, and should be made faster */
     DISPLAYLEVEL(3, "test%3d : overflow protection with large windowLog : ", testNb++);
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
@@ -1352,19 +1434,20 @@
 
     /* ZSTDMT simple MT compression test */
     DISPLAYLEVEL(3, "test%3i : create ZSTDMT CCtx : ", testNb++);
-    {   ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(2);
+    {   ZSTD_CCtx* const mtctx = ZSTD_createCCtx();
         if (mtctx==NULL) {
             DISPLAY("mtctx : not enough memory, aborting \n");
             testResult = 1;
             goto _end;
         }
+        CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2) );
+        CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_compressionLevel, 1) );
         DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize);
-        CHECK_VAR(cSize, ZSTDMT_compressCCtx(mtctx,
+        CHECK_VAR(cSize, ZSTD_compress2(mtctx,
                                 compressedBuffer, compressedBufferSize,
-                                CNBuffer, CNBuffSize,
-                                1) );
+                                CNBuffer, CNBuffSize) );
         DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
 
         DISPLAYLEVEL(3, "test%3i : decompressed size test : ", testNb++);
@@ -1388,14 +1471,12 @@
         DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3i : compress -T2 with checksum : ", testNb++);
-        {   ZSTD_parameters params = ZSTD_getParams(1, CNBuffSize, 0);
-            params.fParams.checksumFlag = 1;
-            params.fParams.contentSizeFlag = 1;
-            CHECK_VAR(cSize, ZSTDMT_compress_advanced(mtctx,
-                                    compressedBuffer, compressedBufferSize,
-                                    CNBuffer, CNBuffSize,
-                                    NULL, params, 3 /*overlapRLog*/) );
-        }
+        CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_checksumFlag, 1) );
+        CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_contentSizeFlag, 1) );
+        CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_overlapLog, 3) );
+        CHECK_VAR(cSize, ZSTD_compress2(mtctx,
+                                compressedBuffer, compressedBufferSize,
+                                CNBuffer, CNBuffSize) );
         DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/CNBuffSize*100);
 
         DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize);
@@ -1403,7 +1484,7 @@
           if (r != CNBuffSize) goto _output_error; }
         DISPLAYLEVEL(3, "OK \n");
 
-        ZSTDMT_freeCCtx(mtctx);
+        ZSTD_freeCCtx(mtctx);
     }
 
     DISPLAYLEVEL(3, "test%3u : compress empty string and decompress with small window log : ", testNb++);
@@ -1837,6 +1918,7 @@
                                             cParams, ZSTD_defaultCMem);
             assert(cdict != NULL);
             DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
+            assert(ZSTD_getDictID_fromDict(dictBuffer, dictSize) == ZSTD_getDictID_fromCDict(cdict));
             cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
                                                  CNBuffer, CNBuffSize, cdict);
             ZSTD_freeCDict(cdict);
@@ -1971,34 +2053,53 @@
         }
         DISPLAYLEVEL(3, "OK \n");
 
-        DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
-        {
-            size_t ret;
-            MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
-            /* Either operation is allowed to fail, but one must fail. */
-            ret = ZSTD_CCtx_loadDictionary_advanced(
-                    cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto);
-            if (!ZSTD_isError(ret)) {
-                ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
-                if (!ZSTD_isError(ret)) goto _output_error;
-            }
-        }
-        DISPLAYLEVEL(3, "OK \n");
+        {   char* rawDictBuffer = (char*)malloc(dictSize);
+            assert(rawDictBuffer);
+            memcpy(rawDictBuffer, (char*)dictBuffer + 2, dictSize - 2);
+            memset(rawDictBuffer + dictSize - 2, 0, 2);
+            MEM_writeLE32((char*)rawDictBuffer, ZSTD_MAGIC_DICTIONARY);
 
-        DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
-        {
-            size_t ret;
-            MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
-            ret = ZSTD_CCtx_loadDictionary_advanced(
-                    cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
-            if (ZSTD_isError(ret)) goto _output_error;
-            ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
-            if (ZSTD_isError(ret)) goto _output_error;
+            DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
+            {
+                size_t ret;
+                /* Either operation is allowed to fail, but one must fail. */
+                ret = ZSTD_CCtx_loadDictionary_advanced(
+                        cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+                if (!ZSTD_isError(ret)) {
+                    ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
+                    if (!ZSTD_isError(ret)) goto _output_error;
+                }
+            }
+            DISPLAYLEVEL(3, "OK \n");
+
+            DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
+            {
+                size_t ret;
+                ret = ZSTD_CCtx_loadDictionary_advanced(
+                        cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
+                if (ZSTD_isError(ret)) goto _output_error;
+                ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
+                if (ZSTD_isError(ret)) goto _output_error;
+            }
+            DISPLAYLEVEL(3, "OK \n");
+
+            DISPLAYLEVEL(3, "test%3i : Testing non-attached CDict with ZSTD_dct_rawContent : ", testNb++);
+            {   size_t const srcSize = MIN(CNBuffSize, 100);
+                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+                /* Force the dictionary to be reloaded in raw content mode */
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceLoad));
+                CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize);
+                CHECK_Z(cSize);
+            }
+            DISPLAYLEVEL(3, "OK \n");
+
+            free(rawDictBuffer);
         }
-        DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++);
         {   ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1);
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
             CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
             CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
             CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );
@@ -2608,9 +2709,9 @@
         DISPLAYLEVEL(3, "OK \n");
     }
 
-    DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences decode from sequences test : ", testNb++);
     {
-        size_t srcSize = 100 KB;
+        size_t srcSize = 150 KB;
         BYTE* src = (BYTE*)CNBuffer;
         BYTE* decoded = (BYTE*)compressedBuffer;
 
@@ -2620,20 +2721,83 @@
 
         if (seqs == NULL) goto _output_error;
         assert(cctx != NULL);
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
+        /* Populate src with random data */
+        RDG_genBuffer(CNBuffer, srcSize, compressibility, 0.5, seed);
+
+        /* Test with block delimiters roundtrip */
+        seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
+        FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
+        assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
+
+        /* Test no block delimiters roundtrip */
+        seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
+        FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
+        assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
+
+        ZSTD_freeCCtx(cctx);
+        free(seqs);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+    
+    DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++);
+    {
+        size_t srcSize = 500 KB;
+        BYTE* src = (BYTE*)CNBuffer;
+        BYTE* dst = (BYTE*)compressedBuffer;
+        size_t dstSize = ZSTD_compressBound(srcSize);
+        size_t decompressSize = srcSize;
+        char* decompressBuffer = (char*)malloc(decompressSize);
+        size_t compressedSize;
+        size_t dSize;
+
+        ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
+        size_t seqsSize;
+
+        if (seqs == NULL) goto _output_error;
+        assert(cctx != NULL);
 
         /* Populate src with random data */
         RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
 
-        /* get the sequences */
-        seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
+        /* Test with block delimiters roundtrip */
+        seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
+        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+        compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
+        if (ZSTD_isError(compressedSize)) {
+            DISPLAY("Error in sequence compression with block delims\n");
+            goto _output_error;
+        }
+        dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
+        if (ZSTD_isError(dSize)) {
+            DISPLAY("Error in sequence compression roundtrip with block delims\n");
+            goto _output_error;
+        }
+        assert(!memcmp(decompressBuffer, src, srcSize));
 
-        /* "decode" and compare the sequences */
-        FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
-        assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
+        /* Test with no block delimiters roundtrip */
+        seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
+        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+        compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
+        if (ZSTD_isError(compressedSize)) {
+            DISPLAY("Error in sequence compression with no block delims\n");
+            goto _output_error;
+        }
+        dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
+        if (ZSTD_isError(dSize)) {
+            DISPLAY("Error in sequence compression roundtrip with no block delims\n");
+            goto _output_error;
+        }
+        assert(!memcmp(decompressBuffer, src, srcSize));
 
         ZSTD_freeCCtx(cctx);
+        free(decompressBuffer);
         free(seqs);
     }
+    DISPLAYLEVEL(3, "OK \n");
 
     /* Multiple blocks of zeros test */
     #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
@@ -2983,6 +3147,44 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "longtest%3i : testing ldm no regressions in size for opt parser : ", testNb++);
+    {
+        size_t cSizeLdm;
+        size_t cSizeNoLdm;
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+
+        RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, seed);
+
+        /* Enable checksum to verify round trip. */
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
+
+        /* Round trip once with ldm. */
+        cSizeLdm = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+        CHECK_Z(cSizeLdm);
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSizeLdm));
+
+        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 0));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19));
+
+        /* Round trip once without ldm. */
+        cSizeNoLdm = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+        CHECK_Z(cSizeNoLdm);
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSizeNoLdm));
+
+        if (cSizeLdm > cSizeNoLdm) {
+            DISPLAY("Using long mode should not cause regressions for btopt+\n");
+            testResult = 1;
+            goto _end;
+        }
+
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "longtest%3i : testing cdict compression with different attachment strategies : ", testNb++);
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         ZSTD_DCtx* const dctx = ZSTD_createDCtx();
diff --git a/tests/libzstd_partial_builds.sh b/tests/libzstd_partial_builds.sh
index b1c1e3b..bee2dbd 100755
--- a/tests/libzstd_partial_builds.sh
+++ b/tests/libzstd_partial_builds.sh
@@ -22,7 +22,6 @@
 }
 
 # default compilation : all features enabled
-make clean > /dev/null
 $ECHO "testing default library compilation"
 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
diff --git a/tests/playTests.sh b/tests/playTests.sh
index fd602e7..3f53b6a 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1103,7 +1103,7 @@
         datagen > tmp
         zstd --format=lz4 -f tmp
         lz4 -t -v tmp.lz4
-        lz4 -f tmp
+        lz4 -f -m tmp   # ensure result is sent into tmp.lz4, not stdout
         zstd -d -f -v tmp.lz4
         rm tmp*
     else
@@ -1206,6 +1206,7 @@
 roundTripTest -g517K "6 --single-thread --long"
 roundTripTest -g516K "16 --single-thread --long"
 roundTripTest -g518K "19 --single-thread --long"
+roundTripTest -g2M "22 --single-thread --ultra --long"
 fileRoundTripTest -g5M "3 --single-thread --long"
 
 
@@ -1215,6 +1216,7 @@
     println "\n===>  zstdmt round-trip tests "
     roundTripTest -g4M "1 -T0"
     roundTripTest -g8M "3 -T2"
+    roundTripTest -g8M "19 -T0 --long"
     roundTripTest -g8000K "2 --threads=2"
     fileRoundTripTest -g4M "19 -T2 -B1M"
 
@@ -1333,6 +1335,28 @@
 roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB"
 
 
+println "\n===>  zstd long distance matching with optimal parser compressed size tests "
+optCSize16=$(datagen -g511K | zstd -16 -c | wc -c)
+longCSize16=$(datagen -g511K | zstd -16 --long -c | wc -c)
+optCSize19=$(datagen -g2M | zstd -19 -c | wc -c)
+longCSize19=$(datagen -g2M | zstd -19 --long -c | wc -c)
+optCSize19wlog23=$(datagen -g2M | zstd -19 -c  --zstd=wlog=23 | wc -c)
+longCSize19wlog23=$(datagen -g2M | zstd -19 -c --long=23 | wc -c)
+optCSize22=$(datagen -g900K | zstd -22 --ultra -c | wc -c)
+longCSize22=$(datagen -g900K | zstd -22 --ultra --long -c | wc -c)
+if [ "$longCSize16" -gt "$optCSize16" ]; then
+    echo using --long on compression level 16 should not cause compressed size regression
+    exit 1
+elif [ "$longCSize19" -gt "$optCSize19" ]; then
+    echo using --long on compression level 19 should not cause compressed size regression
+    exit 1
+elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
+    echo using --long on compression level 19 with wLog=23 should not cause compressed size regression
+    exit 1
+elif [ "$longCSize22" -gt "$optCSize22" ]; then
+    echo using --long on compression level 22 should not cause compressed size regression
+    exit 1
+fi
 
 
 if [ "$1" != "--test-large-data" ]; then
diff --git a/tests/regression/README.md b/tests/regression/README.md
new file mode 100644
index 0000000..bb36b1d
--- /dev/null
+++ b/tests/regression/README.md
@@ -0,0 +1,28 @@
+# Regression tests
+
+The regression tests run zstd in many scenarios and verify that the size of the compressed results doesn't change. This helps us ensure that we don't accidentally regress zstd's compression ratio.
+
+These tests are run every night by CircleCI. If the job fails, you can read the diff printed by the job to check that the change isn't a regression. If all is well, you can download the `results.csv` artifact and commit the new results. Or you can rebuild it yourself by following the instructions below.
+
+## Rebuilding results.csv
+
+From the root of the zstd repo run:
+
+```
+# Build the zstd binary
+make clean
+make -j zstd
+
+# Build the regression test binary
+cd tests/regression
+make clean
+make -j test
+
+# Run the regression test
+./test --cache data-cache --zstd ../../zstd --output results.csv
+
+# Check results.csv to ensure the new results are okay
+git diff
+
+# Then submit the PR
+```
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 60d43b6..979b1d2 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -212,8 +212,8 @@
 silesia.tar,                        level 19,                           advanced one pass,                  4281605
 silesia.tar,                        no source size,                     advanced one pass,                  4861425
 silesia.tar,                        long distance mode,                 advanced one pass,                  4848098
-silesia.tar,                        multithreaded,                      advanced one pass,                  4860781
-silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4847398
+silesia.tar,                        multithreaded,                      advanced one pass,                  4861508
+silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4853186
 silesia.tar,                        small window log,                   advanced one pass,                  7101530
 silesia.tar,                        small hash log,                     advanced one pass,                  6587951
 silesia.tar,                        small chain log,                    advanced one pass,                  4943307
@@ -221,7 +221,7 @@
 silesia.tar,                        uncompressed literals,              advanced one pass,                  5129458
 silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4320927
 silesia.tar,                        huffman literals,                   advanced one pass,                  5347335
-silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5129777
+silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5129555
 github,                             level -5,                           advanced one pass,                  205285
 github,                             level -5 with dict,                 advanced one pass,                  46718
 github,                             level -3,                           advanced one pass,                  190643
@@ -304,8 +304,8 @@
 silesia.tar,                        level 19,                           advanced one pass small out,        4281605
 silesia.tar,                        no source size,                     advanced one pass small out,        4861425
 silesia.tar,                        long distance mode,                 advanced one pass small out,        4848098
-silesia.tar,                        multithreaded,                      advanced one pass small out,        4860781
-silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4847398
+silesia.tar,                        multithreaded,                      advanced one pass small out,        4861508
+silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4853186
 silesia.tar,                        small window log,                   advanced one pass small out,        7101530
 silesia.tar,                        small hash log,                     advanced one pass small out,        6587951
 silesia.tar,                        small chain log,                    advanced one pass small out,        4943307
@@ -313,7 +313,7 @@
 silesia.tar,                        uncompressed literals,              advanced one pass small out,        5129458
 silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4320927
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5347335
-silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5129777
+silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5129555
 github,                             level -5,                           advanced one pass small out,        205285
 github,                             level -5 with dict,                 advanced one pass small out,        46718
 github,                             level -3,                           advanced one pass small out,        190643
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 79d5a82..1855b4d 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -31,7 +31,6 @@
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
 #include "zstd.h"         /* ZSTD_compressBound */
 #include "zstd_errors.h"  /* ZSTD_error_srcSize_wrong */
-#include "zstdmt_compress.h"
 #include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "datagen.h"      /* RDG_genBuffer */
 #define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
@@ -274,7 +273,7 @@
     U32 coreSeed = 0;  /* this name to conform with CHECK_Z macro display */
     ZSTD_CStream* zc = ZSTD_createCStream();
     ZSTD_DStream* zd = ZSTD_createDStream();
-    ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
+    ZSTD_CCtx* mtctx = ZSTD_createCCtx();
 
     ZSTD_inBuffer  inBuff, inBuff2;
     ZSTD_outBuffer outBuff;
@@ -283,12 +282,14 @@
     unsigned dictID = 0;
 
     /* Create compressible test buffer */
-    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
+    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd || !mtctx) {
         DISPLAY("Not enough memory, aborting \n");
         goto _output_error;
     }
     RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
 
+    CHECK_Z(ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2));
+
     /* Create dictionary */
     DISPLAYLEVEL(3, "creating dictionary for unit tests \n");
     dictionary = FUZ_createDictionary(CNBuffer, CNBufferSize / 3, 16 KB, 48 KB);
@@ -753,6 +754,166 @@
         ZSTD_freeDCtx(dctx);
     }
 
+    /* Compression with ZSTD_c_stable{In,Out}Buffer */
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        ZSTD_inBuffer in;
+        ZSTD_outBuffer out;
+        size_t cctxSize1;
+        size_t cctxSize2;
+        in.src = CNBuffer;
+        in.size = CNBufferSize;
+        out.dst = compressedBuffer;
+        out.size = compressedBufferSize;
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress2() uses stable input and output : ", testNb++);
+        CHECK_Z(cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize));
+        CHECK(!(cSize < ZSTD_compressBound(CNBufferSize)), "cSize too large for test");
+        CHECK_Z(cSize = ZSTD_compress2(cctx, compressedBuffer, cSize + 4, CNBuffer, CNBufferSize));
+        CHECK_Z(cctxSize1 = ZSTD_sizeof_CCtx(cctx));
+        {   ZSTD_CCtx* cctx2 = ZSTD_createCCtx();
+            in.pos = out.pos = 0;
+            CHECK_Z(ZSTD_compressStream2(cctx2, &out, &in, ZSTD_e_continue));
+            CHECK(!(ZSTD_compressStream2(cctx2, &out, &in, ZSTD_e_end) == 0), "Not finished");
+            CHECK_Z(cctxSize2 = ZSTD_sizeof_CCtx(cctx2));
+            ZSTD_freeCCtx(cctx2);
+        }
+        {   ZSTD_CCtx* cctx3 = ZSTD_createCCtx();
+            ZSTD_parameters params = ZSTD_getParams(0, CNBufferSize, 0);
+            size_t cSize3;
+            params.fParams.checksumFlag = 1;
+            cSize3 = ZSTD_compress_advanced(cctx3, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, NULL, 0, params);
+            CHECK_Z(cSize3);
+            CHECK(!(cSize == cSize3), "Must be same compressed size");
+            CHECK(!(cctxSize1 == ZSTD_sizeof_CCtx(cctx3)), "Must be same CCtx size");
+            ZSTD_freeCCtx(cctx3);
+        }
+        CHECK(!(cctxSize1 < cctxSize2), "Stable buffers means less allocated size");
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compress2() doesn't modify user parameters : ", testNb++);
+        {
+            int stableInBuffer;
+            int stableOutBuffer;
+            CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_stableInBuffer, &stableInBuffer));
+            CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_stableOutBuffer, &stableOutBuffer));
+            CHECK(!(stableInBuffer == 0), "Modified");
+            CHECK(!(stableOutBuffer == 0), "Modified");
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1));
+            CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1));
+            CHECK_Z(cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize));
+            CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_stableInBuffer, &stableInBuffer));
+            CHECK_Z(ZSTD_CCtx_getParameter(cctx, ZSTD_c_stableOutBuffer, &stableOutBuffer));
+            CHECK(!(stableInBuffer == 1), "Modified");
+            CHECK(!(stableOutBuffer == 1), "Modified");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() with ZSTD_c_stableInBuffer and ZSTD_c_stableOutBuffer : ", testNb++);
+        CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1));
+        in.pos = out.pos = 0;
+        CHECK(!(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) == 0), "Not finished");
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableInBuffer and ZSTD_c_stableOutBuffer allocated size : ", testNb++);
+        {   size_t const cctxSize = ZSTD_sizeof_CCtx(cctx);
+            CHECK(!(cctxSize1 == cctxSize), "Must be the same size as single pass");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() with ZSTD_c_stableInBuffer only : ", testNb++);
+        CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1));
+        in.pos = out.pos = 0;
+        out.size = cSize / 4;
+        for (;;) {
+            size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
+            CHECK_Z(ret);
+            if (ret == 0)
+                break;
+            out.size = MIN(out.size + cSize / 4, compressedBufferSize);
+        }
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableInBuffer modify buffer : ", testNb++);
+        in.pos = out.pos = 0;
+        out.size = cSize / 4;
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
+        in.src = (char const*)in.src + in.pos;
+        in.size -= in.pos;
+        in.pos = 0;
+        {   size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
+            CHECK(!ZSTD_isError(ret), "Must error");
+            CHECK(!(ZSTD_getErrorCode(ret) == ZSTD_error_srcBuffer_wrong), "Must be this error");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableInBuffer with continue and flush : ", testNb++);
+        in.src = CNBuffer;
+        in.size = CNBufferSize;
+        in.pos = 0;
+        out.pos = 0;
+        out.size = compressedBufferSize;
+        CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only));
+        {   size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue);
+            CHECK(!ZSTD_isError(ret), "Must error");
+            CHECK(!(ZSTD_getErrorCode(ret) == ZSTD_error_srcBuffer_wrong), "Must be this error");
+        }
+        CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only));
+        {   size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush);
+            CHECK(!ZSTD_isError(ret), "Must error");
+            CHECK(!(ZSTD_getErrorCode(ret) == ZSTD_error_srcBuffer_wrong), "Must be this error");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableInBuffer allocated size : ", testNb++);
+        {   size_t const cctxSize = ZSTD_sizeof_CCtx(cctx);
+            CHECK(!(cctxSize1 < cctxSize), "Must be bigger than single-pass");
+            CHECK(!(cctxSize < cctxSize2), "Must be smaller than streaming");
+            cctxSize1 = cctxSize;
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() with ZSTD_c_stableOutBuffer only : ", testNb++);
+        CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+        CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1));
+        in.pos = out.pos = 0;
+        in.size = MIN(CNBufferSize, 10);
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
+        in.pos = 0;
+        in.size = CNBufferSize - in.size;
+        CHECK(!(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) == 0), "Not finished");
+        CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableOutBuffer modify buffer : ", testNb++);
+        in.pos = out.pos = 0;
+        in.size = CNBufferSize;
+        CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue));
+        in.pos = out.pos = 0;
+        {   size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue);
+            CHECK(!ZSTD_isError(ret), "Must have errored");
+            CHECK(!(ZSTD_getErrorCode(ret) == ZSTD_error_dstBuffer_wrong), "Must be this error");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        DISPLAYLEVEL(3, "test%3i : ZSTD_compressStream2() ZSTD_c_stableOutBuffer allocated size : ", testNb++);
+        {   size_t const cctxSize = ZSTD_sizeof_CCtx(cctx);
+            CHECK(!(cctxSize1 < cctxSize), "Must be bigger than single-pass and stableInBuffer");
+            CHECK(!(cctxSize < cctxSize2), "Must be smaller than streaming");
+        }
+        DISPLAYLEVEL(3, "OK \n");
+
+        ZSTD_freeCCtx(cctx);
+    }
+
     /* CDict scenario */
     DISPLAYLEVEL(3, "test%3i : digested dictionary : ", testNb++);
     {   ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1 /*byRef*/ );
@@ -1144,12 +1305,10 @@
 
     /* Basic multithreading compression test */
     DISPLAYLEVEL(3, "test%3i : compress %u bytes with multiple threads : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    {   ZSTD_parameters const params = ZSTD_getParams(1, 0, 0);
-        int jobSize;
-        CHECK_Z( ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize));
+    {   int jobSize;
+        CHECK_Z( ZSTD_CCtx_getParameter(mtctx, ZSTD_c_jobSize, &jobSize));
         CHECK(jobSize != 0, "job size non-zero");
-        CHECK_Z( ZSTDMT_initCStream_advanced(mtctx, CNBuffer, dictSize, params, CNBufferSize) );
-        CHECK_Z( ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize));
+        CHECK_Z( ZSTD_CCtx_getParameter(mtctx, ZSTD_c_jobSize, &jobSize));
         CHECK(jobSize != 0, "job size non-zero");
     }
     outBuff.dst = compressedBuffer;
@@ -1158,7 +1317,7 @@
     inBuff.src = CNBuffer;
     inBuff.size = CNBufferSize;
     inBuff.pos = 0;
-    {   size_t const compressResult = ZSTDMT_compressStream_generic(mtctx, &outBuff, &inBuff, ZSTD_e_end);
+    {   size_t const compressResult = ZSTD_compressStream2(mtctx, &outBuff, &inBuff, ZSTD_e_end);
         if (compressResult != 0) goto _output_error;  /* compression must be completed in a single round */
     }
     if (inBuff.pos != inBuff.size) goto _output_error;   /* entire input should be consumed */
@@ -1516,7 +1675,7 @@
     FUZ_freeDictionary(dictionary);
     ZSTD_freeCStream(zc);
     ZSTD_freeDStream(zd);
-    ZSTDMT_freeCCtx(mtctx);
+    ZSTD_freeCCtx(mtctx);
     free(CNBuffer);
     free(compressedBuffer);
     free(decodedBuffer);
@@ -1826,283 +1985,6 @@
     goto _cleanup;
 }
 
-
-/* fuzzing ZSTDMT_* interface */
-static int fuzzerTests_MT(U32 seed, int nbTests, int startTest,
-                          double compressibility, int bigTests)
-{
-    const U32 maxSrcLog = bigTests ? 24 : 22;
-    static const U32 maxSampleLog = 19;
-    size_t const srcBufferSize = (size_t)1<<maxSrcLog;
-    BYTE* cNoiseBuffer[5];
-    size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog);
-    BYTE*  const copyBuffer = (BYTE*)malloc (copyBufferSize);
-    size_t const cBufferSize   = ZSTD_compressBound(srcBufferSize);
-    BYTE*  const cBuffer = (BYTE*)malloc (cBufferSize);
-    size_t const dstBufferSize = srcBufferSize;
-    BYTE*  const dstBuffer = (BYTE*)malloc (dstBufferSize);
-    U32 result = 0;
-    int testNb = 0;
-    U32 coreSeed = seed;
-    int nbThreads = 2;
-    ZSTDMT_CCtx* zc = ZSTDMT_createCCtx(nbThreads);   /* will be reset sometimes */
-    ZSTD_DStream* zd = ZSTD_createDStream();   /* will be reset sometimes */
-    ZSTD_DStream* const zd_noise = ZSTD_createDStream();
-    UTIL_time_t const startClock = UTIL_getTime();
-    const BYTE* dict=NULL;   /* can keep same dict on 2 consecutive tests */
-    size_t dictSize = 0;
-    int const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests;
-    U32 const nbThreadsMax = bigTests ? 4 : 2;
-
-    /* allocations */
-    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
-    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] ||
-           !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd || !zd_noise ,
-           "Not enough memory, fuzzer tests cancelled");
-
-    /* Create initial samples */
-    RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed);    /* pure noise */
-    RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed);    /* barely compressible */
-    RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
-    RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
-    RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
-    memset(copyBuffer, 0x65, copyBufferSize);                             /* make copyBuffer considered initialized */
-    ZSTD_initDStream_usingDict(zd, NULL, 0);  /* ensure at least one init */
-    DISPLAYLEVEL(6, "Creating initial context with %i threads \n", nbThreads);
-
-    /* catch up testNb */
-    for (testNb=1; testNb < startTest; testNb++)
-        FUZ_rand(&coreSeed);
-
-    /* test loop */
-    for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) {
-        U32 lseed;
-        const BYTE* srcBuffer;
-        size_t totalTestSize, totalGenSize, cSize;
-        XXH64_state_t xxhState;
-        U64 crcOrig;
-        size_t maxTestSize;
-
-        FUZ_rand(&coreSeed);
-        if (nbTests >= testNb) {
-            DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests);
-        } else {
-            DISPLAYUPDATE(2, "\r%6u         ", testNb);
-        }
-        lseed = coreSeed ^ prime32;
-
-        /* states full reset (deliberately not synchronized) */
-        /* some issues can only happen when reusing states */
-        if ((FUZ_rand(&lseed) & 0xFF) == 131) {
-            nbThreads = (FUZ_rand(&lseed) % nbThreadsMax) + 1;
-            DISPLAYLEVEL(5, "Creating new context with %u threads \n", nbThreads);
-            ZSTDMT_freeCCtx(zc);
-            zc = ZSTDMT_createCCtx(nbThreads);
-            CHECK(zc==NULL, "ZSTDMT_createCCtx allocation error")
-        }
-        if ((FUZ_rand(&lseed) & 0xFF) == 132) {
-            ZSTD_freeDStream(zd);
-            zd = ZSTD_createDStream();
-            CHECK(zd==NULL, "ZSTDMT_createCCtx allocation error")
-            ZSTD_initDStream_usingDict(zd, NULL, 0);  /* ensure at least one init */
-        }
-
-        /* srcBuffer selection [0-4] */
-        {   U32 buffNb = FUZ_rand(&lseed) & 0x7F;
-            if (buffNb & 7) buffNb=2;   /* most common : compressible (P) */
-            else {
-                buffNb >>= 3;
-                if (buffNb & 7) {
-                    const U32 tnb[2] = { 1, 3 };   /* barely/highly compressible */
-                    buffNb = tnb[buffNb >> 3];
-                } else {
-                    const U32 tnb[2] = { 0, 4 };   /* not compressible / sparse */
-                    buffNb = tnb[buffNb >> 3];
-            }   }
-            srcBuffer = cNoiseBuffer[buffNb];
-        }
-
-        /* compression init */
-        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
-            U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
-            int const cLevelCandidate = ( FUZ_rand(&lseed)
-                            % (ZSTD_maxCLevel() - (MAX(testLog, dictLog) / 2)) )
-                            + 1;
-            int const cLevelThreadAdjusted = cLevelCandidate - (nbThreads * 2) + 2;  /* reduce cLevel when multiple threads to reduce memory consumption */
-            int const cLevelMin = MAX(cLevelThreadAdjusted, 1);  /* no negative cLevel yet */
-            int const cLevel = MIN(cLevelMin, cLevelMax);
-            maxTestSize = FUZ_rLogLength(&lseed, testLog);
-
-            if (FUZ_rand(&lseed)&1) {   /* simple init */
-                int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1;
-                DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel);
-                CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) );
-            } else {   /* advanced init */
-                /* random dictionary selection */
-                dictSize  = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0;
-                {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
-                    dict = srcBuffer + dictStart;
-                }
-                {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
-                    ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
-                    DISPLAYLEVEL(5, "Init with windowLog = %u, pledgedSrcSize = %u, dictSize = %u \n",
-                        params.cParams.windowLog, (unsigned)pledgedSrcSize, (unsigned)dictSize);
-                    params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                    params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                    params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
-                    DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag);
-                    CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapLog, FUZ_rand(&lseed) % 12) );
-                    CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_jobSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) );   /* custom job size */
-                    CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
-        }   }   }
-
-        /* multi-segments compression test */
-        XXH64_reset(&xxhState, 0);
-        {   ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ;
-            U32 n;
-            for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) {
-                /* compress random chunks into randomly sized dst buffers */
-                {   size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-                    size_t const srcSize = MIN (maxTestSize-totalTestSize, randomSrcSize);
-                    size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize);
-                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                    size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                    ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 };
-                    outBuff.size = outBuff.pos + dstBuffSize;
-
-                    DISPLAYLEVEL(6, "Sending %u bytes to compress \n", (unsigned)srcSize);
-                    CHECK_Z( ZSTDMT_compressStream(zc, &outBuff, &inBuff) );
-                    DISPLAYLEVEL(6, "%u bytes read by ZSTDMT_compressStream \n", (unsigned)inBuff.pos);
-
-                    XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos);
-                    memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos);
-                    totalTestSize += inBuff.pos;
-                }
-
-                /* random flush operation, to mess around */
-                if ((FUZ_rand(&lseed) & 15) == 0) {
-                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                    size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
-                    size_t const previousPos = outBuff.pos;
-                    outBuff.size = outBuff.pos + adjustedDstSize;
-                    DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (unsigned)adjustedDstSize);
-                    CHECK_Z( ZSTDMT_flushStream(zc, &outBuff) );
-                    assert(outBuff.pos >= previousPos);
-                    DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_flushStream \n", (unsigned)(outBuff.pos-previousPos));
-            }   }
-
-            /* final frame epilogue */
-            {   size_t remainingToFlush = (size_t)(-1);
-                while (remainingToFlush) {
-                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                    size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
-                    size_t const previousPos = outBuff.pos;
-                    outBuff.size = outBuff.pos + adjustedDstSize;
-                    DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (unsigned)adjustedDstSize);
-                    remainingToFlush = ZSTDMT_endStream(zc, &outBuff);
-                    CHECK (ZSTD_isError(remainingToFlush), "ZSTDMT_endStream error : %s", ZSTD_getErrorName(remainingToFlush));
-                    assert(outBuff.pos >= previousPos);
-                    DISPLAYLEVEL(6, "%u bytes flushed by ZSTDMT_endStream \n", (unsigned)(outBuff.pos-previousPos));
-                    DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (unsigned)remainingToFlush);
-            }   }
-            crcOrig = XXH64_digest(&xxhState);
-            cSize = outBuff.pos;
-            DISPLAYLEVEL(5, "Frame completed : %u bytes compressed into %u bytes \n",
-                            (unsigned)totalTestSize, (unsigned)cSize);
-        }
-
-        /* multi - fragments decompression test */
-        assert(totalTestSize < dstBufferSize);
-        memset(dstBuffer, 170, totalTestSize);   /* init dest area */
-        if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) {
-            CHECK_Z( ZSTD_resetDStream(zd) );
-        } else {
-            CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) );
-        }
-        {   size_t decompressionResult = 1;
-            ZSTD_inBuffer  inBuff = { cBuffer, cSize, 0 };
-            ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
-            for (totalGenSize = 0 ; decompressionResult ; ) {
-                size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-                inBuff.size = inBuff.pos + readCSrcSize;
-                outBuff.size = outBuff.pos + dstBuffSize;
-                DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes into outBuff %u bytes \n",
-                                (unsigned)readCSrcSize, (unsigned)dstBuffSize);
-                decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
-                if (ZSTD_isError(decompressionResult)) {
-                    DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult));
-                    findDiff(copyBuffer, dstBuffer, totalTestSize);
-                }
-                CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
-                DISPLAYLEVEL(6, "total ingested (inBuff.pos) = %u and produced (outBuff.pos) = %u \n",
-                                (unsigned)inBuff.pos, (unsigned)outBuff.pos);
-            }
-            CHECK (outBuff.pos != totalTestSize,
-                    "decompressed data : wrong size (%u != %u)",
-                    (unsigned)outBuff.pos, (unsigned)totalTestSize );
-            CHECK (inBuff.pos != cSize,
-                    "compressed data should be fully read (%u != %u)",
-                    (unsigned)inBuff.pos, (unsigned)cSize );
-            {   U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
-                if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
-                CHECK (crcDest!=crcOrig, "decompressed data corrupted");
-        }   }
-
-        /*=====   noisy/erroneous src decompression test   =====*/
-
-        /* add some noise */
-        {   U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2;
-            U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) {
-                size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const noiseSize  = MIN((cSize/3) , randomNoiseSize);
-                size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize);
-                size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize);
-                memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize);
-        }   }
-
-        /* try decompression on noisy data */
-        CHECK_Z( ZSTD_initDStream(zd_noise) );   /* note : no dictionary */
-        {   ZSTD_inBuffer  inBuff = { cBuffer, cSize, 0 };
-            ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
-            while (outBuff.pos < dstBufferSize) {
-                size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize);
-                size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize);
-                outBuff.size = outBuff.pos + adjustedDstSize;
-                inBuff.size  = inBuff.pos + adjustedCSrcSize;
-                {   size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff);
-                    if (ZSTD_isError(decompressError)) break;   /* error correctly detected */
-                    /* No forward progress possible */
-                    if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break;
-    }   }   }   }
-    DISPLAY("\r%u fuzzer tests completed   \n", testNb);
-
-_cleanup:
-    ZSTDMT_freeCCtx(zc);
-    ZSTD_freeDStream(zd);
-    ZSTD_freeDStream(zd_noise);
-    free(cNoiseBuffer[0]);
-    free(cNoiseBuffer[1]);
-    free(cNoiseBuffer[2]);
-    free(cNoiseBuffer[3]);
-    free(cNoiseBuffer[4]);
-    free(copyBuffer);
-    free(cBuffer);
-    free(dstBuffer);
-    return result;
-
-_output_error:
-    result = 1;
-    goto _cleanup;
-}
-
 /** If useOpaqueAPI, sets param in cctxParams.
  *  Otherwise, sets the param in zc. */
 static size_t setCCtxParameter(ZSTD_CCtx* zc, ZSTD_CCtx_params* cctxParams,
@@ -2179,6 +2061,8 @@
         U32 resetAllowed = 1;
         size_t maxTestSize;
         ZSTD_parameters savedParams;
+        int isRefPrefix = 0;
+        U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
 
         /* init */
         if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
@@ -2249,8 +2133,8 @@
                 dict = srcBuffer + dictStart;
                 if (!dictSize) dict=NULL;
             }
-            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
-                ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, pledgedSrcSize, dictSize);
+            pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
+            {   ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, pledgedSrcSize, dictSize);
                 const U32 windowLogMax = bigTests ? 24 : 20;
                 const U32 searchLogMax = bigTests ? 15 : 13;
                 if (dictSize)
@@ -2306,6 +2190,8 @@
                 if (FUZ_rand(&lseed) & 1) {
                     DISPLAYLEVEL(5, "t%u: pledgedSrcSize : %u \n", testNb, (unsigned)pledgedSrcSize);
                     CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
+                } else {
+                    pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
                 }
 
                 /* multi-threading parameters. Only adjust occasionally for small tests. */
@@ -2322,7 +2208,11 @@
                     }
                 }
                 /* Enable rsyncable mode 1 in 4 times. */
-                setCCtxParameter(zc, cctxParams, ZSTD_c_rsyncable, (FUZ_rand(&lseed) % 4 == 0), opaqueAPI);
+                {
+                    int const rsyncable = (FUZ_rand(&lseed) % 4 == 0);
+                    DISPLAYLEVEL(5, "t%u: rsyncable : %d \n", testNb, rsyncable);
+                    setCCtxParameter(zc, cctxParams, ZSTD_c_rsyncable, rsyncable, opaqueAPI);
+                }
 
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
 
@@ -2339,6 +2229,7 @@
                         CHECK_Z( ZSTD_CCtx_loadDictionary_byReference(zc, dict, dictSize) );
                     }
                 } else {
+                    isRefPrefix = 1;
                     CHECK_Z( ZSTD_CCtx_refPrefix(zc, dict, dictSize) );
                 }
         }   }
@@ -2346,45 +2237,96 @@
         CHECK_Z(getCCtxParams(zc, &savedParams));
 
         /* multi-segments compression test */
-        XXH64_reset(&xxhState, 0);
-        {   ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ;
-            for (cSize=0, totalTestSize=0 ; (totalTestSize < maxTestSize) ; ) {
-                /* compress random chunks into randomly sized dst buffers */
-                size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize);
-                size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
-                size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                ZSTD_EndDirective const flush = (FUZ_rand(&lseed) & 15) ? ZSTD_e_continue : ZSTD_e_flush;
-                ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 };
-                outBuff.size = outBuff.pos + dstBuffSize;
+        {   int iter;
+            int const startSeed = lseed;
+            XXH64_hash_t compressedCrcs[2];
+            for (iter = 0; iter < 2; ++iter, lseed = startSeed) {
+                ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ;
+                int const singlePass = (FUZ_rand(&lseed) & 3) == 0;
+                int nbWorkers;
 
-                CHECK_Z( ZSTD_compressStream2(zc, &outBuff, &inBuff, flush) );
-                DISPLAYLEVEL(6, "t%u: compress consumed %u bytes (total : %u) ; flush: %u (total : %u) \n",
-                    testNb, (unsigned)inBuff.pos, (unsigned)(totalTestSize + inBuff.pos), (unsigned)flush, (unsigned)outBuff.pos);
+                XXH64_reset(&xxhState, 0);
 
-                XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos);
-                memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos);
-                totalTestSize += inBuff.pos;
+                CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
+                if (isRefPrefix) {
+                    DISPLAYLEVEL(6, "t%u: Reloading prefix\n", testNb);
+                    /* Need to reload the prefix because it gets dropped after one compression */
+                    CHECK_Z( ZSTD_CCtx_refPrefix(zc, dict, dictSize) );
+                }
+
+                /* Adjust number of workers occasionally - result must be deterministic independent of nbWorkers */
+                CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers));
+                if (nbWorkers > 0 && (FUZ_rand(&lseed) & 7) == 0) {
+                    DISPLAYLEVEL(6, "t%u: Modify nbWorkers: %d -> %d \n", testNb, nbWorkers, nbWorkers + iter);
+                    CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, nbWorkers + iter));
+                }
+
+                if (singlePass) {
+                    ZSTD_inBuffer inBuff = { srcBuffer, maxTestSize, 0 };
+                    CHECK_Z(ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end));
+                    DISPLAYLEVEL(6, "t%u: Single pass compression: consumed %u bytes ; produced %u bytes \n",
+                        testNb, (unsigned)inBuff.pos, (unsigned)outBuff.pos);
+                    CHECK(inBuff.pos != inBuff.size, "Input not consumed!");
+                    crcOrig = XXH64(srcBuffer, maxTestSize, 0);
+                    totalTestSize = maxTestSize;
+                } else {
+                    outBuff.size = 0;
+                    for (totalTestSize=0 ; (totalTestSize < maxTestSize) ; ) {
+                        /* compress random chunks into randomly sized dst buffers */
+                        size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
+                        size_t const srcSize = MIN(maxTestSize-totalTestSize, randomSrcSize);
+                        size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize);
+                        ZSTD_EndDirective const flush = (FUZ_rand(&lseed) & 15) ? ZSTD_e_continue : ZSTD_e_flush;
+                        ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 };
+                        int forwardProgress;
+                        do {
+                            size_t const ipos = inBuff.pos;
+                            size_t const opos = outBuff.pos;
+                            size_t ret;
+                            if (outBuff.pos == outBuff.size) {
+                                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
+                                size_t const dstBuffSize = MIN(cBufferSize - outBuff.pos, randomDstSize);
+                                outBuff.size = outBuff.pos + dstBuffSize;
+                            }
+                            CHECK_Z( ret = ZSTD_compressStream2(zc, &outBuff, &inBuff, flush) );
+                            DISPLAYLEVEL(6, "t%u: compress consumed %u bytes (total : %u) ; flush: %u (total : %u) \n",
+                                testNb, (unsigned)inBuff.pos, (unsigned)(totalTestSize + inBuff.pos), (unsigned)flush, (unsigned)outBuff.pos);
+
+                            /* We've completed the flush */
+                            if (flush == ZSTD_e_flush && ret == 0)
+                                break;
+
+                            /* Ensure maximal forward progress for determinism */
+                            forwardProgress = (inBuff.pos != ipos) || (outBuff.pos != opos);
+                        } while (forwardProgress);
+
+                        XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos);
+                        memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos);
+                        totalTestSize += inBuff.pos;
+                    }
+
+                    /* final frame epilogue */
+                    {   size_t remainingToFlush = 1;
+                        while (remainingToFlush) {
+                            ZSTD_inBuffer inBuff = { NULL, 0, 0 };
+                            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
+                            size_t const adjustedDstSize = MIN(cBufferSize - outBuff.pos, randomDstSize);
+                            outBuff.size = outBuff.pos + adjustedDstSize;
+                            DISPLAYLEVEL(6, "t%u: End-flush into dst buffer of size %u \n", testNb, (unsigned)adjustedDstSize);
+                            /* ZSTD_e_end guarantees maximal forward progress */
+                            remainingToFlush = ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end);
+                            DISPLAYLEVEL(6, "t%u: Total flushed so far : %u bytes \n", testNb, (unsigned)outBuff.pos);
+                            CHECK( ZSTD_isError(remainingToFlush),
+                                "ZSTD_compressStream2 w/ ZSTD_e_end error : %s",
+                                ZSTD_getErrorName(remainingToFlush) );
+                    }   }
+                    crcOrig = XXH64_digest(&xxhState);
+                }
+                cSize = outBuff.pos;
+                compressedCrcs[iter] = XXH64(cBuffer, cSize, 0);
+                DISPLAYLEVEL(5, "Frame completed : %zu bytes \n", cSize);
             }
-
-            /* final frame epilogue */
-            {   size_t remainingToFlush = 1;
-                while (remainingToFlush) {
-                    ZSTD_inBuffer inBuff = { NULL, 0, 0 };
-                    size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
-                    size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
-                    outBuff.size = outBuff.pos + adjustedDstSize;
-                    DISPLAYLEVEL(6, "t%u: End-flush into dst buffer of size %u \n", testNb, (unsigned)adjustedDstSize);
-                    remainingToFlush = ZSTD_compressStream2(zc, &outBuff, &inBuff, ZSTD_e_end);
-                    DISPLAYLEVEL(6, "t%u: Total flushed so far : %u bytes \n", testNb, (unsigned)outBuff.pos);
-                    CHECK( ZSTD_isError(remainingToFlush),
-                          "ZSTD_compressStream2 w/ ZSTD_e_end error : %s",
-                           ZSTD_getErrorName(remainingToFlush) );
-            }   }
-            crcOrig = XXH64_digest(&xxhState);
-            cSize = outBuff.pos;
-            DISPLAYLEVEL(5, "Frame completed : %zu bytes \n", cSize);
+            CHECK(!(compressedCrcs[0] == compressedCrcs[1]), "Compression is not deterministic!");
         }
 
         CHECK(badParameters(zc, savedParams), "CCtx params are wrong");
@@ -2496,7 +2438,7 @@
     return 0;
 }
 
-typedef enum { simple_api, mt_api, advanced_api } e_api;
+typedef enum { simple_api, advanced_api } e_api;
 
 int main(int argc, const char** argv)
 {
@@ -2520,7 +2462,6 @@
         /* Parsing commands. Aggregated commands are allowed */
         if (argument[0]=='-') {
 
-            if (!strcmp(argument, "--mt")) { selected_api=mt_api; testNb += !testNb; continue; }
             if (!strcmp(argument, "--newapi")) { selected_api=advanced_api; testNb += !testNb; continue; }
             if (!strcmp(argument, "--no-big-tests")) { bigTests=0; continue; }
 
@@ -2633,9 +2574,6 @@
         case simple_api :
             result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100, bigTests);
             break;
-        case mt_api :
-            result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100, bigTests);
-            break;
         case advanced_api :
             result = fuzzerTests_newAPI(seed, nbTests, testNb, ((double)proba) / 100, bigTests);
             break;
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index feed5b8..d74c41b 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -18,6 +18,8 @@
 PROGRAMS_PATH = ../programs
 TEST_FILE = ../doc/zstd_compression_format.md
 
+VPATH = $(PROGRAMS_PATH)
+
 CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH)       \
             -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
 STDFLAGS  = -std=c89 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
@@ -95,7 +97,7 @@
 fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
 
-zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o zstd_zlibwrapper.o $(PROGRAMS_PATH)/util.o $(PROGRAMS_PATH)/timefn.o $(PROGRAMS_PATH)/datagen.o $(ZSTDLIBRARY)
+zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o zstd_zlibwrapper.o util.o timefn.o datagen.o $(ZSTDLIBRARY)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@