Merge pull request #2422 from felixhandte/doc-update-repcodes
Update Zstd Compression Format to Clarify Repcode Behavior
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 6e5c6fe..9a221d3 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -137,12 +137,33 @@
if (UNIX OR MINGW)
# pkg-config
set(PREFIX "${CMAKE_INSTALL_PREFIX}")
- set(LIBDIR "${CMAKE_INSTALL_LIBDIR}")
- set(INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}")
+ set(EXEC_PREFIX "\\$$\{prefix}")
+ set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}")
+ set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
set(VERSION "${zstd_VERSION}")
+
+ string(LENGTH "${PREFIX}" PREFIX_LENGTH)
+ string(SUBSTRING "${LIBDIR}" 0 ${PREFIX_LENGTH} LIBDIR_PREFIX)
+ string(SUBSTRING "${LIBDIR}" ${PREFIX_LENGTH} -1 LIBDIR_SUFFIX)
+ string(SUBSTRING "${INCLUDEDIR}" 0 ${PREFIX_LENGTH} INCLUDEDIR_PREFIX)
+ string(SUBSTRING "${INCLUDEDIR}" ${PREFIX_LENGTH} -1 INCLUDEDIR_SUFFIX)
+
+ if ("${INCLUDEDIR_PREFIX}" STREQUAL "${PREFIX}")
+ set(INCLUDEDIR_PREFIX "\\$$\{prefix}")
+ endif()
+ if ("${LIBDIR_PREFIX}" STREQUAL "${PREFIX}")
+ set(LIBDIR_PREFIX "\\$$\{exec_prefix}")
+ endif()
+
add_custom_target(libzstd.pc ALL
- ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc"
- -DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}"
+ ${CMAKE_COMMAND}
+ -DIN="${LIBRARY_DIR}/libzstd.pc.in"
+ -DOUT="libzstd.pc"
+ -DPREFIX="${PREFIX}"
+ -DEXEC_PREFIX="${EXEC_PREFIX}"
+ -DINCLUDEDIR="${INCLUDEDIR_PREFIX}${INCLUDEDIR_SUFFIX}"
+ -DLIBDIR="${LIBDIR_PREFIX}${LIBDIR_SUFFIX}"
+ -DVERSION="${VERSION}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake"
COMMENT "Creating pkg-config file")
diff --git a/lib/Makefile b/lib/Makefile
index 7a736d3..d04caf6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -353,6 +353,8 @@
all: libzstd.pc
+HAS_EXPLICIT_EXEC_PREFIX := $(if $(or $(EXEC_PREFIX),$(exec_prefix)),1,)
+
DESTDIR ?=
# directory variables : GNU conventions prefer lowercase
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
@@ -366,24 +368,17 @@
includedir ?= $(PREFIX)/include
INCLUDEDIR ?= $(includedir)
-PCLIBDIR ?= $(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(EXEC_PREFIX)(/|$$)@@p")
-PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(PREFIX)(/|$$)@@p")
+PCINCDIR := $(patsubst $(PREFIX)%,%,$(INCLUDEDIR))
+PCLIBDIR := $(patsubst $(EXEC_PREFIX)%,%,$(LIBDIR))
-ifeq (,$(PCLIBDIR))
-# Additional prefix check is required, since the empty string is technically a
-# valid PCLIBDIR
-ifeq (,$(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p"))
-$(error configured libdir ($(LIBDIR)) is outside of prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
-endif
-endif
+# If we successfully stripped off a prefix, we'll add a reference to the
+# relevant pc variable.
+PCINCPREFIX := $(if $(findstring $(INCLUDEDIR),$(PCINCDIR)),,$${prefix})
+PCLIBPREFIX := $(if $(findstring $(LIBDIR),$(PCLIBDIR)),,$${exec_prefix})
-ifeq (,$(PCINCDIR))
-# Additional prefix check is required, since the empty string is technically a
-# valid PCINCDIR
-ifeq (,$(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p"))
-$(error configured includedir ($(INCLUDEDIR)) is outside of exec_prefix ($(PREFIX)), can't generate pkg-config file)
-endif
-endif
+# If no explicit EXEC_PREFIX was set by the caller, write it out as a reference
+# to PREFIX, rather than as a resolved value.
+PCEXEC_PREFIX := $(if $(HAS_EXPLICIT_EXEC_PREFIX),$(EXEC_PREFIX),$${prefix})
ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
@@ -404,9 +399,11 @@
libzstd.pc:
libzstd.pc: libzstd.pc.in
@echo creating pkgconfig
- @sed $(SED_ERE_OPT) -e 's|@PREFIX@|$(PREFIX)|' \
- -e 's|@LIBDIR@|$(PCLIBDIR)|' \
- -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \
+ @sed $(SED_ERE_OPT) \
+ -e 's|@PREFIX@|$(PREFIX)|' \
+ -e 's|@EXEC_PREFIX@|$(PCEXEC_PREFIX)|' \
+ -e 's|@INCLUDEDIR@|$(PCINCPREFIX)$(PCINCDIR)|' \
+ -e 's|@LIBDIR@|$(PCLIBPREFIX)$(PCLIBDIR)|' \
-e 's|@VERSION@|$(VERSION)|' \
$< >$@
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f6faca7..eb7780c 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -109,7 +109,7 @@
ZSTD_CCtx* cctx;
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
- ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
if (cctx == NULL) return NULL;
@@ -457,12 +457,12 @@
bounds.lowerBound = (int)ZSTD_bm_buffered;
bounds.upperBound = (int)ZSTD_bm_stable;
return bounds;
-
+
case ZSTD_c_blockDelimiters:
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
return bounds;
-
+
case ZSTD_c_validateSequences:
bounds.lowerBound = 0;
bounds.upperBound = 1;
@@ -781,12 +781,12 @@
BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
return CCtxParams->outBufferMode;
-
+
case ZSTD_c_blockDelimiters:
BOUNDCHECK(ZSTD_c_blockDelimiters, value);
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
return CCtxParams->blockDelimiters;
-
+
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
@@ -3692,7 +3692,7 @@
return NULL;
}
- ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
assert(cdict != NULL);
@@ -3836,7 +3836,7 @@
{
ZSTD_cwksp ws;
- ZSTD_cwksp_init(&ws, workspace, workspaceSize);
+ ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
if (cdict == NULL) return NULL;
ZSTD_cwksp_move(&cdict->workspace, &ws);
@@ -4635,10 +4635,10 @@
/* Returns the number of bytes to move the current read position back by. Only non-zero
* if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
* went wrong.
- *
+ *
* This function will attempt to scan through blockSize bytes represented by the sequences
- * in inSeqs, storing any (partial) sequences.
- *
+ * in inSeqs, storing any (partial) sequences.
+ *
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
@@ -4659,7 +4659,7 @@
U32 matchLength;
U32 rawOffset;
U32 offCode;
-
+
if (cctx->cdict) {
dictSize = cctx->cdict->dictContentSize;
} else if (cctx->prefixDict.dict) {
@@ -4793,7 +4793,7 @@
size_t compressedSeqsSize;
size_t remaining = srcSize;
ZSTD_sequencePosition seqPos = {0, 0, 0};
-
+
BYTE const* ip = (BYTE const*)src;
BYTE* op = (BYTE*)dst;
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
@@ -4879,7 +4879,7 @@
cSize += cBlockSize;
DEBUGLOG(4, "cSize running total: %zu", cSize);
-
+
if (lastBlock) {
break;
} else {
@@ -4890,7 +4890,7 @@
cctx->isFirstBlock = 0;
}
}
-
+
return cSize;
}
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index 5d07352..d65170b 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -45,6 +45,16 @@
} ZSTD_cwksp_alloc_phase_e;
/**
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+ ZSTD_cwksp_dynamic_alloc,
+ ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
+/**
* Zstd fits all its internal datastructures into a single continuous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
* can be provided to it and it can perform no allocations at all). This buffer
@@ -137,9 +147,10 @@
void* tableValidEnd;
void* allocStart;
- int allocFailed;
+ BYTE allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
+ ZSTD_cwksp_static_alloc_e isStatic;
} ZSTD_cwksp;
/*-*************************************
@@ -256,7 +267,9 @@
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
- __asan_unpoison_memory_region(alloc, bytes);
+ if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+ __asan_unpoison_memory_region(alloc, bytes);
+ }
#endif
return alloc;
@@ -302,7 +315,9 @@
ws->tableEnd = end;
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
- __asan_unpoison_memory_region(alloc, bytes);
+ if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+ __asan_unpoison_memory_region(alloc, bytes);
+ }
#endif
return alloc;
@@ -341,7 +356,9 @@
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
- __asan_unpoison_memory_region(alloc, bytes);
+ if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+ __asan_unpoison_memory_region(alloc, bytes);
+ }
#endif
return alloc;
@@ -398,7 +415,11 @@
DEBUGLOG(4, "cwksp: clearing tables!");
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
- {
+ /* We don't do this when the workspace is statically allocated, because
+ * when that is the case, we have no capability to hook into the end of the
+ * workspace's lifecycle to unpoison the memory.
+ */
+ if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
@@ -427,7 +448,11 @@
#endif
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
- {
+ /* We don't do this when the workspace is statically allocated, because
+ * when that is the case, we have no capability to hook into the end of the
+ * workspace's lifecycle to unpoison the memory.
+ */
+ if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
@@ -447,7 +472,7 @@
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
-MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
@@ -455,6 +480,7 @@
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->phase = ZSTD_cwksp_alloc_objects;
+ ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
@@ -464,7 +490,7 @@
void* workspace = ZSTD_customMalloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
- ZSTD_cwksp_init(ws, workspace, size);
+ ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
return 0;
}
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
index 8ec0235..8465c97 100644
--- a/lib/libzstd.pc.in
+++ b/lib/libzstd.pc.in
@@ -3,9 +3,9 @@
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
prefix=@PREFIX@
-exec_prefix=${prefix}
-includedir=${prefix}/@INCLUDEDIR@
-libdir=${exec_prefix}/@LIBDIR@
+exec_prefix=@EXEC_PREFIX@
+includedir=@INCLUDEDIR@
+libdir=@LIBDIR@
Name: zstd
Description: fast lossless compression algorithm library
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 4b4996d..439aebe 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -1240,7 +1240,7 @@
size_t nbFiles) {
size_t pos = 0;
size_t n;
- size_t totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
+ size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad);
size_t* fileSizes = calloc(sizeof(size_t), nbFiles);
void* srcBuffer = NULL;
@@ -1325,7 +1325,7 @@
}
{ U64 const dictFileSize = UTIL_getFileSize(dictFileName);
assert(dictFileSize != UTIL_FILESIZE_UNKNOWN);
- ctx->dictSize = dictFileSize;
+ ctx->dictSize = (size_t)dictFileSize;
assert((U64)ctx->dictSize == dictFileSize); /* check overflow */
}
ctx->dictBuffer = malloc(ctx->dictSize);
@@ -1492,7 +1492,7 @@
if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */
mtAll[i].tableType = xxhashMap;
- mtl = (1ULL << memoTableLog);
+ mtl = ((size_t)1 << memoTableLog);
}
mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl);
@@ -1669,7 +1669,7 @@
}
/* Bench */
- bResult.cMem = ((U64)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
+ bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
{ BMK_benchOutcome_t bOut;
bOut.tag = 0;
@@ -1859,8 +1859,8 @@
double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
- size_t W_CMemUsed = (1ULL << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
- size_t O_CMemUsed = (1ULL << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
+ size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
+ size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);