Merge branch 'dev' of github.com:facebook/zstd into dev
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 35e4346..107e394 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -799,6 +799,38 @@
} ZSTD_format_e;
</b></pre><BR>
<pre><b>typedef enum {
+ </b>/* Note: this enum and the behavior it controls are effectively internal<b>
+ * implementation details of the compressor. They are expected to continue
+ * to evolve and should be considered only in the context of extremely
+ * advanced performance tuning.
+ *
+ * Zstd currently supports the use of a CDict in two ways:
+ *
+ * - The contents of the CDict can be copied into the working context. This
+ * means that the compression can search both the dictionary and input
+ * while operating on a single set of internal tables. This makes
+ * compression faster per byte of input. However, the initial copy of
+ * the CDict's tables incurs a fixed cost at the beginning of the
+ * compression. For small compressions (< 8 KB), that copy can dominate
+ * the cost of the compression.
+ *
+ * - The CDict's tables can be used in-place. In this model, compression is
+ * slower per input byte, because the compressor has to search two sets of
+ * tables. However, this model incurs no start-up cost (as long as the
+ * working context's tables can be reused). For small inputs, this can be
+ * faster than copying the CDict's tables.
+ *
+ * Zstd has a simple internal heuristic that selects which strategy to use
+ * at the beginning of a compression. However, if experimentation shows that
+ * Zstd is making poor choices, it is possible to override that choice by
+ * setting the ZSTD_p_forceAttachDict parameter to a value from this enum.
+ */
+ ZSTD_dictDefaultAttach = 0, </b>/* Use the default heuristic. */<b>
+ ZSTD_dictForceAttach = 1, </b>/* Never copy the dictionary. */<b>
+ ZSTD_dictForceCopy = 2, </b>/* Always copy the dictionary. */<b>
+} ZSTD_dictAttachPref_e;
+</b></pre><BR>
+<pre><b>typedef enum {
</b>/* compression format */<b>
ZSTD_p_format = 10, </b>/* See ZSTD_format_e enum definition.<b>
* Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */
@@ -911,29 +943,14 @@
ZSTD_p_forceMaxWindow=1100, </b>/* Force back-reference distances to remain < windowSize,<b>
* even when referencing into Dictionary content (default:0) */
- ZSTD_p_forceAttachDict, </b>/* ZSTD supports usage of a CDict in-place<b>
- * (avoiding having to copy the compression tables
- * from the CDict into the working context). Using
- * a CDict in this way saves an initial setup step,
- * but comes at the cost of more work per byte of
- * input. ZSTD has a simple internal heuristic that
- * guesses which strategy will be faster. You can
- * use this flag to override that guess.
+ ZSTD_p_forceAttachDict, </b>/* Controls whether the contents of a CDict are<b>
+ * used in place, or whether they are copied into
+ * the working context.
*
- * Note that the by-reference, in-place strategy is
- * only used when reusing a compression context
- * with compatible compression parameters. (If
- * incompatible / uninitialized, the working
- * context needs to be cleared anyways, which is
- * about as expensive as overwriting it with the
- * dictionary context, so there's no savings in
- * using the CDict by-ref.)
- *
- * Values greater than 0 force attaching the dict.
- * Values less than 0 force copying the dict.
- * 0 selects the default heuristic-guided behavior.
+ * Accepts values from the ZSTD_dictAttachPref_e
+ * enum. See the comments on that enum for an
+ * explanation of the feature.
*/
-
} ZSTD_cParameter;
</b></pre><BR>
<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 7721325..c6d72f5 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -412,11 +412,12 @@
CCtxParams->forceWindow = (value > 0);
return CCtxParams->forceWindow;
- case ZSTD_p_forceAttachDict :
- CCtxParams->attachDictPref = value ?
- (value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
- ZSTD_dictDefaultAttach;
+ case ZSTD_p_forceAttachDict : {
+ const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+ CLAMPCHECK(pref, ZSTD_dictDefaultAttach, ZSTD_dictForceCopy);
+ CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref;
+ }
case ZSTD_p_nbWorkers :
#ifndef ZSTD_MULTITHREAD
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ec38e5d..ffbb53a 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -48,12 +48,6 @@
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
-typedef enum {
- ZSTD_dictDefaultAttach = 0,
- ZSTD_dictForceAttach = 1,
- ZSTD_dictForceCopy = -1,
-} ZSTD_dictAttachPref_e;
-
typedef struct ZSTD_prefixDict_s {
const void* dict;
size_t dictSize;
diff --git a/lib/zstd.h b/lib/zstd.h
index 5a1d172..c7e9215 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -997,6 +997,38 @@
} ZSTD_format_e;
typedef enum {
+ /* Note: this enum and the behavior it controls are effectively internal
+ * implementation details of the compressor. They are expected to continue
+ * to evolve and should be considered only in the context of extremely
+ * advanced performance tuning.
+ *
+ * Zstd currently supports the use of a CDict in two ways:
+ *
+ * - The contents of the CDict can be copied into the working context. This
+ * means that the compression can search both the dictionary and input
+ * while operating on a single set of internal tables. This makes
+ * compression faster per byte of input. However, the initial copy of
+ * the CDict's tables incurs a fixed cost at the beginning of the
+ * compression. For small compressions (< 8 KB), that copy can dominate
+ * the cost of the compression.
+ *
+ * - The CDict's tables can be used in-place. In this model, compression is
+ * slower per input byte, because the compressor has to search two sets of
+ * tables. However, this model incurs no start-up cost (as long as the
+ * working context's tables can be reused). For small inputs, this can be
+ * faster than copying the CDict's tables.
+ *
+ * Zstd has a simple internal heuristic that selects which strategy to use
+ * at the beginning of a compression. However, if experimentation shows that
+ * Zstd is making poor choices, it is possible to override that choice by
+ * setting the ZSTD_p_forceAttachDict parameter to a value from this enum.
+ */
+ ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
+ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
/* compression format */
ZSTD_p_format = 10, /* See ZSTD_format_e enum definition.
* Cast selected format as unsigned for ZSTD_CCtx_setParameter() compatibility. */
@@ -1109,29 +1141,14 @@
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
- ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place
- * (avoiding having to copy the compression tables
- * from the CDict into the working context). Using
- * a CDict in this way saves an initial setup step,
- * but comes at the cost of more work per byte of
- * input. ZSTD has a simple internal heuristic that
- * guesses which strategy will be faster. You can
- * use this flag to override that guess.
+ ZSTD_p_forceAttachDict, /* Controls whether the contents of a CDict are
+ * used in place, or whether they are copied into
+ * the working context.
*
- * Note that the by-reference, in-place strategy is
- * only used when reusing a compression context
- * with compatible compression parameters. (If
- * incompatible / uninitialized, the working
- * context needs to be cleared anyways, which is
- * about as expensive as overwriting it with the
- * dictionary context, so there's no savings in
- * using the CDict by-ref.)
- *
- * Values greater than 0 force attaching the dict.
- * Values less than 0 force copying the dict.
- * 0 selects the default heuristic-guided behavior.
+ * Accepts values from the ZSTD_dictAttachPref_e
+ * enum. See the comments on that enum for an
+ * explanation of the feature.
*/
-
} ZSTD_cParameter;
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index bf5eccf..75d0359 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -71,7 +71,7 @@
setRand(cctx, ZSTD_p_contentSizeFlag, 0, 1, state);
setRand(cctx, ZSTD_p_checksumFlag, 0, 1, state);
setRand(cctx, ZSTD_p_dictIDFlag, 0, 1, state);
- setRand(cctx, ZSTD_p_forceAttachDict, -2, 2, state);
+ setRand(cctx, ZSTD_p_forceAttachDict, 0, 2, state);
/* Select long distance matchig parameters */
setRand(cctx, ZSTD_p_enableLongDistanceMatching, 0, 1, state);
setRand(cctx, ZSTD_p_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);