compression optimization opportunity
switch to single-pass mode directly into output buffer
when outputSize >= ZSTD_compressBound(inputSize).
Speed gains observed with fullbench (~+15% on level 1)
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index b2c7cfc..59761b1 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -72,6 +72,7 @@
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
# include <stdio.h>
/* recommended values for ZSTD_DEBUG display levels :
+ * 1 : no display, enables assert() only
* 2 : reserved for currently active debugging path
* 3 : events once per object lifetime (CCtx, CDict)
* 4 : events once per frame
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f492d92..d4c029e 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2954,6 +2954,8 @@
const BYTE* const ip = (const BYTE*) src;
size_t fhSize = 0;
+ DEBUGLOG(5, "ZSTD_compressContinue_internal");
+ DEBUGLOG(5, "stage: %u", cctx->stage);
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
if (frame && (cctx->stage==ZSTDcs_init)) {
@@ -3211,9 +3213,9 @@
ZSTD_parameters params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
- DEBUGLOG(5, "ZSTD_compressBegin_internal");
- DEBUGLOG(5, "dict ? %s", dict ? "dict" : cdict ? "cdict" : "none");
- DEBUGLOG(5, "dictMode : %u", (U32)dictMode);
+ DEBUGLOG(4, "ZSTD_compressBegin_internal");
+ DEBUGLOG(4, "dict ? %s", dict ? "dict" : cdict ? "cdict" : "none");
+ DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
@@ -3633,7 +3635,7 @@
const ZSTD_CDict* cdict,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
- DEBUGLOG(5, "ZSTD_resetCStream_internal");
+ DEBUGLOG(4, "ZSTD_resetCStream_internal");
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
@@ -3766,6 +3768,9 @@
return length;
}
+/** ZSTD_compressStream_generic():
+ * internal function for all *compressStream*() variants and *compress_generic()
+ * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
@@ -3780,7 +3785,7 @@
U32 someMoreWork = 1;
/* check expectations */
- DEBUGLOG(5, "ZSTD_compressStream_generic");
+ DEBUGLOG(5, "ZSTD_compressStream_generic, order %u", (U32)flushMode);
assert(zcs->inBuff != NULL);
assert(zcs->inBuffSize>0);
assert(zcs->outBuff!= NULL);
@@ -3796,6 +3801,20 @@
return ERROR(init_missing);
case zcss_load:
+ if ( (flushMode == ZSTD_e_end)
+ && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))
+ && (zcs->inBuffPos == 0) ) {
+ /* shortcut to compression pass directly into output buffer */
+ size_t const cSize = ZSTD_compressEnd(zcs,
+ op, oend-op, ip, iend-ip);
+ DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
+ if (ZSTD_isError(cSize)) return cSize;
+ ip = iend;
+ op += cSize;
+ zcs->frameEnded = 1;
+ ZSTD_startNewCompression(zcs);
+ someMoreWork = 0; break;
+ }
/* complete inBuffer */
{ size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
size_t const loaded = ZSTD_limitCopy(
@@ -3922,8 +3941,8 @@
if (input->pos > input->size) return ERROR(GENERIC);
assert(cctx!=NULL);
+ /* transparent initialization stage */
if (cctx->streamStage == zcss_init) {
- /* transparent reset */
const void* const prefix = cctx->prefix;
size_t const prefixSize = cctx->prefixSize;
ZSTD_parameters params = cctx->requestedParams;
@@ -3944,6 +3963,7 @@
CHECK_F( ZSTD_resetCStream_internal(cctx, prefix, prefixSize, cctx->dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
} }
+ /* compression stage */
#ifdef ZSTD_MULTITHREAD
if (cctx->nbThreads > 1) {
size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
@@ -3956,7 +3976,6 @@
}
#endif
- DEBUGLOG(5, "calling ZSTD_compressStream_generic(%i,...)", endOp);
CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
DEBUGLOG(5, "completed ZSTD_compress_generic");
return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */