strengthened streaming bufferless compression
diff --git a/Makefile b/Makefile
index 4ee569e..9e7b70e 100644
--- a/Makefile
+++ b/Makefile
@@ -87,8 +87,8 @@
$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
armtest: clean
- $(MAKE) -C $(ZSTDDIR) -e all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
- $(MAKE) -C $(PRGDIR) -e CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+ $(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+ $(MAKE) -C $(PRGDIR) CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"
usan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
diff --git a/NEWS b/NEWS
index 33b8dfc..ff276ac 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
v0.4.3 :
new : zstd-frugal
+new : external dictionary API
v0.4.2 :
Generic minor improvements for small blocks
diff --git a/lib/zstd.h b/lib/zstd.h
index 15fc626..e4d4414 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -48,7 +48,7 @@
***************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 4 /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
unsigned ZSTD_versionNumber (void);
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
index 6304e2b..6d2fd2c 100644
--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@@ -488,7 +488,7 @@
BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
- BIT_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */
+ BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 42 */ /* 24 bits max in 32-bits mode */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
@@ -730,13 +730,30 @@
* Fast Scan
***************************************/
+#define FILLHASHSTEP 3   /* distance between two consecutive positions inserted by ZSTD_fillHashTable() */
+static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)   /* Records, in zc->hashTable, one position out of every FILLHASHSTEP, from base+nextToUpdate up to `end` (inclusive). `mls` is forwarded to ZSTD_hashPtr(). zc->nextToUpdate is read but not advanced here. */
+{
+ U32* const hashTable = zc->hashTable;
+ const U32 hBits = zc->params.hashLog;
+ const BYTE* const base = zc->base;
+ const BYTE* ip = base + zc->nextToUpdate;   /* first position to insert */
+ const BYTE* const iend = (const BYTE*) end;
+
+ while(ip <= iend)   /* `end` itself is still a candidate position */
+ {
+ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);   /* store the position as an index relative to base; any previous entry in that slot is overwritten */
+ ip += FILLHASHSTEP;
+ }
+}
+
+
FORCE_INLINE
size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const U32 mls)
{
- U32* hashTable = zc->hashTable;
+ U32* const hashTable = zc->hashTable;
const U32 hBits = zc->params.hashLog;
seqStore_t* seqStorePtr = &(zc->seqStore);
const BYTE* const base = zc->base;
@@ -1973,10 +1990,24 @@
void* dst, size_t dstSize,
const void* src, size_t srcSize)
{
+ U32 adressOverflow = 0;
const BYTE* const ip = (const BYTE*) src;
+ /* Check if blocks follow each other */
+ if (src != zc->nextSrc)
+ {
+ /* not contiguous */
+ size_t delta = zc->nextSrc - ip;
+ zc->lowLimit = zc->dictLimit;
+ zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+ zc->dictBase = zc->base;
+ if ((size_t)zc->base < delta) adressOverflow = zc->lowLimit;
+ zc->base -= delta;
+ zc->nextToUpdate = zc->dictLimit;
+ }
+
/* preemptive overflow correction */
- if ((zc->base > (const BYTE*)src) || (zc->lowLimit > (1<<30) ))
+ if (adressOverflow || (zc->lowLimit > (1<<30) ))
{
U32 correction = zc->lowLimit-1;
ZSTD_reduceIndex(zc, correction);
@@ -1988,17 +2019,6 @@
else zc->nextToUpdate -= correction;
}
- /* Check if blocks follow each other */
- if (src != zc->nextSrc)
- {
- /* not contiguous */
- zc->lowLimit = zc->dictLimit;
- zc->dictLimit = (U32)(zc->nextSrc - zc->base);
- zc->dictBase = zc->base;
- zc->base += ip - zc->nextSrc;
- zc->nextToUpdate = zc->dictLimit;
- }
-
/* input-dictionary overlap */
if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit))
{
@@ -2011,8 +2031,46 @@
return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
}
+size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t srcSize)   /* Registers src[0..srcSize) as the data preceding the next input (zc->nextSrc is set to its end) and indexes it according to zc->params.strategy. @return : 0, or ERROR(GENERIC) if the strategy is not one of the handled cases. */
+{
+ const BYTE* const ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
-/** ZSTD_compressBegin_advanced
+ /* input becomes current prefix */
+ zc->lowLimit = zc->dictLimit;   /* former dictLimit becomes the new lowLimit */
+ zc->dictLimit = (U32)(zc->nextSrc - zc->base);   /* index of the end of the data seen so far */
+ zc->dictBase = zc->base;   /* keep the old base for the older segment */
+ zc->base += ip - zc->nextSrc;   /* rebase so that base + dictLimit == ip */
+ zc->nextToUpdate = zc->dictLimit;   /* indexing restarts at the first byte of src */
+
+ zc->nextSrc = iend;   /* a following input starting at iend compares equal to nextSrc */
+ if (srcSize <= 8) return 0;   /* short input : the bookkeeping above is kept, but nothing is indexed */
+
+ switch(zc->params.strategy)
+ {
+ case ZSTD_fast:
+ ZSTD_fillHashTable (zc, iend-8, zc->params.searchLength);   /* hash positions up to iend-8 (inclusive) */
+ break;
+
+ case ZSTD_greedy:
+ case ZSTD_lazy:
+ case ZSTD_lazy2:
+ ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength);   /* NOTE(review): presumably inserts positions up to iend-8 into the chain table; return value is ignored -- confirm against helper */
+ break;
+
+ case ZSTD_btlazy2:
+ ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);   /* NOTE(review): presumably updates the tree up to iend-8, with iend as read limit -- confirm against helper */
+ break;
+
+ default:
+ return ERROR(GENERIC); /* strategy doesn't exist; impossible */
+ }
+
+ return 0;
+}
+
+
+/*! ZSTD_compressBegin_advanced
* Write frame header, according to params
* @return : nb of bytes written */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c
index 8940969..fe4ae97 100644
--- a/lib/zstd_decompress.c
+++ b/lib/zstd_decompress.c
@@ -127,10 +127,10 @@
U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
- void* previousDstEnd;
- void* base;
- void* vBase;
- void* dictEnd;
+ const void* previousDstEnd;
+ const void* base;
+ const void* vBase;
+ const void* dictEnd;
size_t expected;
size_t headerSize;
ZSTD_parameters params;
@@ -141,7 +141,7 @@
size_t litSize;
BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
BYTE headerBuffer[ZSTD_frameHeaderSize_max];
-}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
{
@@ -505,7 +505,7 @@
FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit_8,
- BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
+ const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
{
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
@@ -607,9 +607,9 @@
U32* DTableLL = dctx->LLTable;
U32* DTableML = dctx->MLTable;
U32* DTableOffb = dctx->OffTable;
- BYTE* const base = (BYTE*) (dctx->base);
- BYTE* const vBase = (BYTE*) (dctx->vBase);
- BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
+ const BYTE* const base = (const BYTE*) (dctx->base);
+ const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
/* Build Decoding Tables */
errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
@@ -691,7 +691,7 @@
/* init */
- ctx->base = ctx->vBase = ctx->dictEnd = dst;
+ ctx->vBase = ctx->base = ctx->dictEnd = dst;
/* Frame Header */
{
@@ -776,7 +776,7 @@
if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment into dictionary */
ctx->base = (char*)dst; /* temporary affectation, for vBase calculation */
ctx->dictEnd = ctx->previousDstEnd;
- ctx->vBase = (char*)dst - ((char*)(ctx->previousDstEnd) - (char*)(ctx->base));
+ ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
ctx->base = dst;
ctx->previousDstEnd = dst;
}
@@ -827,10 +827,9 @@
ctx->bType = bp.blockType;
ctx->stage = ZSTDds_decompressBlock;
}
-
return 0;
}
- case 3:
+ case ZSTDds_decompressBlock:
{
/* Decompress : block content */
size_t rSize;
@@ -862,3 +861,10 @@
}
+void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize)   /* Makes src[0..srcSize) the current segment of the decoding context : following output is treated as coming right after src+srcSize. Only pointers are updated; src is not copied. */
+{
+ ctx->dictEnd = ctx->previousDstEnd;   /* end of the previous segment is remembered as dictEnd */
+ ctx->vBase = (const char*)src - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));   /* vBase sits (previousDstEnd - base) bytes below src; must be computed before base is overwritten */
+ ctx->base = src;   /* the new segment starts at src */
+ ctx->previousDstEnd = (const char*)src + srcSize;   /* one past the last byte of the inserted data */
+}
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
index f698153..5c31562 100644
--- a/lib/zstd_static.h
+++ b/lib/zstd_static.h
@@ -104,6 +104,8 @@
****************************************/
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel);
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params);
+size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+
size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
@@ -118,6 +120,10 @@
Use ZSTD_compressBegin().
You may also prefer the advanced derivative ZSTD_compressBegin_advanced(), for finer parameter control.
+ It's then possible to add a dictionary with ZSTD_compress_insertDictionary().
+ Note that dictionary presence is "hidden" information :
+ the decoder needs to be aware that it is required for proper decoding, or decoding will fail.
+
Then, consume your input using ZSTD_compressContinue().
The interface is synchronous, so all input will be consumed.
You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
@@ -131,12 +137,15 @@
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTD_DCtx* ZSTD_createDCtx(void);
-size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
/**
Streaming decompression, bufferless mode
@@ -146,15 +155,17 @@
First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
This function doesn't consume its input. It needs enough input data to properly decode the frame header.
- The objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+ Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
>0 : means there is not enough data into src. Provides the expected size to successfully decode header.
errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)
+ Then, you can optionally insert a dictionary. This operation must mimic the compressor behavior, otherwise decompression will fail or produce corrupted data.
+
Then it's possible to start decompression.
Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
- ZSTD_decompressContinue() requires this exact amount of bytes, or just fails.
+ ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index 793af7f..62f58f1 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -294,9 +294,10 @@
/* test loop */
for ( ; testNb <= nbTests; testNb++ )
{
- size_t sampleSize, sampleStart;
- size_t cSize, dSize, dSupSize;
- U32 sampleSizeLog, buffNb, cLevelMod;
+ size_t sampleSize, sampleStart, maxTestSize, totalTestSize;
+ size_t cSize, dSize, dSupSize, errorCode;
+ U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n;
+ XXH64_state_t crc64;
U64 crcOrig, crcDest;
int cLevel;
BYTE* sampleBuffer;
@@ -342,7 +343,6 @@
/* compression failure test : too small dest buffer */
if (cSize > 3)
{
- size_t errorCode;
const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
const size_t tooSmallSize = cSize - missing;
static const U32 endMark = 0x4DC2B1A9;
@@ -365,7 +365,6 @@
/* truncated src decompression test */
{
- size_t errorCode;
const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
const size_t tooSmallSize = cSize - missing;
void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch overflows */
@@ -379,7 +378,6 @@
/* too small dst decompression test */
if (sampleSize > 3)
{
- size_t errorCode;
const size_t missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
const size_t tooSmallSize = sampleSize - missing;
static const BYTE token = 0xA9;
@@ -424,7 +422,6 @@
U32 noiseSrc = FUZ_rand(&lseed) % 5;
const U32 endMark = 0xA9B1C3D6;
U32 endCheck;
- size_t errorCode;
srcBuffer = cNoiseBuffer[noiseSrc];
memcpy(dstBuffer+sampleSize, &endMark, 4);
errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
@@ -435,6 +432,39 @@
CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
}
}
+
+ /* Multi - segments compression test */
+ XXH64_reset(&crc64, 0);
+ nbChunks = (FUZ_rand(&lseed) & 127) + 2;
+ sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
+ maxTestSize = (size_t)1 << sampleSizeLog;
+ maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
+ totalTestSize = 0;
+ cSize = ZSTD_compressBegin(ctx, cBuffer, cBufferSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
+ for (n=0; n<nbChunks; n++)
+ {
+ sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+ sampleSize = (size_t)1 << sampleSizeLog;
+ sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+ sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+
+ if (cBufferSize-cSize < ZSTD_compressBound(sampleSize))
+ /* avoid invalid dstBufferTooSmall */
+ break;
+
+ errorCode = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+sampleStart, sampleSize);
+ CHECK (ZSTD_isError(errorCode), "multi-segments compression error : %s", ZSTD_getErrorName(errorCode));
+ cSize += errorCode;
+
+ XXH64_update(&crc64, srcBuffer+sampleStart, sampleSize);
+ totalTestSize += sampleSize;
+
+ if (totalTestSize > maxTestSize) break;
+ }
+ errorCode = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
+ CHECK (ZSTD_isError(errorCode), "multi-segments epilogue error : %s", ZSTD_getErrorName(errorCode));
+ cSize += errorCode;
+ crcOrig = XXH64_digest(&crc64);
}
DISPLAY("\rAll fuzzer tests completed \n");