Fix unit tests to agree with new changes
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 434ad6e..263a450 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4489,7 +4489,7 @@
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
- U64 posInSrc; /* Position in src stream */
+ U64 posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
@@ -4510,7 +4510,7 @@
}
#endif
-/* Returns offset code, given a raw offset and repcode array */
+/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32* const rep, U32 ll0) {
U32 offCode = rawOffset + ZSTD_REP_MOVE;
U32 repCode = 0;
@@ -4525,17 +4525,21 @@
repCode = 3;
}
if (repCode) {
+ /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
offCode = repCode - 1;
}
return offCode;
}
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ */
static size_t ZSTD_copySequencesToSeqStoreBlockDelim(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_CCtx* cctx) {
size_t idx = seqPos->idx;
BYTE const* ip = (BYTE const*)(src);
- BYTE const* iend = ip + blockSize;
+ BYTE const* const iend = ip + blockSize; /* one past the last byte of this block */
repcodes_t updatedRepcodes;
U32 dictSize;
U32 litLength;
@@ -4545,12 +4549,11 @@
if (cctx->cdict) {
dictSize = cctx->cdict->dictContentSize;
- } else if (cctx->prefixDict.dictSize) {
+ } else if (cctx->prefixDict.dict) {
dictSize = cctx->prefixDict.dictSize;
} else {
dictSize = 0;
}
-
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
litLength = inSeqs[idx].litLength;
@@ -4587,7 +4590,15 @@
}
/* Returns the number of bytes to move the current read position back by. Only non-zero
- * if we ended up splitting a sequence.
+ * if we ended up splitting a sequence. If something goes wrong, a ZSTD error code
+ * is returned instead.
+ *
+ * This function will attempt to scan through blockSize bytes represented by the sequences
+ * in inSeqs, storing any (partial) sequences.
+ *
+ * Occasionally, we may consume fewer bytes from inSeqs than requested, either to avoid
+ * splitting a match at all, or to avoid a split that would produce a match smaller than
+ * MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
@@ -4596,7 +4607,7 @@
size_t startPosInSequence = seqPos->posInSequence;
size_t endPosInSequence = seqPos->posInSequence + blockSize;
BYTE const* ip = (BYTE const*)(src);
- BYTE const* iend = ip + blockSize;
+ BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
repcodes_t updatedRepcodes;
U32 bytesAdjustment = 0;
U32 finalMatchSplit = 0;
@@ -4608,11 +4619,11 @@
U32 repCode;
if (cctx->cdict) {
- dictSize = ZSTD_sizeof_CDict(cctx->cdict);
- } else if (cctx->prefixDict.dictSize) {
+ dictSize = cctx->cdict->dictContentSize;
+ } else if (cctx->prefixDict.dict) {
dictSize = cctx->prefixDict.dictSize;
- } else if (ZSTD_sizeof_localDict(cctx->localDict)) {
- dictSize = ZSTD_sizeof_localDict(cctx->localDict);
+ } else {
+ dictSize = 0;
}
DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %zu PIS: %u blockSize: %zu windowSize: %u", idx, startPosInSequence, blockSize, windowSize);
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
@@ -4638,9 +4649,10 @@
startPosInSequence = 0;
idx++;
} else {
- /* This is the final sequence we're adding from inSeqs, and endPosInSequence
+ /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
does not reach the end of the match. So, we have to split the sequence */
- DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+ DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
+ currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
if (endPosInSequence > litLength) {
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
U32 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
@@ -4660,7 +4672,8 @@
} else {
/* Move the position in sequence backwards so that we don't split match, and break to store
* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
- * should go.
+ * should go. We prefer to do this whenever it is not necessary to split the match, or if splitting
+ * would cause the first half of the match to be too small.
*/
bytesAdjustment = endPosInSequence - currSeq.litLength;
endPosInSequence = currSeq.litLength;
@@ -4692,7 +4705,6 @@
assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
seqPos->idx = idx;
seqPos->posInSequence = endPosInSequence;
- /* Update repcodes */
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
iend -= bytesAdjustment;
@@ -4712,7 +4724,7 @@
/* Compress, block-by-block, all of the sequences given.
*
- * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error
+ * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
*/
static size_t ZSTD_compressSequences_internal(void* dst, size_t dstCapacity,
ZSTD_CCtx* cctx,
@@ -4849,16 +4861,13 @@
op += frameHeaderSize;
dstCapacity -= frameHeaderSize;
cSize += frameHeaderSize;
-
- /* Update checksum if requested */
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
-
/* cSize includes block header size and compressed sequences size */
compressedBlocksSize = ZSTD_compressSequences_internal(op, dstCapacity,
- cctx, inSeqs, inSeqsSize,
- src, srcSize);
+ cctx, inSeqs, inSeqsSize,
+ src, srcSize);
FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
cSize += compressedBlocksSize;
dstCapacity -= compressedBlocksSize;
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index f40ca08..2e5d70e 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -2764,7 +2764,8 @@
/* Test with block delimiters roundtrip */
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
- compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
+ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with block delims\n");
goto _output_error;
@@ -2779,7 +2780,8 @@
/* Test with no block delimiters roundtrip */
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
- compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
+ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+ compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in sequence compression with no block delims\n");
goto _output_error;