Merge pull request #2405 from senhuang42/sequence_compression_minmatch_fix

Sequence Compression API - make validator use cctx minMatch instead of global MINMATCH
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 239af1d..92f0308 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4524,7 +4524,7 @@
 
 /* Returns a ZSTD error code if sequence is not valid */
 static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
-                                    size_t posInSrc, U32 windowLog, size_t dictSize) {
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
     size_t offsetBound;
     U32 windowSize = 1 << windowLog;
     /* posInSrc represents the amount of data the the decoder would decode up to this point.
@@ -4534,7 +4534,7 @@
      */
     offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
     RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
-    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
+    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
     return 0;
 }
 
@@ -4594,7 +4594,8 @@
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize),
+                                                cctx->appliedParams.cParams.windowLog, dictSize,
+                                                cctx->appliedParams.cParams.minMatch),
                                                 "Sequence validation failed");
         }
         ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
@@ -4679,13 +4680,13 @@
                 U32 firstHalfMatchLength;
                 litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
                 firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
-                if (matchLength > blockSize && firstHalfMatchLength >= MINMATCH) {
+                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
                     /* Only ever split the match if it is larger than the block size */
                     U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
-                    if (secondHalfMatchLength < MINMATCH) {
-                        /* Move the endPosInSequence backward so that it creates match of MINMATCH length */
-                        endPosInSequence -= MINMATCH - secondHalfMatchLength;
-                        bytesAdjustment = MINMATCH - secondHalfMatchLength;
+                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
+                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
+                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                         firstHalfMatchLength -= bytesAdjustment;
                     }
                     matchLength = firstHalfMatchLength;
@@ -4716,7 +4717,8 @@
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize,
+                                                   cctx->appliedParams.cParams.minMatch),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
@@ -4884,6 +4886,7 @@
     DEBUGLOG(3, "ZSTD_compressSequences()");
     assert(cctx != NULL);
     FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+    RETURN_ERROR_IF(inSeqsSize > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
     /* Begin writing output, starting with frame header */
     frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
     op += frameHeaderSize;
diff --git a/lib/zstd.h b/lib/zstd.h
index 1b525bd..b99ce97 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1356,9 +1356,10 @@
  *    behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
  *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
  * 
- *    In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
- *    - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
- *    - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *    In addition to the two adjustable experimental params, there are other important cctx params.
+ *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
  *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
  * 
  * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index e22c11c..e838687 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -199,6 +199,7 @@
     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
     ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
     /* TODO: Add block delim mode fuzzing */
     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
     if (hasDict) {