blob: c17645de750aaff52479393064c7860794d4c801 [file] [log] [blame]
/**
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 */
Yann Colletf3eca252015-10-22 15:31:46 +01009
Yann Colletf3eca252015-10-22 15:31:46 +010010
Yann Collet7d360282016-02-12 00:07:30 +010011/*-*************************************
Yann Colletb0edb7f2017-05-12 15:31:53 -070012* Tuning parameters
13***************************************/
/* Compression level stored in a freshly created context;
 * can be overridden by defining ZSTD_CLEVEL_DEFAULT at build time. */
#if !defined(ZSTD_CLEVEL_DEFAULT)
#  define ZSTD_CLEVEL_DEFAULT 3
#endif
17
Yann Colletbf991502017-06-19 12:56:25 -070018
Yann Colletb0edb7f2017-05-12 15:31:53 -070019/*-*************************************
Yann Colletae7aa062016-02-03 02:46:46 +010020* Dependencies
Yann Colletf3eca252015-10-22 15:31:46 +010021***************************************/
Yann Colletd3b7f8d2016-06-04 19:47:02 +020022#include <string.h> /* memset */
Yann Collet14983e72015-11-11 21:38:21 +010023#include "mem.h"
Yann Collet5a0c8e22016-08-12 01:20:36 +020024#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
Yann Colletd0e2cd12016-06-05 00:58:01 +020025#include "fse.h"
Yann Collet130fe112016-06-05 00:42:28 +020026#define HUF_STATIC_LINKING_ONLY
27#include "huf.h"
Yann Colletd3b7f8d2016-06-04 19:47:02 +020028#include "zstd_internal.h" /* includes zstd.h */
Yann Colletc4a5a212017-06-01 17:56:14 -070029#include "zstdmt_compress.h"
Yann Collet31533ba2017-04-27 00:29:04 -070030
31
32/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010033* Constants
Yann Colletf3eca252015-10-22 15:31:46 +010034***************************************/
Yann Colletbb604482016-03-19 15:18:42 +010035static const U32 g_searchStrength = 8; /* control skip over incompressible data */
Yann Collet731ef162016-07-27 21:05:12 +020036#define HASH_READ_SIZE 8
37typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
Yann Colletf3eca252015-10-22 15:31:46 +010038
Yann Collet71ddeb62017-04-20 22:54:54 -070039/* entropy tables always have same size */
40static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
Yann Collete42afbc2017-04-26 11:39:35 -070041static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
42static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
43static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
Yann Collet72712032017-04-20 23:21:19 -070044static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
Yann Colleta34a39c2017-04-20 18:17:58 -070045
Yann Colletf3eca252015-10-22 15:31:46 +010046
Yann Collet7d360282016-02-12 00:07:30 +010047/*-*************************************
Yann Collet59d1f792016-01-23 19:28:41 +010048* Helper functions
49***************************************/
/* ZSTD_compressBound() :
 * size bound computed as srcSize + srcSize/256, plus an extra margin
 * for inputs smaller than 256 KB.
 * The margin shrinks linearly, from 64 (srcSize==0) down to 0 (srcSize>=256 KB). */
size_t ZSTD_compressBound(size_t srcSize) {
    size_t const smallInputThreshold = (size_t)256 << 10;   /* 256 KB */
    size_t bound = srcSize + (srcSize >> 8);
    if (srcSize < smallInputThreshold)
        bound += (smallInputThreshold - srcSize) >> 12;
    return bound;
}
Yann Collet59d1f792016-01-23 19:28:41 +010055
56
Yann Collet7d360282016-02-12 00:07:30 +010057/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010058* Sequence storage
Yann Colletf3eca252015-10-22 15:31:46 +010059***************************************/
Yann Collet14983e72015-11-11 21:38:21 +010060static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
61{
Yann Collet14983e72015-11-11 21:38:21 +010062 ssPtr->lit = ssPtr->litStart;
Yann Colletc0ce4f12016-07-30 00:55:13 +020063 ssPtr->sequences = ssPtr->sequencesStart;
Yann Collet5d393572016-04-07 17:19:00 +020064 ssPtr->longLengthID = 0;
Yann Collet14983e72015-11-11 21:38:21 +010065}
66
67
Yann Collet7d360282016-02-12 00:07:30 +010068/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010069* Context memory management
70***************************************/
/* stage of the streaming state machine (ZSTD_CCtx_s::streamStage).
 * zcss_init must remain 0 : a zero-initialized context starts in this stage. */
typedef enum {
    zcss_init = 0,
    zcss_load,
    zcss_flush
} ZSTD_cStreamStage;
Yann Collet0be6fd32017-05-08 16:08:01 -070072
Yann Collet18803372017-05-22 18:21:51 -070073struct ZSTD_CDict_s {
74 void* dictBuffer;
75 const void* dictContent;
76 size_t dictContentSize;
77 ZSTD_CCtx* refContext;
Yann Collet8c910d22017-06-03 01:15:02 -070078}; /* typedef'd to ZSTD_CDict within "zstd.h" */
Yann Colletf3eca252015-10-22 15:31:46 +010079
Yann Colletaca113f2016-12-23 22:25:03 +010080struct ZSTD_CCtx_s {
Yann Collet89db5e02015-11-13 11:27:46 +010081 const BYTE* nextSrc; /* next block here to continue on current prefix */
Yann Colleteeb8ba12015-10-22 16:55:40 +010082 const BYTE* base; /* All regular indexes relative to this position */
83 const BYTE* dictBase; /* extDict indexes relative to this position */
Yann Colletf3eca252015-10-22 15:31:46 +010084 U32 dictLimit; /* below that point, need extDict */
Yann Colleteeb8ba12015-10-22 16:55:40 +010085 U32 lowLimit; /* below that point, no more data */
Yann Colletf3eca252015-10-22 15:31:46 +010086 U32 nextToUpdate; /* index from which to continue dictionary update */
inikepcc52a972016-02-19 10:09:35 +010087 U32 nextToUpdate3; /* index from which to continue dictionary update */
inikep7adceef2016-03-23 15:53:38 +010088 U32 hashLog3; /* dispatch table : larger == faster, more memory */
Yann Colletbb002742017-01-25 16:25:38 -080089 U32 loadedDictEnd; /* index of end of dictionary */
90 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
Yann Collet731ef162016-07-27 21:05:12 +020091 ZSTD_compressionStage_e stage;
Yann Collet4266c0a2016-06-14 01:49:25 +020092 U32 rep[ZSTD_REP_NUM];
Yann Colletb459aad2017-01-19 17:33:37 -080093 U32 repToConfirm[ZSTD_REP_NUM];
Yann Colletc46fb922016-05-29 05:01:04 +020094 U32 dictID;
Yann Colletb0edb7f2017-05-12 15:31:53 -070095 int compressionLevel;
Yann Collet1ad7c822017-05-22 17:06:04 -070096 ZSTD_parameters requestedParams;
97 ZSTD_parameters appliedParams;
Yann Collet712def92015-10-29 18:41:45 +010098 void* workSpace;
99 size_t workSpaceSize;
Yann Collet120230b2015-12-02 14:00:45 +0100100 size_t blockSize;
Yann Colleta0ba8492017-06-16 13:29:17 -0700101 U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
Yann Collet20d5e032017-04-11 18:34:02 -0700102 U64 consumedSrcSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +0200103 XXH64_state_t xxhState;
inikep28669512016-06-02 13:04:18 +0200104 ZSTD_customMem customMem;
Yann Colletc7fe2622017-05-23 13:16:00 -0700105 size_t staticSize;
Yann Colletecd651b2016-01-07 15:35:18 +0100106
Yann Collet712def92015-10-29 18:41:45 +0100107 seqStore_t seqStore; /* sequences storage ptrs */
Yann Collet083fcc82015-10-25 14:06:35 +0100108 U32* hashTable;
inikepcc52a972016-02-19 10:09:35 +0100109 U32* hashTable3;
Yann Collet8a57b922016-04-04 13:49:18 +0200110 U32* chainTable;
Yann Collet71ddeb62017-04-20 22:54:54 -0700111 HUF_repeat hufCTable_repeatMode;
112 HUF_CElt* hufCTable;
113 U32 fseCTables_ready;
Yann Collet71aaa322017-04-20 23:03:38 -0700114 FSE_CTable* offcodeCTable;
115 FSE_CTable* matchlengthCTable;
116 FSE_CTable* litlengthCTable;
Yann Collete42afbc2017-04-26 11:39:35 -0700117 unsigned* entropyScratchSpace;
Yann Collet0be6fd32017-05-08 16:08:01 -0700118
119 /* streaming */
Yann Collet0be6fd32017-05-08 16:08:01 -0700120 char* inBuff;
121 size_t inBuffSize;
122 size_t inToCompress;
123 size_t inBuffPos;
124 size_t inBuffTarget;
125 char* outBuff;
126 size_t outBuffSize;
127 size_t outBuffContentSize;
128 size_t outBuffFlushedSize;
129 ZSTD_cStreamStage streamStage;
130 U32 frameEnded;
Yann Colletc4a5a212017-06-01 17:56:14 -0700131
Yann Colletb7372932017-06-27 15:49:12 -0700132 /* Dictionary */
133 ZSTD_dictMode_e dictMode; /* select restricting dictionary to "rawContent" or "fullDict" only */
134 U32 dictContentByRef;
135 ZSTD_CDict* cdictLocal;
136 const ZSTD_CDict* cdict;
137 const void* prefix;
138 size_t prefixSize;
139
Yann Colletc4a5a212017-06-01 17:56:14 -0700140 /* Multi-threading */
Yann Colletc35e5352017-06-01 18:44:06 -0700141 U32 nbThreads;
Yann Colletc4a5a212017-06-01 17:56:14 -0700142 ZSTDMT_CCtx* mtctx;
Yann Colletf3eca252015-10-22 15:31:46 +0100143};
144
Yann Colletc4a5a212017-06-01 17:56:14 -0700145
Yann Collet5be2dd22015-11-11 13:43:58 +0100146ZSTD_CCtx* ZSTD_createCCtx(void)
Yann Colletf3eca252015-10-22 15:31:46 +0100147{
Yann Colletae728a42017-05-30 17:11:39 -0700148 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
inikep50e82c02016-05-23 15:49:09 +0200149}
150
151ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
152{
Yann Collet69c2cdb2016-07-14 16:52:45 +0200153 ZSTD_CCtx* cctx;
inikep50e82c02016-05-23 15:49:09 +0200154
Yann Colletae728a42017-05-30 17:11:39 -0700155 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
inikep107e2432016-05-23 16:24:52 +0200156
Yann Colletc4f46b92017-05-30 17:45:37 -0700157 cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
Yann Collet69c2cdb2016-07-14 16:52:45 +0200158 if (!cctx) return NULL;
Yann Colletbb002742017-01-25 16:25:38 -0800159 cctx->customMem = customMem;
Yann Collet6d4fef32017-05-17 18:36:15 -0700160 cctx->compressionLevel = ZSTD_CLEVEL_DEFAULT;
Yann Colleta0ba8492017-06-16 13:29:17 -0700161 ZSTD_STATIC_ASSERT(zcss_init==0);
Yann Colletd3de3d52017-06-16 16:51:33 -0700162 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
Yann Collet69c2cdb2016-07-14 16:52:45 +0200163 return cctx;
Yann Colletf3eca252015-10-22 15:31:46 +0100164}
165
Yann Colletc7fe2622017-05-23 13:16:00 -0700166ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
167{
168 ZSTD_CCtx* cctx = (ZSTD_CCtx*) workspace;
169 if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
170 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
Yann Collet7bd1a292017-06-21 11:50:33 -0700171 memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */
Yann Colletc7fe2622017-05-23 13:16:00 -0700172 cctx->staticSize = workspaceSize;
173 cctx->workSpace = (void*)(cctx+1);
174 cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
175
176 /* entropy space (never moves) */
Yann Collet7bd1a292017-06-21 11:50:33 -0700177 /* note : this code should be shared with resetCCtx, rather than copy/pasted */
Yann Colletc7fe2622017-05-23 13:16:00 -0700178 { void* ptr = cctx->workSpace;
179 cctx->hufCTable = (HUF_CElt*)ptr;
Yann Collet0fdc71c2017-05-24 17:41:41 -0700180 ptr = (char*)cctx->hufCTable + hufCTable_size;
Yann Colletc7fe2622017-05-23 13:16:00 -0700181 cctx->offcodeCTable = (FSE_CTable*) ptr;
182 ptr = (char*)ptr + offcodeCTable_size;
183 cctx->matchlengthCTable = (FSE_CTable*) ptr;
184 ptr = (char*)ptr + matchlengthCTable_size;
185 cctx->litlengthCTable = (FSE_CTable*) ptr;
186 ptr = (char*)ptr + litlengthCTable_size;
187 assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
188 cctx->entropyScratchSpace = (unsigned*) ptr;
189 }
190
Yann Colletf3eca252015-10-22 15:31:46 +0100191 return cctx;
192}
193
Yann Collet5be2dd22015-11-11 13:43:58 +0100194size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
Yann Colletf3eca252015-10-22 15:31:46 +0100195{
inikep36403962016-06-03 16:36:50 +0200196 if (cctx==NULL) return 0; /* support free on NULL */
Yann Colletc4a5a212017-06-01 17:56:14 -0700197 if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
Yann Collet23b6e052016-08-28 21:05:43 -0700198 ZSTD_free(cctx->workSpace, cctx->customMem);
Yann Collet78553662017-05-08 17:15:00 -0700199 cctx->workSpace = NULL;
200 ZSTD_freeCDict(cctx->cdictLocal);
201 cctx->cdictLocal = NULL;
Yann Colletc4a5a212017-06-01 17:56:14 -0700202 ZSTDMT_freeCCtx(cctx->mtctx);
203 cctx->mtctx = NULL;
Yann Collet23b6e052016-08-28 21:05:43 -0700204 ZSTD_free(cctx, cctx->customMem);
Yann Collet982ffc72016-02-05 02:33:10 +0100205 return 0; /* reserved as a potential error code in the future */
Yann Collet083fcc82015-10-25 14:06:35 +0100206}
207
Yann Collet70e3b312016-08-23 01:18:06 +0200208size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
Yann Collet3ae543c2016-07-11 03:12:17 +0200209{
Yann Colletd7c65892016-09-15 02:50:27 +0200210 if (cctx==NULL) return 0; /* support sizeof on NULL */
Yann Collet7bd1a292017-06-21 11:50:33 -0700211 DEBUGLOG(5, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
212 DEBUGLOG(5, "workSpaceSize : %u", (U32)cctx->workSpaceSize);
213 DEBUGLOG(5, "streaming buffers : %u", (U32)(cctx->outBuffSize + cctx->inBuffSize));
214 DEBUGLOG(5, "inner MTCTX : %u", (U32)ZSTDMT_sizeof_CCtx(cctx->mtctx));
Yann Collet791d7442017-05-08 16:17:30 -0700215 return sizeof(*cctx) + cctx->workSpaceSize
216 + ZSTD_sizeof_CDict(cctx->cdictLocal)
Yann Colletc4a5a212017-06-01 17:56:14 -0700217 + cctx->outBuffSize + cctx->inBuffSize
218 + ZSTDMT_sizeof_CCtx(cctx->mtctx);
Yann Collet3ae543c2016-07-11 03:12:17 +0200219}
220
Yann Collet009d6042017-05-19 10:17:59 -0700221size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
222{
223 return ZSTD_sizeof_CCtx(zcs); /* same object */
224}
225
Yann Colletb0edb7f2017-05-12 15:31:53 -0700226/* private API call, for dictBuilder only */
227const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
228
Yann Collet1ad7c822017-05-22 17:06:04 -0700229static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) { return cctx->appliedParams; }
Yann Colletb0edb7f2017-05-12 15:31:53 -0700230
Yann Colletef738c12017-05-12 13:53:46 -0700231/* older variant; will be deprecated */
Yann Colletbb002742017-01-25 16:25:38 -0800232size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
233{
234 switch(param)
235 {
Yann Collet06e76972017-01-25 16:39:03 -0800236 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
Yann Collet7bd1a292017-06-21 11:50:33 -0700237 ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
238 ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
239 case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
Yann Colletbb002742017-01-25 16:25:38 -0800240 default: return ERROR(parameter_unknown);
241 }
242}
243
Yann Colletadd66f82017-05-12 15:59:48 -0700244
Yann Collet6d4fef32017-05-17 18:36:15 -0700245#define ZSTD_CLEVEL_CUSTOM 999
Yann Colletadd66f82017-05-12 15:59:48 -0700246static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
Yann Collet7d360282016-02-12 00:07:30 +0100247{
Yann Collet1ad7c822017-05-22 17:06:04 -0700248 if (cctx->compressionLevel==ZSTD_CLEVEL_CUSTOM) return;
249 cctx->requestedParams.cParams = ZSTD_getCParams(cctx->compressionLevel,
Yann Colleta0ba8492017-06-16 13:29:17 -0700250 cctx->pledgedSrcSizePlusOne-1, 0);
Yann Collet6d4fef32017-05-17 18:36:15 -0700251 cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Collet7d360282016-02-12 00:07:30 +0100252}
253
/* CLAMPCHECK() :
 * makes the _enclosing function_ return a compressionParameter_outOfBound error
 * when `val` is outside [min, max]. Arguments are evaluated more than once. */
#define CLAMPCHECK(val,min,max) {                       \
    if (((val)<(min)) | ((val)>(max))) {                \
        return ERROR(compressionParameter_outOfBound);  \
}   }
258
Yann Colletb0edb7f2017-05-12 15:31:53 -0700259size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
Yann Collet95162342016-10-25 16:19:52 -0700260{
Yann Collet24de7b02017-05-22 13:05:45 -0700261 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700262
263 switch(param)
264 {
265 case ZSTD_p_compressionLevel :
Yann Colletcd2892f2017-06-01 09:44:54 -0700266 if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); /* cap max compression level */
267 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700268 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700269 cctx->compressionLevel = value;
270 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700271
272 case ZSTD_p_windowLog :
Yann Colletd7a3bff2017-06-19 11:53:01 -0700273 DEBUGLOG(5, "setting ZSTD_p_windowLog = %u (cdict:%u)",
274 value, (cctx->cdict!=NULL));
Yann Colletcd2892f2017-06-01 09:44:54 -0700275 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700276 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700277 CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
278 ZSTD_cLevelToCParams(cctx);
279 cctx->requestedParams.cParams.windowLog = value;
280 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700281
282 case ZSTD_p_hashLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700283 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700284 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700285 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
286 ZSTD_cLevelToCParams(cctx);
287 cctx->requestedParams.cParams.hashLog = value;
288 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700289
290 case ZSTD_p_chainLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700291 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700292 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700293 CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
294 ZSTD_cLevelToCParams(cctx);
295 cctx->requestedParams.cParams.chainLog = value;
296 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700297
298 case ZSTD_p_searchLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700299 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700300 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700301 CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
302 ZSTD_cLevelToCParams(cctx);
303 cctx->requestedParams.cParams.searchLog = value;
304 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700305
Yann Collet6d4fef32017-05-17 18:36:15 -0700306 case ZSTD_p_minMatch :
Yann Colletcd2892f2017-06-01 09:44:54 -0700307 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700308 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700309 CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
310 ZSTD_cLevelToCParams(cctx);
311 cctx->requestedParams.cParams.searchLength = value;
312 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700313
314 case ZSTD_p_targetLength :
Yann Colletcd2892f2017-06-01 09:44:54 -0700315 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700316 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700317 CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
318 ZSTD_cLevelToCParams(cctx);
319 cctx->requestedParams.cParams.targetLength = value;
320 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700321
322 case ZSTD_p_compressionStrategy :
Yann Colletcd2892f2017-06-01 09:44:54 -0700323 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700324 if (cctx->cdict) return ERROR(stage_wrong);
Yann Colletcd2892f2017-06-01 09:44:54 -0700325 CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
326 ZSTD_cLevelToCParams(cctx);
327 cctx->requestedParams.cParams.strategy = (ZSTD_strategy)value;
328 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700329
Yann Colletcd2892f2017-06-01 09:44:54 -0700330 case ZSTD_p_contentSizeFlag :
Yann Collet2cf77552017-06-16 12:34:41 -0700331 DEBUGLOG(5, "set content size flag = %u", (value>0));
Yann Colletcd2892f2017-06-01 09:44:54 -0700332 /* Content size written in frame header _when known_ (default:1) */
333 cctx->requestedParams.fParams.contentSizeFlag = value>0;
334 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700335
Yann Colletcd2892f2017-06-01 09:44:54 -0700336 case ZSTD_p_checksumFlag :
337 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
338 cctx->requestedParams.fParams.checksumFlag = value>0;
339 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700340
Yann Collet1ad7c822017-05-22 17:06:04 -0700341 case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
Yann Colletcd2892f2017-06-01 09:44:54 -0700342 DEBUGLOG(5, "set dictIDFlag = %u", (value>0));
343 cctx->requestedParams.fParams.noDictIDFlag = (value==0);
344 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700345
Yann Colletfecc7212017-06-27 11:46:39 -0700346 /* Dictionary parameters */
Yann Collet7bd1a292017-06-21 11:50:33 -0700347 case ZSTD_p_dictMode :
Yann Colletfecc7212017-06-27 11:46:39 -0700348 if (cctx->cdict) return ERROR(stage_wrong); /* must be set before loading */
Yann Collet7bd1a292017-06-21 11:50:33 -0700349 /* restrict dictionary mode, to "rawContent" or "fullDict" only */
350 ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
351 if (value > (unsigned)ZSTD_dm_fullDict)
352 return ERROR(compressionParameter_outOfBound);
353 cctx->dictMode = (ZSTD_dictMode_e)value;
354 return 0;
355
Yann Colletfecc7212017-06-27 11:46:39 -0700356 case ZSTD_p_refDictContent :
357 if (cctx->cdict) return ERROR(stage_wrong); /* must be set before loading */
358 /* dictionary content will be referenced, instead of copied */
359 cctx->dictContentByRef = value>0;
360 return 0;
361
Yann Colletb0edb7f2017-05-12 15:31:53 -0700362 case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize,
363 * even when referencing into Dictionary content
364 * default : 0 when using a CDict, 1 when using a Prefix */
Yann Colletc35e5352017-06-01 18:44:06 -0700365 cctx->forceWindow = value>0;
366 cctx->loadedDictEnd = 0;
367 return 0;
368
369 case ZSTD_p_nbThreads:
370 if (value==0) return 0;
Yann Collet33873f02017-06-16 12:04:21 -0700371 DEBUGLOG(5, " setting nbThreads : %u", value);
Yann Colletc35e5352017-06-01 18:44:06 -0700372#ifndef ZSTD_MULTITHREAD
373 if (value > 1) return ERROR(compressionParameter_unsupported);
374#endif
375 if ((value>1) && (cctx->nbThreads != value)) {
Yann Collet05ae4b22017-06-15 18:03:34 -0700376 if (cctx->staticSize) /* MT not compatible with static alloc */
377 return ERROR(compressionParameter_unsupported);
Yann Colletc35e5352017-06-01 18:44:06 -0700378 ZSTDMT_freeCCtx(cctx->mtctx);
Yann Collet559ee822017-06-16 11:58:21 -0700379 cctx->nbThreads = 1;
Yann Collet88da8f12017-07-10 14:02:33 -0700380 cctx->mtctx = ZSTDMT_createCCtx_advanced(value, cctx->customMem);
Yann Colletc35e5352017-06-01 18:44:06 -0700381 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
Yann Collet33873f02017-06-16 12:04:21 -0700382 }
383 cctx->nbThreads = value;
Yann Colletc35e5352017-06-01 18:44:06 -0700384 return 0;
385
Yann Collet559ee822017-06-16 11:58:21 -0700386 case ZSTD_p_jobSize:
Yann Colletc35e5352017-06-01 18:44:06 -0700387 if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
388 assert(cctx->mtctx != NULL);
389 return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);
390
Yann Collet559ee822017-06-16 11:58:21 -0700391 case ZSTD_p_overlapSizeLog:
Yann Collet33873f02017-06-16 12:04:21 -0700392 DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->nbThreads);
Yann Colletc35e5352017-06-01 18:44:06 -0700393 if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
394 assert(cctx->mtctx != NULL);
395 return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700396
Yann Colletb0edb7f2017-05-12 15:31:53 -0700397 default: return ERROR(parameter_unknown);
398 }
Yann Collet95162342016-10-25 16:19:52 -0700399}
400
Yann Colletb0edb7f2017-05-12 15:31:53 -0700401ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
Yann Collet083fcc82015-10-25 14:06:35 +0100402{
Yann Collet2cf77552017-06-16 12:34:41 -0700403 DEBUGLOG(5, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
Yann Collet24de7b02017-05-22 13:05:45 -0700404 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colleta0ba8492017-06-16 13:29:17 -0700405 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700406 return 0;
407}
Yann Collet59d70632015-11-04 12:05:27 +0100408
Yann Collet6d4fef32017-05-17 18:36:15 -0700409ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
410{
Yann Collet24de7b02017-05-22 13:05:45 -0700411 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletc7fe2622017-05-23 13:16:00 -0700412 if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */
Yann Colletd7a3bff2017-06-19 11:53:01 -0700413 DEBUGLOG(5, "load dictionary of size %u", (U32)dictSize);
Yann Collet6d4fef32017-05-17 18:36:15 -0700414 ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */
415 if (dict==NULL || dictSize==0) { /* no dictionary mode */
416 cctx->cdictLocal = NULL;
417 cctx->cdict = NULL;
418 } else {
Yann Collet8b21ec42017-05-19 19:46:15 -0700419 ZSTD_compressionParameters const cParams =
420 cctx->compressionLevel == ZSTD_CLEVEL_CUSTOM ?
Yann Collet1ad7c822017-05-22 17:06:04 -0700421 cctx->requestedParams.cParams :
Yann Collet8b21ec42017-05-19 19:46:15 -0700422 ZSTD_getCParams(cctx->compressionLevel, 0, dictSize);
Yann Collet6d4fef32017-05-17 18:36:15 -0700423 cctx->cdictLocal = ZSTD_createCDict_advanced(
424 dict, dictSize,
Yann Colletfecc7212017-06-27 11:46:39 -0700425 cctx->dictContentByRef, cctx->dictMode,
Yann Collet8b21ec42017-05-19 19:46:15 -0700426 cParams, cctx->customMem);
Yann Collet6d4fef32017-05-17 18:36:15 -0700427 cctx->cdict = cctx->cdictLocal;
428 if (cctx->cdictLocal == NULL)
429 return ERROR(memory_allocation);
430 }
431 return 0;
432}
433
Yann Colletbd18c882017-06-16 10:17:50 -0700434size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
Yann Colletb0edb7f2017-05-12 15:31:53 -0700435{
Yann Collet24de7b02017-05-22 13:05:45 -0700436 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
437 cctx->cdict = cdict;
Yann Colletb7372932017-06-27 15:49:12 -0700438 cctx->prefix = NULL; /* exclusive */
439 cctx->prefixSize = 0;
440 return 0;
441}
442
443size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
444{
445 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
446 cctx->cdict = NULL; /* prefix discards any prior cdict */
447 cctx->prefix = prefix;
448 cctx->prefixSize = prefixSize;
449 return 0;
Yann Collet083fcc82015-10-25 14:06:35 +0100450}
451
Yann Colletb26728c2017-06-16 14:00:46 -0700452static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
Yann Colletbd18c882017-06-16 10:17:50 -0700453{
454 cctx->streamStage = zcss_init;
Yann Colleta0ba8492017-06-16 13:29:17 -0700455 cctx->pledgedSrcSizePlusOne = 0;
Yann Colletb26728c2017-06-16 14:00:46 -0700456}
457
458/*! ZSTD_CCtx_reset() :
459 * Also dumps dictionary */
460void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
461{
462 ZSTD_startNewCompression(cctx);
Yann Colletbd18c882017-06-16 10:17:50 -0700463 cctx->cdict = NULL;
464}
465
Yann Collet381e66c2017-06-16 17:29:35 -0700466/** ZSTD_checkCParams() :
467 control CParam values remain within authorized range.
Yann Collet21588e32016-03-30 16:50:44 +0200468 @return : 0, or an error code if one value is beyond authorized range */
Yann Collet3b719252016-03-30 19:48:05 +0200469size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
Yann Collet21588e32016-03-30 16:50:44 +0200470{
Yann Collet15354142016-04-04 04:22:53 +0200471 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
Yann Collet8a57b922016-04-04 13:49:18 +0200472 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
Yann Collet3b719252016-03-30 19:48:05 +0200473 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
474 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
Yann Collet2e2e78d2017-03-29 16:02:47 -0700475 CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
Yann Collet3b719252016-03-30 19:48:05 +0200476 CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
Nick Terrelleeb31ee2017-03-09 11:44:25 -0800477 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
Yann Collet21588e32016-03-30 16:50:44 +0200478 return 0;
479}
480
Yann Collet381e66c2017-06-16 17:29:35 -0700481/** ZSTD_clampCParams() :
482 * make CParam values within valid range.
483 * @return : valid CParams */
484static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
485{
486# define CLAMP(val,min,max) { \
487 if (val<min) val=min; \
488 else if (val>max) val=max; \
489 }
490 CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
491 CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
492 CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
493 CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
494 CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
495 CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
496 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
497 return cParams;
498}
Yann Collet21588e32016-03-30 16:50:44 +0200499
Yann Colletc3a5c4b2016-12-12 00:47:30 +0100500/** ZSTD_cycleLog() :
501 * condition for correct operation : hashLog > 1 */
502static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
503{
504 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
505 return hashLog - btScale;
506}
507
Yann Collet381e66c2017-06-16 17:29:35 -0700508/** ZSTD_adjustCParams_internal() :
Yann Colletcf409a72016-09-26 16:41:05 +0200509 optimize `cPar` for a given input (`srcSize` and `dictSize`).
Yann Collet21588e32016-03-30 16:50:44 +0200510 mostly downsizing to reduce memory consumption and initialization.
511 Both `srcSize` and `dictSize` are optional (use 0 if unknown),
512 but if both are 0, no optimization can be done.
Yann Collet70d13012016-06-01 18:45:34 +0200513 Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
Yann Collet381e66c2017-06-16 17:29:35 -0700514ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
Yann Collet59d70632015-11-04 12:05:27 +0100515{
Yann Collet381e66c2017-06-16 17:29:35 -0700516 assert(ZSTD_checkCParams(cPar)==0);
Yann Collet70d13012016-06-01 18:45:34 +0200517 if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
Yann Collet59d70632015-11-04 12:05:27 +0100518
Yann Collet70e45772016-03-19 18:08:32 +0100519 /* resize params, to use less memory when necessary */
Yann Colletdd6466a2016-03-30 20:06:26 +0200520 { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
521 U64 const rSize = srcSize + dictSize + minSrcSize;
Yann Colletb59bf962016-04-04 14:53:16 +0200522 if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
Yann Colletcf409a72016-09-26 16:41:05 +0200523 U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
Yann Collet70d13012016-06-01 18:45:34 +0200524 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
Yann Collet21588e32016-03-30 16:50:44 +0200525 } }
Yann Collet70d13012016-06-01 18:45:34 +0200526 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
Yann Colletc3a5c4b2016-12-12 00:47:30 +0100527 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
528 if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
529 }
Yann Colletc6eea2b2016-03-19 17:18:00 +0100530
Yann Collet70d13012016-06-01 18:45:34 +0200531 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
Yann Collet70d13012016-06-01 18:45:34 +0200532
533 return cPar;
Yann Collet59d70632015-11-04 12:05:27 +0100534}
535
Yann Collet381e66c2017-06-16 17:29:35 -0700536ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
537{
538 cPar = ZSTD_clampCParams(cPar);
539 return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
540}
541
Yann Collet59d70632015-11-04 12:05:27 +0100542
Yann Collet31af8292017-06-26 15:52:39 -0700543size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams)
Yann Collete74215e2016-03-19 16:09:09 +0100544{
Yann Colletfa3671e2017-05-19 10:51:30 -0700545 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
Yann Collet731ef162016-07-27 21:05:12 +0200546 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
547 size_t const maxNbSeq = blockSize / divider;
548 size_t const tokenSpace = blockSize + 11*maxNbSeq;
Yann Collet3ae543c2016-07-11 03:12:17 +0200549
Yann Collet731ef162016-07-27 21:05:12 +0200550 size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
551 size_t const hSize = ((size_t)1) << cParams.hashLog;
552 U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
553 size_t const h3Size = ((size_t)1) << hashLog3;
Yann Collet71ddeb62017-04-20 22:54:54 -0700554 size_t const entropySpace = hufCTable_size + litlengthCTable_size
Yann Colleta4086452017-04-20 23:09:39 -0700555 + offcodeCTable_size + matchlengthCTable_size
Yann Collet72712032017-04-20 23:21:19 -0700556 + entropyScratchSpace_size;
Yann Collet731ef162016-07-27 21:05:12 +0200557 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collet3ae543c2016-07-11 03:12:17 +0200558
Yann Colletfc514592017-05-08 17:07:59 -0700559 size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
Yann Collet3ae543c2016-07-11 03:12:17 +0200560 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
Nick Terrell5f2c7212017-05-10 16:49:58 -0700561 size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
Yann Colletfc514592017-05-08 17:07:59 -0700562 size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace;
Yann Collet3ae543c2016-07-11 03:12:17 +0200563
Yann Collet7bd1a292017-06-21 11:50:33 -0700564 DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
565 DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
Yann Collet3ae543c2016-07-11 03:12:17 +0200566 return sizeof(ZSTD_CCtx) + neededSpace;
Yann Collet2e91dde2016-03-08 12:22:11 +0100567}
568
/*! ZSTD_estimateCCtxSize() :
 *  Same as ZSTD_estimateCCtxSize_advanced(),
 *  using the parameters selected for `compressionLevel`
 *  (queried with srcSize==0 and dictSize==0).
 * @return : estimated size, in bytes */
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    return ZSTD_estimateCCtxSize_advanced(ZSTD_getCParams(compressionLevel, 0, 0));
}
574
Yann Collet0c9a9152017-06-26 16:02:25 -0700575size_t ZSTD_estimateCStreamSize_advanced(ZSTD_compressionParameters cParams)
Yann Colleta7737f62016-09-06 09:44:59 +0200576{
Yann Collet31af8292017-06-26 15:52:39 -0700577 size_t const CCtxSize = ZSTD_estimateCCtxSize_advanced(cParams);
Yann Colletc7fe2622017-05-23 13:16:00 -0700578 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
579 size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
580 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
581 size_t const streamingSize = inBuffSize + outBuffSize;
582
583 return CCtxSize + streamingSize;
584}
585
/*! ZSTD_estimateCStreamSize() :
 *  Same as ZSTD_estimateCStreamSize_advanced(),
 *  using the parameters selected for `compressionLevel`
 *  (queried with srcSize==0 and dictSize==0).
 * @return : estimated size, in bytes */
size_t ZSTD_estimateCStreamSize(int compressionLevel) {
    return ZSTD_estimateCStreamSize_advanced(ZSTD_getCParams(compressionLevel, 0, 0));
}
590
Yann Colleta7737f62016-09-06 09:44:59 +0200591
Yann Collet009d6042017-05-19 10:17:59 -0700592static U32 ZSTD_equivalentParams(ZSTD_compressionParameters cParams1,
593 ZSTD_compressionParameters cParams2)
Yann Colleta7737f62016-09-06 09:44:59 +0200594{
Yann Colletfa3671e2017-05-19 10:51:30 -0700595 U32 bslog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX);
596 U32 bslog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX);
Yann Collet009d6042017-05-19 10:17:59 -0700597 return (bslog1 == bslog2) /* same block size */
598 & (cParams1.hashLog == cParams2.hashLog)
599 & (cParams1.chainLog == cParams2.chainLog)
600 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
601 & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */
Yann Colleta7737f62016-09-06 09:44:59 +0200602}
603
604/*! ZSTD_continueCCtx() :
Yann Colletc08e6492017-06-19 18:25:35 -0700605 * reuse CCtx without reset (note : requires no dictionary) */
Yann Colletb26728c2017-06-16 14:00:46 -0700606static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 pledgedSrcSize)
Yann Colleta7737f62016-09-06 09:44:59 +0200607{
608 U32 const end = (U32)(cctx->nextSrc - cctx->base);
Yann Collet2cf77552017-06-16 12:34:41 -0700609 DEBUGLOG(5, "continue mode");
Yann Collet1ad7c822017-05-22 17:06:04 -0700610 cctx->appliedParams = params;
Yann Colletb26728c2017-06-16 14:00:46 -0700611 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
Yann Collet20d5e032017-04-11 18:34:02 -0700612 cctx->consumedSrcSize = 0;
Yann Colletb26728c2017-06-16 14:00:46 -0700613 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
Yann Collet2cf77552017-06-16 12:34:41 -0700614 cctx->appliedParams.fParams.contentSizeFlag = 0;
Yann Colletb26728c2017-06-16 14:00:46 -0700615 DEBUGLOG(5, "pledged content size : %u ; flag : %u",
616 (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
Yann Colleta7737f62016-09-06 09:44:59 +0200617 cctx->lowLimit = end;
618 cctx->dictLimit = end;
619 cctx->nextToUpdate = end+1;
620 cctx->stage = ZSTDcs_init;
621 cctx->dictID = 0;
622 cctx->loadedDictEnd = 0;
623 { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
Yann Colletb6249222016-09-06 09:54:22 +0200624 cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
625 XXH64_reset(&cctx->xxhState, 0);
Yann Colleta7737f62016-09-06 09:44:59 +0200626 return 0;
627}
628
/* Reset policy for ZSTD_resetCCtx_internal() :
 * ZSTDcrp_continue : try to reuse the context as-is when parameters are equivalent ;
 * ZSTDcrp_noMemset : full reset, but the tables are not zeroed. */
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
/* Buffering policy : with ZSTDb_buffered, input and output buffers are reserved in the workspace.
 * Note : ZSTD_copyCCtx() statically asserts that ZSTDb_buffered == 1. */
typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
Yann Colleta7737f62016-09-06 09:44:59 +0200631
Yann Collet30fb4992017-04-18 14:08:50 -0700632/*! ZSTD_resetCCtx_internal() :
Yann Collet5ac72b42017-05-23 11:18:24 -0700633 note : `params` are assumed fully validated at this stage */
634static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
Yann Colleta0ba8492017-06-16 13:29:17 -0700635 ZSTD_parameters params, U64 pledgedSrcSize,
Yann Collet5ac72b42017-05-23 11:18:24 -0700636 ZSTD_compResetPolicy_e const crp,
637 ZSTD_buffered_policy_e const zbuff)
Yann Colleta7737f62016-09-06 09:44:59 +0200638{
Yann Collet5ac72b42017-05-23 11:18:24 -0700639 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet0be6fd32017-05-08 16:08:01 -0700640
Yann Colletb0739bc2017-05-22 17:45:15 -0700641 if (crp == ZSTDcrp_continue) {
Yann Collet1ad7c822017-05-22 17:06:04 -0700642 if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
Yann Collet009d6042017-05-19 10:17:59 -0700643 DEBUGLOG(5, "ZSTD_equivalentParams()==1");
Yann Collet71ddeb62017-04-20 22:54:54 -0700644 zc->fseCTables_ready = 0;
645 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta0ba8492017-06-16 13:29:17 -0700646 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
Yann Colletb0739bc2017-05-22 17:45:15 -0700647 } }
inikep87d4f3d2016-03-02 15:56:24 +0100648
Yann Colletfa3671e2017-05-19 10:51:30 -0700649 { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200650 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
651 size_t const maxNbSeq = blockSize / divider;
652 size_t const tokenSpace = blockSize + 11*maxNbSeq;
Yann Collet5ac72b42017-05-23 11:18:24 -0700653 size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
654 0 : (1 << params.cParams.chainLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200655 size_t const hSize = ((size_t)1) << params.cParams.hashLog;
Yann Collet5ac72b42017-05-23 11:18:24 -0700656 U32 const hashLog3 = (params.cParams.searchLength>3) ?
657 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200658 size_t const h3Size = ((size_t)1) << hashLog3;
659 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collet7bd1a292017-06-21 11:50:33 -0700660 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
661 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200662 void* ptr;
Yann Collete74215e2016-03-19 16:09:09 +0100663
Yann Colleta7737f62016-09-06 09:44:59 +0200664 /* Check if workSpace is large enough, alloc a new one if needed */
Yann Collet71ddeb62017-04-20 22:54:54 -0700665 { size_t const entropySpace = hufCTable_size + litlengthCTable_size
Yann Colleta4086452017-04-20 23:09:39 -0700666 + offcodeCTable_size + matchlengthCTable_size
Yann Collet72712032017-04-20 23:21:19 -0700667 + entropyScratchSpace_size;
Yann Collet71ddeb62017-04-20 22:54:54 -0700668 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
Yann Collete6fa70a2017-04-20 17:28:31 -0700669 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
Yann Collet5ac72b42017-05-23 11:18:24 -0700670 size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
671 || (params.cParams.strategy == ZSTD_btultra)) ?
672 optPotentialSpace : 0;
Yann Collet7bd1a292017-06-21 11:50:33 -0700673 size_t const bufferSpace = buffInSize + buffOutSize;
Yann Collet5ac72b42017-05-23 11:18:24 -0700674 size_t const neededSpace = entropySpace + optSpace + tableSpace
675 + tokenSpace + bufferSpace;
Yann Colletc7fe2622017-05-23 13:16:00 -0700676
677 if (zc->workSpaceSize < neededSpace) { /* too small : resize /*/
Yann Collet0be6fd32017-05-08 16:08:01 -0700678 DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n",
Yann Colletc7fe2622017-05-23 13:16:00 -0700679 (unsigned)zc->workSpaceSize>>10,
680 (unsigned)neededSpace>>10);
681 /* static cctx : no resize, error out */
682 if (zc->staticSize) return ERROR(memory_allocation);
683
Yann Collet0181fef2017-04-06 01:25:26 -0700684 zc->workSpaceSize = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200685 ZSTD_free(zc->workSpace, zc->customMem);
686 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
687 if (zc->workSpace == NULL) return ERROR(memory_allocation);
688 zc->workSpaceSize = neededSpace;
Yann Collet7bb60b12017-04-20 17:38:56 -0700689 ptr = zc->workSpace;
690
691 /* entropy space */
Yann Collet71ddeb62017-04-20 22:54:54 -0700692 zc->hufCTable = (HUF_CElt*)ptr;
693 ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */
Yann Collet71aaa322017-04-20 23:03:38 -0700694 zc->offcodeCTable = (FSE_CTable*) ptr;
695 ptr = (char*)ptr + offcodeCTable_size;
Yann Collet72712032017-04-20 23:21:19 -0700696 zc->matchlengthCTable = (FSE_CTable*) ptr;
Yann Collet71aaa322017-04-20 23:03:38 -0700697 ptr = (char*)ptr + matchlengthCTable_size;
Yann Collet72712032017-04-20 23:21:19 -0700698 zc->litlengthCTable = (FSE_CTable*) ptr;
699 ptr = (char*)ptr + litlengthCTable_size;
700 assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
Yann Collete42afbc2017-04-26 11:39:35 -0700701 zc->entropyScratchSpace = (unsigned*) ptr;
Yann Colleta7737f62016-09-06 09:44:59 +0200702 } }
Yann Collet083fcc82015-10-25 14:06:35 +0100703
Yann Collete6fa70a2017-04-20 17:28:31 -0700704 /* init params */
Yann Collet1ad7c822017-05-22 17:06:04 -0700705 zc->appliedParams = params;
Yann Colleta0ba8492017-06-16 13:29:17 -0700706 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
Yann Collete6fa70a2017-04-20 17:28:31 -0700707 zc->consumedSrcSize = 0;
Yann Colleta0ba8492017-06-16 13:29:17 -0700708 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
Yann Colletcc9f9b72017-06-15 18:17:10 -0700709 zc->appliedParams.fParams.contentSizeFlag = 0;
Yann Colletb26728c2017-06-16 14:00:46 -0700710 DEBUGLOG(5, "pledged content size : %u ; flag : %u",
711 (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
Yann Colletcc9f9b72017-06-15 18:17:10 -0700712 zc->blockSize = blockSize;
Yann Collet70e8c382016-02-10 13:37:52 +0100713
Yann Collet083fcc82015-10-25 14:06:35 +0100714 XXH64_reset(&zc->xxhState, 0);
Yann Collete6fa70a2017-04-20 17:28:31 -0700715 zc->stage = ZSTDcs_init;
716 zc->dictID = 0;
717 zc->loadedDictEnd = 0;
Yann Collet71ddeb62017-04-20 22:54:54 -0700718 zc->fseCTables_ready = 0;
719 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta7737f62016-09-06 09:44:59 +0200720 zc->nextToUpdate = 1;
721 zc->nextSrc = NULL;
722 zc->base = NULL;
723 zc->dictBase = NULL;
724 zc->dictLimit = 0;
725 zc->lowLimit = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200726 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
Yann Collete6fa70a2017-04-20 17:28:31 -0700727 zc->hashLog3 = hashLog3;
728 zc->seqStore.litLengthSum = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200729
Yann Collet71aaa322017-04-20 23:03:38 -0700730 /* ensure entropy tables are close together at the beginning */
731 assert((void*)zc->hufCTable == zc->workSpace);
732 assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
733 assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
734 assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
Yann Collete42afbc2017-04-26 11:39:35 -0700735 assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
736 ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
Yann Collete6fa70a2017-04-20 17:28:31 -0700737
738 /* opt parser space */
Nick Terrelleeb31ee2017-03-09 11:44:25 -0800739 if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
Yann Collet009d6042017-05-19 10:17:59 -0700740 DEBUGLOG(5, "reserving optimal parser space");
Yann Collete6fa70a2017-04-20 17:28:31 -0700741 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
Yann Colleta7737f62016-09-06 09:44:59 +0200742 zc->seqStore.litFreq = (U32*)ptr;
743 zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
744 zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
745 zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
746 ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
747 zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
748 ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
749 zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
750 ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
Yann Colleta7737f62016-09-06 09:44:59 +0200751 }
Yann Collete6fa70a2017-04-20 17:28:31 -0700752
753 /* table Space */
754 if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
755 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
756 zc->hashTable = (U32*)(ptr);
757 zc->chainTable = zc->hashTable + hSize;
758 zc->hashTable3 = zc->chainTable + chainSize;
759 ptr = zc->hashTable3 + h3Size;
760
761 /* sequences storage */
Yann Colleta7737f62016-09-06 09:44:59 +0200762 zc->seqStore.sequencesStart = (seqDef*)ptr;
763 ptr = zc->seqStore.sequencesStart + maxNbSeq;
764 zc->seqStore.llCode = (BYTE*) ptr;
765 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
766 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
767 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
Yann Collet5ac72b42017-05-23 11:18:24 -0700768 ptr = zc->seqStore.litStart + blockSize;
769
770 /* buffers */
771 zc->inBuffSize = buffInSize;
772 zc->inBuff = (char*)ptr;
773 zc->outBuffSize = buffOutSize;
774 zc->outBuff = zc->inBuff + buffInSize;
Yann Colleta7737f62016-09-06 09:44:59 +0200775
Yann Colleta7737f62016-09-06 09:44:59 +0200776 return 0;
Yann Collet72d706a2016-03-23 20:44:12 +0100777 }
Yann Colletf3eca252015-10-22 15:31:46 +0100778}
779
Yann Collet32dfae62017-01-19 10:32:55 -0800780/* ZSTD_invalidateRepCodes() :
781 * ensures next compression will not use repcodes from previous block.
782 * Note : only works with regular variant;
783 * do not use with extDict variant ! */
784void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
785 int i;
786 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
787}
Yann Collet083fcc82015-10-25 14:06:35 +0100788
Yann Collet7b51a292016-01-26 15:58:49 +0100789
Yann Colleta4cab802017-04-18 14:54:54 -0700790/*! ZSTD_copyCCtx_internal() :
791 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
792 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
793 * pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
794 * @return : 0, or an error code */
Yann Collet1ad7c822017-05-22 17:06:04 -0700795static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
796 const ZSTD_CCtx* srcCCtx,
797 ZSTD_frameParameters fParams,
Yann Collet204b6b72017-06-21 15:13:00 -0700798 unsigned long long pledgedSrcSize,
799 ZSTD_buffered_policy_e zbuff)
Yann Collet7b51a292016-01-26 15:58:49 +0100800{
Yann Collet009d6042017-05-19 10:17:59 -0700801 DEBUGLOG(5, "ZSTD_copyCCtx_internal");
Yann Collet7b51a292016-01-26 15:58:49 +0100802 if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
Sean Purcell2db72492017-02-09 10:50:43 -0800803
inikep28669512016-06-02 13:04:18 +0200804 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
Yann Collet204b6b72017-06-21 15:13:00 -0700805 { ZSTD_parameters params = srcCCtx->appliedParams;
Yann Colleta4cab802017-04-18 14:54:54 -0700806 params.fParams = fParams;
Yann Collet5ac72b42017-05-23 11:18:24 -0700807 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
808 ZSTDcrp_noMemset, zbuff);
Sean Purcell2db72492017-02-09 10:50:43 -0800809 }
Yann Collet7b51a292016-01-26 15:58:49 +0100810
811 /* copy tables */
Yann Collet1ad7c822017-05-22 17:06:04 -0700812 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->appliedParams.cParams.chainLog);
813 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +0200814 size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
815 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collete6fa70a2017-04-20 17:28:31 -0700816 assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
817 assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
818 memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
Yann Colletc6eea2b2016-03-19 17:18:00 +0100819 }
Yann Collet7b51a292016-01-26 15:58:49 +0100820
Yann Colletc46fb922016-05-29 05:01:04 +0200821 /* copy dictionary offsets */
Yann Colletc6eea2b2016-03-19 17:18:00 +0100822 dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
823 dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
824 dstCCtx->nextSrc = srcCCtx->nextSrc;
825 dstCCtx->base = srcCCtx->base;
826 dstCCtx->dictBase = srcCCtx->dictBase;
827 dstCCtx->dictLimit = srcCCtx->dictLimit;
828 dstCCtx->lowLimit = srcCCtx->lowLimit;
829 dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
Yann Colletc46fb922016-05-29 05:01:04 +0200830 dstCCtx->dictID = srcCCtx->dictID;
Yann Collet7b51a292016-01-26 15:58:49 +0100831
Yann Colletfb810d62016-01-28 00:18:06 +0100832 /* copy entropy tables */
Yann Collet71ddeb62017-04-20 22:54:54 -0700833 dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
834 if (srcCCtx->fseCTables_ready) {
Yann Colleta34a39c2017-04-20 18:17:58 -0700835 memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
836 memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
837 memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
Yann Colletfb810d62016-01-28 00:18:06 +0100838 }
Yann Collet71ddeb62017-04-20 22:54:54 -0700839 dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
840 if (srcCCtx->hufCTable_repeatMode) {
841 memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
Nick Terrella4197772017-03-01 17:51:56 -0800842 }
Yann Collet7b51a292016-01-26 15:58:49 +0100843
844 return 0;
845}
846
Yann Colleta4cab802017-04-18 14:54:54 -0700847/*! ZSTD_copyCCtx() :
848 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
849 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
850 * pledgedSrcSize==0 means "unknown".
851* @return : 0, or an error code */
852size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
853{
854 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
Yann Collet204b6b72017-06-21 15:13:00 -0700855 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
856 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
Yann Colleta4cab802017-04-18 14:54:54 -0700857 fParams.contentSizeFlag = pledgedSrcSize>0;
858
Yann Collet204b6b72017-06-21 15:13:00 -0700859 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize, zbuff);
Yann Colleta4cab802017-04-18 14:54:54 -0700860}
861
Yann Collet7b51a292016-01-26 15:58:49 +0100862
Yann Colletecabfe32016-03-20 16:20:06 +0100863/*! ZSTD_reduceTable() :
Yann Colleta4cab802017-04-18 14:54:54 -0700864 * reduce table indexes by `reducerValue` */
Yann Colletecabfe32016-03-20 16:20:06 +0100865static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
Yann Collet89db5e02015-11-13 11:27:46 +0100866{
Yann Colletecabfe32016-03-20 16:20:06 +0100867 U32 u;
868 for (u=0 ; u < size ; u++) {
869 if (table[u] < reducerValue) table[u] = 0;
870 else table[u] -= reducerValue;
Yann Collet89db5e02015-11-13 11:27:46 +0100871 }
872}
873
Yann Colletecabfe32016-03-20 16:20:06 +0100874/*! ZSTD_reduceIndex() :
875* rescale all indexes to avoid future overflow (indexes are U32) */
876static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
877{
Yann Collet1ad7c822017-05-22 17:06:04 -0700878 { U32 const hSize = 1 << zc->appliedParams.cParams.hashLog;
Yann Colletecabfe32016-03-20 16:20:06 +0100879 ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
880
Yann Collet1ad7c822017-05-22 17:06:04 -0700881 { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->appliedParams.cParams.chainLog);
Yann Collet8a57b922016-04-04 13:49:18 +0200882 ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
Yann Colletecabfe32016-03-20 16:20:06 +0100883
Yann Collet731ef162016-07-27 21:05:12 +0200884 { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
Yann Colletecabfe32016-03-20 16:20:06 +0100885 ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
886}
887
Yann Collet89db5e02015-11-13 11:27:46 +0100888
Yann Collet863ec402016-01-28 17:56:33 +0100889/*-*******************************************************
Yann Collet14983e72015-11-11 21:38:21 +0100890* Block entropic compression
891*********************************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100892
Przemyslaw Skibinski3ee94a72016-10-24 15:58:07 +0200893/* See doc/zstd_compression_format.md for detailed format description */
Yann Collet14983e72015-11-11 21:38:21 +0100894
Yann Colletd1b26842016-03-15 01:24:33 +0100895size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100896{
Yann Colletd1b26842016-03-15 01:24:33 +0100897 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
Yann Collet6fa05a22016-07-20 14:58:49 +0200898 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
899 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
Yann Collet14983e72015-11-11 21:38:21 +0100900 return ZSTD_blockHeaderSize+srcSize;
901}
902
903
Yann Colletd1b26842016-03-15 01:24:33 +0100904static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100905{
906 BYTE* const ostart = (BYTE* const)dst;
Yann Collet731ef162016-07-27 21:05:12 +0200907 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
Yann Collet14983e72015-11-11 21:38:21 +0100908
Yann Colletd1b26842016-03-15 01:24:33 +0100909 if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
Yann Collet14983e72015-11-11 21:38:21 +0100910
Yann Collet59d1f792016-01-23 19:28:41 +0100911 switch(flSize)
912 {
913 case 1: /* 2 - 1 - 5 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200914 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
Yann Collet59d1f792016-01-23 19:28:41 +0100915 break;
916 case 2: /* 2 - 2 - 12 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200917 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100918 break;
Yann Collet59d1f792016-01-23 19:28:41 +0100919 case 3: /* 2 - 2 - 20 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200920 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100921 break;
Yann Colletcd2892f2017-06-01 09:44:54 -0700922 default: /* not necessary : flSize is {1,2,3} */
923 assert(0);
Yann Collet59d1f792016-01-23 19:28:41 +0100924 }
925
926 memcpy(ostart + flSize, src, srcSize);
927 return srcSize + flSize;
Yann Collet14983e72015-11-11 21:38:21 +0100928}
929
Yann Colletd1b26842016-03-15 01:24:33 +0100930static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100931{
932 BYTE* const ostart = (BYTE* const)dst;
Yann Collet731ef162016-07-27 21:05:12 +0200933 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
Yann Collet14983e72015-11-11 21:38:21 +0100934
Yann Collet198e6aa2016-07-20 20:12:24 +0200935 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
Yann Collet59d1f792016-01-23 19:28:41 +0100936
937 switch(flSize)
938 {
939 case 1: /* 2 - 1 - 5 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200940 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
Yann Collet59d1f792016-01-23 19:28:41 +0100941 break;
942 case 2: /* 2 - 2 - 12 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200943 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100944 break;
Yann Collet59d1f792016-01-23 19:28:41 +0100945 case 3: /* 2 - 2 - 20 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200946 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100947 break;
Yann Colletcd2892f2017-06-01 09:44:54 -0700948 default: /* not necessary : flSize is {1,2,3} */
949 assert(0);
Yann Collet59d1f792016-01-23 19:28:41 +0100950 }
951
952 ostart[flSize] = *(const BYTE*)src;
953 return flSize+1;
Yann Collet14983e72015-11-11 21:38:21 +0100954}
955
Yann Collet59d1f792016-01-23 19:28:41 +0100956
/*! ZSTD_minGain() :
 * @return : the minimum nb of bytes compression must save on `srcSize` bytes
 *           to be considered worthwhile : 2, plus 1 per 64 bytes of input */
static size_t ZSTD_minGain(size_t srcSize)
{
    size_t const perChunkGain = srcSize / 64;
    return perChunkGain + 2;
}
Yann Collet14983e72015-11-11 21:38:21 +0100958
/*! ZSTD_compressLiterals() :
 *  Write the literals section for `src` into `dst`, using the smallest of :
 *  - raw literals (ZSTD_noCompressLiterals()) when the input is very small,
 *    or when Huffman compression does not save at least ZSTD_minGain() bytes ;
 *  - RLE literals (ZSTD_compressRleLiteralsBlock()) when the Huffman stage returns 1 ;
 *  - Huffman-compressed literals otherwise, possibly re-using the table kept in `zc`.
 *  `zc->hufCTable_repeatMode` is updated to track whether that table can be re-used later.
 * @return : nb of bytes written into `dst`, or an error code (dstSize_tooSmall) */
static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    size_t const minGain = ZSTD_minGain(srcSize);
    /* header size grows with the input : 3 bytes below 1 KB, 4 below 16 KB, 5 beyond */
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE*  const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;   /* small inputs use a single Huffman stream */
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;


    /* small ? don't even attempt compression (speed opt) */
#   define LITERAL_NOENTROPY 63
    /* the threshold drops to 6 bytes when a valid table is available for re-use */
    {   size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
    {   HUF_repeat repeat = zc->hufCTable_repeatMode;
        /* faster strategies (below ZSTD_lazy) favor table re-use for inputs up to 1 KB */
        int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
        /* a valid table combined with the smallest header : force single stream */
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
        if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
        else { zc->hufCTable_repeatMode = HUF_repeat_check; }       /* now have a table to reuse */
    }

    /* no output, or insufficient gain : store raw, and forget the table */
    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
        zc->hufCTable_repeatMode = HUF_repeat_none;
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }
    /* result of 1 : store as RLE, and forget the table */
    if (cLitSize==1) {
        zc->hufCTable_repeatMode = HUF_repeat_none;
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }

    /* Build header */
    /* layout comments below list field widths in bits, lowest bits first :
     * hType, size format, srcSize, cLitSize */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);   /* upper bits of cLitSize, which do not fit in lhc */
            break;
        }
    default:  /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    return lhSize+cLitSize;
}
1022
Yann Collet3b2bd1d2016-07-30 13:21:41 +02001023static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
1024 8, 9, 10, 11, 12, 13, 14, 15,
1025 16, 16, 17, 17, 18, 18, 19, 19,
1026 20, 20, 20, 20, 21, 21, 21, 21,
1027 22, 22, 22, 22, 22, 22, 22, 22,
1028 23, 23, 23, 23, 23, 23, 23, 23,
1029 24, 24, 24, 24, 24, 24, 24, 24,
1030 24, 24, 24, 24, 24, 24, 24, 24 };
Yann Collet14983e72015-11-11 21:38:21 +01001031
Yann Collet3b2bd1d2016-07-30 13:21:41 +02001032static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1033 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1034 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
1035 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
1036 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
1037 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
1038 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
1039 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
Yann Colleted57d852016-07-29 21:22:17 +02001040
1041
1042void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
Yann Colletb44be742016-03-26 20:52:14 +01001043{
Yann Colleted57d852016-07-29 21:22:17 +02001044 BYTE const LL_deltaCode = 19;
1045 BYTE const ML_deltaCode = 36;
Yann Colletc0ce4f12016-07-30 00:55:13 +02001046 const seqDef* const sequences = seqStorePtr->sequencesStart;
Yann Colleted57d852016-07-29 21:22:17 +02001047 BYTE* const llCodeTable = seqStorePtr->llCode;
1048 BYTE* const ofCodeTable = seqStorePtr->ofCode;
1049 BYTE* const mlCodeTable = seqStorePtr->mlCode;
Yann Colletc0ce4f12016-07-30 00:55:13 +02001050 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
Yann Colleted57d852016-07-29 21:22:17 +02001051 U32 u;
1052 for (u=0; u<nbSeq; u++) {
1053 U32 const llv = sequences[u].litLength;
1054 U32 const mlv = sequences[u].matchLength;
Yann Collet3b2bd1d2016-07-30 13:21:41 +02001055 llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
Yann Colleted57d852016-07-29 21:22:17 +02001056 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
Yann Collet3b2bd1d2016-07-30 13:21:41 +02001057 mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
Yann Collet5d393572016-04-07 17:19:00 +02001058 }
Yann Colleted57d852016-07-29 21:22:17 +02001059 if (seqStorePtr->longLengthID==1)
1060 llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
1061 if (seqStorePtr->longLengthID==2)
1062 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
Yann Colletb44be742016-03-26 20:52:14 +01001063}
1064
Sean Purcell553f67e2017-03-02 15:15:31 -08001065MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
Yann Colletd1b26842016-03-15 01:24:33 +01001066 void* dst, size_t dstCapacity,
Sean Purcell553f67e2017-03-02 15:15:31 -08001067 size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +01001068{
Yann Collet1ad7c822017-05-22 17:06:04 -07001069 const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
Yann Colletb923f652016-01-26 03:14:20 +01001070 const seqStore_t* seqStorePtr = &(zc->seqStore);
Yann Collet14983e72015-11-11 21:38:21 +01001071 U32 count[MaxSeq+1];
1072 S16 norm[MaxSeq+1];
Yann Colletfb810d62016-01-28 00:18:06 +01001073 FSE_CTable* CTable_LitLength = zc->litlengthCTable;
1074 FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
1075 FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
Yann Collet14983e72015-11-11 21:38:21 +01001076 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
Yann Colletc0ce4f12016-07-30 00:55:13 +02001077 const seqDef* const sequences = seqStorePtr->sequencesStart;
Yann Colleted57d852016-07-29 21:22:17 +02001078 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
1079 const BYTE* const llCodeTable = seqStorePtr->llCode;
1080 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
Yann Collet5054ee02015-11-23 13:34:21 +01001081 BYTE* const ostart = (BYTE*)dst;
Yann Colletd1b26842016-03-15 01:24:33 +01001082 BYTE* const oend = ostart + dstCapacity;
Yann Colleta910dc82016-03-18 12:37:45 +01001083 BYTE* op = ostart;
Yann Colletc0ce4f12016-07-30 00:55:13 +02001084 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
Yann Collet14983e72015-11-11 21:38:21 +01001085 BYTE* seqHead;
Yann Colletd79a9a02016-11-30 15:52:20 -08001086 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
Yann Collet14983e72015-11-11 21:38:21 +01001087
Yann Collet14983e72015-11-11 21:38:21 +01001088 /* Compress literals */
Yann Colleta5c2c082016-03-20 01:09:18 +01001089 { const BYTE* const literals = seqStorePtr->litStart;
Yann Colleta910dc82016-03-18 12:37:45 +01001090 size_t const litSize = seqStorePtr->lit - literals;
Yann Colleta5c2c082016-03-20 01:09:18 +01001091 size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
Yann Collet14983e72015-11-11 21:38:21 +01001092 if (ZSTD_isError(cSize)) return cSize;
1093 op += cSize;
1094 }
1095
1096 /* Sequences Header */
Yann Collet7cbe79a2016-03-23 22:31:57 +01001097 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
Yann Colletd409db62016-03-04 14:45:31 +01001098 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
1099 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
1100 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
Yann Collete93d6ce2016-01-31 00:58:06 +01001101 if (nbSeq==0) goto _check_compressibility;
Yann Collet14983e72015-11-11 21:38:21 +01001102
Yann Colletbe391432016-03-22 23:19:28 +01001103 /* seqHead : flags for FSE encoding type */
1104 seqHead = op++;
Yann Collet14983e72015-11-11 21:38:21 +01001105
Yann Colletfb810d62016-01-28 00:18:06 +01001106#define MIN_SEQ_FOR_DYNAMIC_FSE 64
1107#define MAX_SEQ_FOR_STATIC_FSE 1000
1108
Yann Colletb44be742016-03-26 20:52:14 +01001109 /* convert length/distances into codes */
Yann Colleted57d852016-07-29 21:22:17 +02001110 ZSTD_seqToCodes(seqStorePtr);
Yann Collet597847a2016-03-20 19:14:22 +01001111
Yann Collet14983e72015-11-11 21:38:21 +01001112 /* CTable for Literal Lengths */
Yann Colletfadda6c2016-03-22 12:14:26 +01001113 { U32 max = MaxLL;
Yann Collete42afbc2017-04-26 11:39:35 -07001114 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001115 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
1116 *op++ = llCodeTable[0];
1117 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001118 LLtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001119 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001120 LLtype = set_repeat;
Yann Colletfadda6c2016-03-22 12:14:26 +01001121 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001122 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001123 LLtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001124 } else {
Yann Colletfadda6c2016-03-22 12:14:26 +01001125 size_t nbSeq_1 = nbSeq;
1126 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
1127 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
1128 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
Yann Colletadd08d62016-03-23 01:32:41 +01001129 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001130 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletadd08d62016-03-23 01:32:41 +01001131 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001132 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001133 LLtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001134 } }
Yann Collet14983e72015-11-11 21:38:21 +01001135
Yann Colletb44be742016-03-26 20:52:14 +01001136 /* CTable for Offsets */
Yann Colletfadda6c2016-03-22 12:14:26 +01001137 { U32 max = MaxOff;
Yann Collete42afbc2017-04-26 11:39:35 -07001138 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001139 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
Yann Collet7cbe79a2016-03-23 22:31:57 +01001140 *op++ = ofCodeTable[0];
Yann Colletfadda6c2016-03-22 12:14:26 +01001141 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001142 Offtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001143 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001144 Offtype = set_repeat;
Yann Collet48537162016-04-07 15:24:29 +02001145 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001146 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001147 Offtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001148 } else {
Yann Colletfadda6c2016-03-22 12:14:26 +01001149 size_t nbSeq_1 = nbSeq;
1150 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
Yann Collet7cbe79a2016-03-23 22:31:57 +01001151 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
Yann Colletfadda6c2016-03-22 12:14:26 +01001152 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
Yann Colletadd08d62016-03-23 01:32:41 +01001153 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001154 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletadd08d62016-03-23 01:32:41 +01001155 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001156 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001157 Offtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001158 } }
1159
Yann Collet14983e72015-11-11 21:38:21 +01001160 /* CTable for MatchLengths */
Yann Colletfadda6c2016-03-22 12:14:26 +01001161 { U32 max = MaxML;
Yann Collete42afbc2017-04-26 11:39:35 -07001162 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001163 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
Yann Collet72d706a2016-03-23 20:44:12 +01001164 *op++ = *mlCodeTable;
Yann Colletfadda6c2016-03-22 12:14:26 +01001165 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001166 MLtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001167 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001168 MLtype = set_repeat;
Yann Colletfadda6c2016-03-22 12:14:26 +01001169 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001170 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001171 MLtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001172 } else {
1173 size_t nbSeq_1 = nbSeq;
1174 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
1175 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
1176 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
1177 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001178 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletfadda6c2016-03-22 12:14:26 +01001179 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001180 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001181 MLtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001182 } }
Yann Collet14983e72015-11-11 21:38:21 +01001183
Yann Colletbe391432016-03-22 23:19:28 +01001184 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
Yann Collet71ddeb62017-04-20 22:54:54 -07001185 zc->fseCTables_ready = 0;
Yann Collet14983e72015-11-11 21:38:21 +01001186
1187 /* Encoding Sequences */
Yann Collet70e45772016-03-19 18:08:32 +01001188 { BIT_CStream_t blockStream;
Yann Colleta910dc82016-03-18 12:37:45 +01001189 FSE_CState_t stateMatchLength;
1190 FSE_CState_t stateOffsetBits;
1191 FSE_CState_t stateLitLength;
Yann Collet14983e72015-11-11 21:38:21 +01001192
Yann Collet95d07d72016-09-06 16:38:51 +02001193 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
Yann Collet14983e72015-11-11 21:38:21 +01001194
Yann Collet597847a2016-03-20 19:14:22 +01001195 /* first symbols */
Yann Colletfadda6c2016-03-22 12:14:26 +01001196 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
Yann Collet7cbe79a2016-03-23 22:31:57 +01001197 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
Yann Collet597847a2016-03-20 19:14:22 +01001198 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
Yann Colleted57d852016-07-29 21:22:17 +02001199 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
Yann Colletb9151402016-03-26 17:18:11 +01001200 if (MEM_32bits()) BIT_flushBits(&blockStream);
Yann Colleted57d852016-07-29 21:22:17 +02001201 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
Yann Colletb9151402016-03-26 17:18:11 +01001202 if (MEM_32bits()) BIT_flushBits(&blockStream);
Sean Purcelld44703d2017-03-01 14:36:25 -08001203 if (longOffsets) {
1204 U32 const ofBits = ofCodeTable[nbSeq-1];
1205 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
1206 if (extraBits) {
1207 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
1208 BIT_flushBits(&blockStream);
1209 }
1210 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
1211 ofBits - extraBits);
1212 } else {
1213 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
1214 }
Yann Collet597847a2016-03-20 19:14:22 +01001215 BIT_flushBits(&blockStream);
1216
Yann Colletfadda6c2016-03-22 12:14:26 +01001217 { size_t n;
1218 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
Yann Collet3c6b8082016-07-30 03:20:47 +02001219 BYTE const llCode = llCodeTable[n];
Yann Collet731ef162016-07-27 21:05:12 +02001220 BYTE const ofCode = ofCodeTable[n];
1221 BYTE const mlCode = mlCodeTable[n];
Yann Collet731ef162016-07-27 21:05:12 +02001222 U32 const llBits = LL_bits[llCode];
Yann Collet731ef162016-07-27 21:05:12 +02001223 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
Yann Collet3c6b8082016-07-30 03:20:47 +02001224 U32 const mlBits = ML_bits[mlCode];
Yann Colletfadda6c2016-03-22 12:14:26 +01001225 /* (7)*/ /* (7)*/
Yann Colletb9151402016-03-26 17:18:11 +01001226 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
1227 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
1228 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
1229 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
Yann Collet582933f2016-04-11 16:25:56 +02001230 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
Yann Colletb9151402016-03-26 17:18:11 +01001231 BIT_flushBits(&blockStream); /* (7)*/
Yann Colleted57d852016-07-29 21:22:17 +02001232 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
Yann Colletb9151402016-03-26 17:18:11 +01001233 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
Yann Colleted57d852016-07-29 21:22:17 +02001234 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
Yann Colletb9151402016-03-26 17:18:11 +01001235 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
Sean Purcelld44703d2017-03-01 14:36:25 -08001236 if (longOffsets) {
1237 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
1238 if (extraBits) {
1239 BIT_addBits(&blockStream, sequences[n].offset, extraBits);
1240 BIT_flushBits(&blockStream); /* (7)*/
1241 }
1242 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
1243 ofBits - extraBits); /* 31 */
1244 } else {
1245 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
1246 }
Yann Colletb9151402016-03-26 17:18:11 +01001247 BIT_flushBits(&blockStream); /* (7)*/
Yann Colletfadda6c2016-03-22 12:14:26 +01001248 } }
Yann Collet14983e72015-11-11 21:38:21 +01001249
1250 FSE_flushCState(&blockStream, &stateMatchLength);
1251 FSE_flushCState(&blockStream, &stateOffsetBits);
1252 FSE_flushCState(&blockStream, &stateLitLength);
1253
Yann Colletb9151402016-03-26 17:18:11 +01001254 { size_t const streamSize = BIT_closeCStream(&blockStream);
1255 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
1256 op += streamSize;
1257 } }
Yann Collet14983e72015-11-11 21:38:21 +01001258
1259 /* check compressibility */
Yann Collete93d6ce2016-01-31 00:58:06 +01001260_check_compressibility:
Nick Terrella4197772017-03-01 17:51:56 -08001261 { size_t const minGain = ZSTD_minGain(srcSize);
1262 size_t const maxCSize = srcSize - minGain;
1263 if ((size_t)(op-ostart) >= maxCSize) {
Yann Collet71ddeb62017-04-20 22:54:54 -07001264 zc->hufCTable_repeatMode = HUF_repeat_none;
Nick Terrella4197772017-03-01 17:51:56 -08001265 return 0;
1266 } }
Yann Collet14983e72015-11-11 21:38:21 +01001267
Yann Collet4266c0a2016-06-14 01:49:25 +02001268 /* confirm repcodes */
Yann Colletb459aad2017-01-19 17:33:37 -08001269 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
Yann Collet4266c0a2016-06-14 01:49:25 +02001270
Yann Collet5054ee02015-11-23 13:34:21 +01001271 return op - ostart;
Yann Collet14983e72015-11-11 21:38:21 +01001272}
1273
Yann Colletbb002742017-01-25 16:25:38 -08001274
Yann Collet95cd0c22016-03-08 18:24:21 +01001275/*! ZSTD_storeSeq() :
1276 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
1277 `offsetCode` : distance to match, or 0 == repCode.
1278 `matchCode` : matchLength - MINMATCH
Yann Collet14983e72015-11-11 21:38:21 +01001279*/
Yann Colletd57dffb2016-07-03 01:48:26 +02001280MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
Yann Collet14983e72015-11-11 21:38:21 +01001281{
Yann Collet009d6042017-05-19 10:17:59 -07001282#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
1283 static const BYTE* g_start = NULL;
1284 U32 const pos = (U32)((const BYTE*)literals - g_start);
1285 if (g_start==NULL) g_start = (const BYTE*)literals;
1286 if ((pos > 0) && (pos < 1000000000))
1287 DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
1288 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
Yann Collet14983e72015-11-11 21:38:21 +01001289#endif
Yann Collet14983e72015-11-11 21:38:21 +01001290 /* copy Literals */
Yann Collet009d6042017-05-19 10:17:59 -07001291 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
Yann Collet14983e72015-11-11 21:38:21 +01001292 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
1293 seqStorePtr->lit += litLength;
1294
1295 /* literal Length */
Yann Collete6fa70a2017-04-20 17:28:31 -07001296 if (litLength>0xFFFF) {
1297 seqStorePtr->longLengthID = 1;
1298 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1299 }
Yann Colletc0ce4f12016-07-30 00:55:13 +02001300 seqStorePtr->sequences[0].litLength = (U16)litLength;
Yann Collet14983e72015-11-11 21:38:21 +01001301
1302 /* match offset */
Yann Colletc0ce4f12016-07-30 00:55:13 +02001303 seqStorePtr->sequences[0].offset = offsetCode + 1;
Yann Collet14983e72015-11-11 21:38:21 +01001304
1305 /* match Length */
Yann Collete6fa70a2017-04-20 17:28:31 -07001306 if (matchCode>0xFFFF) {
1307 seqStorePtr->longLengthID = 2;
1308 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1309 }
Yann Colletc0ce4f12016-07-30 00:55:13 +02001310 seqStorePtr->sequences[0].matchLength = (U16)matchCode;
Yann Colleted57d852016-07-29 21:22:17 +02001311
Yann Colletc0ce4f12016-07-30 00:55:13 +02001312 seqStorePtr->sequences++;
Yann Collet14983e72015-11-11 21:38:21 +01001313}
1314
1315
Yann Collet7d360282016-02-12 00:07:30 +01001316/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +01001317* Match length counter
1318***************************************/
Yann Collet5054ee02015-11-23 13:34:21 +01001319static unsigned ZSTD_NbCommonBytes (register size_t val)
Yann Collet14983e72015-11-11 21:38:21 +01001320{
Yann Collet863ec402016-01-28 17:56:33 +01001321 if (MEM_isLittleEndian()) {
1322 if (MEM_64bits()) {
Yann Collet14983e72015-11-11 21:38:21 +01001323# if defined(_MSC_VER) && defined(_WIN64)
1324 unsigned long r = 0;
1325 _BitScanForward64( &r, (U64)val );
Yann Colletd6080882015-12-09 09:05:22 +01001326 return (unsigned)(r>>3);
Yann Collet14983e72015-11-11 21:38:21 +01001327# elif defined(__GNUC__) && (__GNUC__ >= 3)
1328 return (__builtin_ctzll((U64)val) >> 3);
1329# else
Yann Collete348dad2017-04-20 11:14:13 -07001330 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
1331 0, 3, 1, 3, 1, 4, 2, 7,
1332 0, 2, 3, 6, 1, 5, 3, 5,
1333 1, 3, 4, 4, 2, 5, 6, 7,
1334 7, 0, 1, 2, 3, 3, 4, 6,
1335 2, 6, 5, 5, 3, 4, 5, 6,
1336 7, 1, 2, 4, 6, 4, 4, 5,
1337 7, 2, 6, 5, 7, 6, 7, 7 };
Yann Collet14983e72015-11-11 21:38:21 +01001338 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
1339# endif
Yann Collet863ec402016-01-28 17:56:33 +01001340 } else { /* 32 bits */
Yann Collet14983e72015-11-11 21:38:21 +01001341# if defined(_MSC_VER)
1342 unsigned long r=0;
1343 _BitScanForward( &r, (U32)val );
Yann Colletd6080882015-12-09 09:05:22 +01001344 return (unsigned)(r>>3);
Yann Collet14983e72015-11-11 21:38:21 +01001345# elif defined(__GNUC__) && (__GNUC__ >= 3)
1346 return (__builtin_ctz((U32)val) >> 3);
1347# else
Yann Collete348dad2017-04-20 11:14:13 -07001348 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
1349 3, 2, 2, 1, 3, 2, 0, 1,
1350 3, 3, 1, 2, 2, 2, 2, 0,
1351 3, 1, 2, 0, 1, 0, 1, 1 };
Yann Collet14983e72015-11-11 21:38:21 +01001352 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
1353# endif
1354 }
Yann Collet863ec402016-01-28 17:56:33 +01001355 } else { /* Big Endian CPU */
1356 if (MEM_64bits()) {
Yann Collet14983e72015-11-11 21:38:21 +01001357# if defined(_MSC_VER) && defined(_WIN64)
1358 unsigned long r = 0;
1359 _BitScanReverse64( &r, val );
1360 return (unsigned)(r>>3);
1361# elif defined(__GNUC__) && (__GNUC__ >= 3)
1362 return (__builtin_clzll(val) >> 3);
1363# else
1364 unsigned r;
1365 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
1366 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
1367 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
1368 r += (!val);
1369 return r;
1370# endif
Yann Collet863ec402016-01-28 17:56:33 +01001371 } else { /* 32 bits */
Yann Collet14983e72015-11-11 21:38:21 +01001372# if defined(_MSC_VER)
1373 unsigned long r = 0;
1374 _BitScanReverse( &r, (unsigned long)val );
1375 return (unsigned)(r>>3);
1376# elif defined(__GNUC__) && (__GNUC__ >= 3)
1377 return (__builtin_clz((U32)val) >> 3);
1378# else
1379 unsigned r;
1380 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
1381 r += (!val);
1382 return r;
1383# endif
Yann Collet863ec402016-01-28 17:56:33 +01001384 } }
Yann Collet14983e72015-11-11 21:38:21 +01001385}
1386
1387
Yann Colleta436a522016-06-20 23:34:04 +02001388static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
Yann Collet14983e72015-11-11 21:38:21 +01001389{
1390 const BYTE* const pStart = pIn;
Yann Colleta436a522016-06-20 23:34:04 +02001391 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
Yann Collet14983e72015-11-11 21:38:21 +01001392
Yann Colleta436a522016-06-20 23:34:04 +02001393 while (pIn < pInLoopLimit) {
Yann Collet7591a7f2016-05-20 11:44:43 +02001394 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
Yann Collet14983e72015-11-11 21:38:21 +01001395 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
1396 pIn += ZSTD_NbCommonBytes(diff);
1397 return (size_t)(pIn - pStart);
1398 }
Yann Collet14983e72015-11-11 21:38:21 +01001399 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
1400 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
1401 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
1402 return (size_t)(pIn - pStart);
1403}
1404
Yann Collet04b12d82016-02-11 06:23:24 +01001405/** ZSTD_count_2segments() :
Yann Collet7d360282016-02-12 00:07:30 +01001406* can count match length with `ip` & `match` in 2 different segments.
Yann Collet5054ee02015-11-23 13:34:21 +01001407* convention : on reaching mEnd, match count continue starting from iStart
1408*/
1409static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
1410{
Yann Collet7591a7f2016-05-20 11:44:43 +02001411 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
Yann Collet731ef162016-07-27 21:05:12 +02001412 size_t const matchLength = ZSTD_count(ip, match, vEnd);
1413 if (match + matchLength != mEnd) return matchLength;
1414 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
Yann Collet5054ee02015-11-23 13:34:21 +01001415}
1416
Yann Collet14983e72015-11-11 21:38:21 +01001417
Yann Collet863ec402016-01-28 17:56:33 +01001418/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +01001419* Hashes
Yann Colletf3eca252015-10-22 15:31:46 +01001420***************************************/
inikepcc52a972016-02-19 10:09:35 +01001421static const U32 prime3bytes = 506832829U;
1422static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
Yann Collete6fa70a2017-04-20 17:28:31 -07001423MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
inikepcc52a972016-02-19 10:09:35 +01001424
Yann Collet4b100f42015-10-30 15:49:48 +01001425static const U32 prime4bytes = 2654435761U;
Yann Collet863ec402016-01-28 17:56:33 +01001426static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
Yann Collet5be2dd22015-11-11 13:43:58 +01001427static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
Yann Collet2acb5d32015-10-29 16:49:43 +01001428
Yann Collet4b100f42015-10-30 15:49:48 +01001429static const U64 prime5bytes = 889523592379ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001430static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001431static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
Yann Collet4b100f42015-10-30 15:49:48 +01001432
1433static const U64 prime6bytes = 227718039650203ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001434static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001435static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
Yann Collet4b100f42015-10-30 15:49:48 +01001436
Yann Collet14983e72015-11-11 21:38:21 +01001437static const U64 prime7bytes = 58295818150454627ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001438static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001439static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001440
Yann Collet45dc3562016-07-12 09:47:31 +02001441static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
1442static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
1443static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
1444
Yann Collet5be2dd22015-11-11 13:43:58 +01001445static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
Yann Collet4b100f42015-10-30 15:49:48 +01001446{
1447 switch(mls)
1448 {
1449 default:
Yann Collet5be2dd22015-11-11 13:43:58 +01001450 case 4: return ZSTD_hash4Ptr(p, hBits);
1451 case 5: return ZSTD_hash5Ptr(p, hBits);
1452 case 6: return ZSTD_hash6Ptr(p, hBits);
1453 case 7: return ZSTD_hash7Ptr(p, hBits);
Yann Collet45dc3562016-07-12 09:47:31 +02001454 case 8: return ZSTD_hash8Ptr(p, hBits);
Yann Collet4b100f42015-10-30 15:49:48 +01001455 }
1456}
Yann Collet2acb5d32015-10-29 16:49:43 +01001457
Yann Collet863ec402016-01-28 17:56:33 +01001458
Yann Collet2ce49232016-02-02 14:36:49 +01001459/*-*************************************
Yann Collet1f44b3f2015-11-05 17:32:18 +01001460* Fast Scan
1461***************************************/
Yann Collet417890c2015-12-04 17:16:37 +01001462static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
1463{
1464 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001465 U32 const hBits = zc->appliedParams.cParams.hashLog;
Yann Collet417890c2015-12-04 17:16:37 +01001466 const BYTE* const base = zc->base;
1467 const BYTE* ip = base + zc->nextToUpdate;
Yann Collet731ef162016-07-27 21:05:12 +02001468 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
Yann Collet37f3d1b2016-03-19 15:11:42 +01001469 const size_t fastHashFillStep = 3;
Yann Collet417890c2015-12-04 17:16:37 +01001470
Yann Colletfb810d62016-01-28 00:18:06 +01001471 while(ip <= iend) {
Yann Collet417890c2015-12-04 17:16:37 +01001472 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
Yann Collet37f3d1b2016-03-19 15:11:42 +01001473 ip += fastHashFillStep;
Yann Collet417890c2015-12-04 17:16:37 +01001474 }
1475}
1476
1477
Yann Collet1f44b3f2015-11-05 17:32:18 +01001478FORCE_INLINE
Yann Collet4266c0a2016-06-14 01:49:25 +02001479void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
Yann Collet280f9a82016-08-08 00:44:00 +02001480 const void* src, size_t srcSize,
1481 const U32 mls)
Yann Collet1f44b3f2015-11-05 17:32:18 +01001482{
Yann Collet4266c0a2016-06-14 01:49:25 +02001483 U32* const hashTable = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001484 U32 const hBits = cctx->appliedParams.cParams.hashLog;
Yann Collet4266c0a2016-06-14 01:49:25 +02001485 seqStore_t* seqStorePtr = &(cctx->seqStore);
1486 const BYTE* const base = cctx->base;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001487 const BYTE* const istart = (const BYTE*)src;
Yann Collet805a52a2015-11-06 10:52:17 +01001488 const BYTE* ip = istart;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001489 const BYTE* anchor = istart;
Yann Collet731ef162016-07-27 21:05:12 +02001490 const U32 lowestIndex = cctx->dictLimit;
Yann Collet4266c0a2016-06-14 01:49:25 +02001491 const BYTE* const lowest = base + lowestIndex;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001492 const BYTE* const iend = istart + srcSize;
Yann Collet731ef162016-07-27 21:05:12 +02001493 const BYTE* const ilimit = iend - HASH_READ_SIZE;
Yann Collet92d75662016-07-03 01:10:53 +02001494 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1495 U32 offsetSaved = 0;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001496
Yann Collet1f44b3f2015-11-05 17:32:18 +01001497 /* init */
Yann Collet4266c0a2016-06-14 01:49:25 +02001498 ip += (ip==lowest);
1499 { U32 const maxRep = (U32)(ip-lowest);
Yann Collet92d75662016-07-03 01:10:53 +02001500 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1501 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
Yann Collet4266c0a2016-06-14 01:49:25 +02001502 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001503
1504 /* Main Search Loop */
Yann Collet4266c0a2016-06-14 01:49:25 +02001505 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
Yann Colleta436a522016-06-20 23:34:04 +02001506 size_t mLength;
Yann Collet43dfe012016-06-13 21:43:06 +02001507 size_t const h = ZSTD_hashPtr(ip, hBits, mls);
1508 U32 const current = (U32)(ip-base);
1509 U32 const matchIndex = hashTable[h];
Yann Colletd94efbf2015-12-29 14:29:08 +01001510 const BYTE* match = base + matchIndex;
Yann Collet96ffa422016-01-02 01:16:28 +01001511 hashTable[h] = current; /* update hash table */
Yann Collet1f44b3f2015-11-05 17:32:18 +01001512
Yann Collet280f9a82016-08-08 00:44:00 +02001513 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
Yann Collet45dc3562016-07-12 09:47:31 +02001514 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
Yann Collet402fdcf2015-11-20 12:46:08 +01001515 ip++;
Yann Colleta436a522016-06-20 23:34:04 +02001516 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1517 } else {
Yann Collet92d75662016-07-03 01:10:53 +02001518 U32 offset;
Yann Colleta436a522016-06-20 23:34:04 +02001519 if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001520 ip += ((ip-anchor) >> g_searchStrength) + 1;
1521 continue;
1522 }
Yann Collet45dc3562016-07-12 09:47:31 +02001523 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
Yann Collet92d75662016-07-03 01:10:53 +02001524 offset = (U32)(ip-match);
Yann Colleta436a522016-06-20 23:34:04 +02001525 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
Yann Collet402fdcf2015-11-20 12:46:08 +01001526 offset_2 = offset_1;
1527 offset_1 = offset;
inikep59453082016-03-16 15:35:14 +01001528
Yann Colleta436a522016-06-20 23:34:04 +02001529 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Collet402fdcf2015-11-20 12:46:08 +01001530 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001531
Yann Collet402fdcf2015-11-20 12:46:08 +01001532 /* match found */
Yann Colleta436a522016-06-20 23:34:04 +02001533 ip += mLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001534 anchor = ip;
1535
Yann Colletfb810d62016-01-28 00:18:06 +01001536 if (ip <= ilimit) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001537 /* Fill Table */
Yann Colletecd651b2016-01-07 15:35:18 +01001538 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001539 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1540 /* check immediate repcode */
1541 while ( (ip <= ilimit)
Yann Collet4266c0a2016-06-14 01:49:25 +02001542 && ( (offset_2>0)
Yann Collet43dfe012016-06-13 21:43:06 +02001543 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001544 /* store sequence */
Yann Collet45dc3562016-07-12 09:47:31 +02001545 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Collet92d75662016-07-03 01:10:53 +02001546 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001547 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
Yann Colleta436a522016-06-20 23:34:04 +02001548 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1549 ip += rLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001550 anchor = ip;
1551 continue; /* faster when present ... (?) */
Yann Colletfb810d62016-01-28 00:18:06 +01001552 } } }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001553
Yann Collet4266c0a2016-06-14 01:49:25 +02001554 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001555 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1556 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
Yann Collet4266c0a2016-06-14 01:49:25 +02001557
Yann Collet70e45772016-03-19 18:08:32 +01001558 /* Last Literals */
1559 { size_t const lastLLSize = iend - anchor;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001560 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1561 seqStorePtr->lit += lastLLSize;
1562 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001563}
1564
1565
Yann Collet82260dd2016-02-11 07:14:25 +01001566static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
Yann Collet59d1f792016-01-23 19:28:41 +01001567 const void* src, size_t srcSize)
Yann Collet1f44b3f2015-11-05 17:32:18 +01001568{
Yann Collet1ad7c822017-05-22 17:06:04 -07001569 const U32 mls = ctx->appliedParams.cParams.searchLength;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001570 switch(mls)
1571 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001572 default: /* includes case 3 */
Yann Collet1f44b3f2015-11-05 17:32:18 +01001573 case 4 :
Yann Collet59d1f792016-01-23 19:28:41 +01001574 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001575 case 5 :
Yann Collet59d1f792016-01-23 19:28:41 +01001576 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001577 case 6 :
Yann Collet59d1f792016-01-23 19:28:41 +01001578 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001579 case 7 :
Yann Collet59d1f792016-01-23 19:28:41 +01001580 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001581 }
1582}
Yann Colletf3eca252015-10-22 15:31:46 +01001583
Yann Colletf3eca252015-10-22 15:31:46 +01001584
Yann Collet82260dd2016-02-11 07:14:25 +01001585static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
Yann Collet59d1f792016-01-23 19:28:41 +01001586 const void* src, size_t srcSize,
1587 const U32 mls)
Yann Collet89db5e02015-11-13 11:27:46 +01001588{
1589 U32* hashTable = ctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001590 const U32 hBits = ctx->appliedParams.cParams.hashLog;
Yann Collet89db5e02015-11-13 11:27:46 +01001591 seqStore_t* seqStorePtr = &(ctx->seqStore);
1592 const BYTE* const base = ctx->base;
1593 const BYTE* const dictBase = ctx->dictBase;
1594 const BYTE* const istart = (const BYTE*)src;
1595 const BYTE* ip = istart;
1596 const BYTE* anchor = istart;
Yann Collet43dfe012016-06-13 21:43:06 +02001597 const U32 lowestIndex = ctx->lowLimit;
1598 const BYTE* const dictStart = dictBase + lowestIndex;
Yann Collet89db5e02015-11-13 11:27:46 +01001599 const U32 dictLimit = ctx->dictLimit;
Yann Collet743402c2015-11-20 12:03:53 +01001600 const BYTE* const lowPrefixPtr = base + dictLimit;
1601 const BYTE* const dictEnd = dictBase + dictLimit;
Yann Collet89db5e02015-11-13 11:27:46 +01001602 const BYTE* const iend = istart + srcSize;
1603 const BYTE* const ilimit = iend - 8;
Yann Collet4266c0a2016-06-14 01:49:25 +02001604 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
Yann Collet89db5e02015-11-13 11:27:46 +01001605
Yann Colleta436a522016-06-20 23:34:04 +02001606 /* Search Loop */
Yann Colletfb810d62016-01-28 00:18:06 +01001607 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Yann Collet89db5e02015-11-13 11:27:46 +01001608 const size_t h = ZSTD_hashPtr(ip, hBits, mls);
Yann Collet743402c2015-11-20 12:03:53 +01001609 const U32 matchIndex = hashTable[h];
Yann Collet89db5e02015-11-13 11:27:46 +01001610 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
Yann Collet6bcdeac2015-11-26 11:43:00 +01001611 const BYTE* match = matchBase + matchIndex;
Yann Collet89db5e02015-11-13 11:27:46 +01001612 const U32 current = (U32)(ip-base);
Yann Colleta436a522016-06-20 23:34:04 +02001613 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001614 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
Yann Collet89db5e02015-11-13 11:27:46 +01001615 const BYTE* repMatch = repBase + repIndex;
Yann Colleta436a522016-06-20 23:34:04 +02001616 size_t mLength;
Yann Collet89db5e02015-11-13 11:27:46 +01001617 hashTable[h] = current; /* update hash table */
1618
Yann Colleta436a522016-06-20 23:34:04 +02001619 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
Yann Collet4266c0a2016-06-14 01:49:25 +02001620 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001621 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete6fa70a2017-04-20 17:28:31 -07001622 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
Yann Collet743402c2015-11-20 12:03:53 +01001623 ip++;
Yann Colleta436a522016-06-20 23:34:04 +02001624 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
Yann Colletfb810d62016-01-28 00:18:06 +01001625 } else {
Yann Collet43dfe012016-06-13 21:43:06 +02001626 if ( (matchIndex < lowestIndex) ||
Yann Collet52447382016-03-20 16:00:00 +01001627 (MEM_read32(match) != MEM_read32(ip)) ) {
1628 ip += ((ip-anchor) >> g_searchStrength) + 1;
1629 continue;
1630 }
1631 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
Yann Collet5054ee02015-11-23 13:34:21 +01001632 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
Yann Colleta436a522016-06-20 23:34:04 +02001633 U32 offset;
Yann Collete6fa70a2017-04-20 17:28:31 -07001634 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
Yann Colleta436a522016-06-20 23:34:04 +02001635 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
Yann Collet402fdcf2015-11-20 12:46:08 +01001636 offset = current - matchIndex;
1637 offset_2 = offset_1;
1638 offset_1 = offset;
Yann Colleta436a522016-06-20 23:34:04 +02001639 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletfb810d62016-01-28 00:18:06 +01001640 } }
Yann Collet89db5e02015-11-13 11:27:46 +01001641
Yann Collet5054ee02015-11-23 13:34:21 +01001642 /* found a match : store it */
Yann Colleta436a522016-06-20 23:34:04 +02001643 ip += mLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001644 anchor = ip;
1645
Yann Colletfb810d62016-01-28 00:18:06 +01001646 if (ip <= ilimit) {
Yann Collet6bcdeac2015-11-26 11:43:00 +01001647 /* Fill Table */
Yann Collet3e21ec52016-09-06 15:36:19 +02001648 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001649 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1650 /* check immediate repcode */
Yann Colletfb810d62016-01-28 00:18:06 +01001651 while (ip <= ilimit) {
Yann Collet27caf2a2016-04-01 15:48:48 +02001652 U32 const current2 = (U32)(ip-base);
1653 U32 const repIndex2 = current2 - offset_2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001654 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
Yann Collet4266c0a2016-06-14 01:49:25 +02001655 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1656 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
Yann Collet5054ee02015-11-23 13:34:21 +01001657 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
Yann Collete6fa70a2017-04-20 17:28:31 -07001658 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
Yann Collet5054ee02015-11-23 13:34:21 +01001659 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
inikep7bc19b62016-04-06 09:46:01 +02001660 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
Yann Collet5054ee02015-11-23 13:34:21 +01001661 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
inikep7bc19b62016-04-06 09:46:01 +02001662 ip += repLength2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001663 anchor = ip;
1664 continue;
1665 }
Yann Collet743402c2015-11-20 12:03:53 +01001666 break;
Yann Colletfb810d62016-01-28 00:18:06 +01001667 } } }
Yann Collet89db5e02015-11-13 11:27:46 +01001668
Yann Collet4266c0a2016-06-14 01:49:25 +02001669 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001670 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet4266c0a2016-06-14 01:49:25 +02001671
Yann Collet89db5e02015-11-13 11:27:46 +01001672 /* Last Literals */
Yann Collet70e45772016-03-19 18:08:32 +01001673 { size_t const lastLLSize = iend - anchor;
Yann Collet89db5e02015-11-13 11:27:46 +01001674 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1675 seqStorePtr->lit += lastLLSize;
1676 }
Yann Collet89db5e02015-11-13 11:27:46 +01001677}
1678
1679
Yann Collet82260dd2016-02-11 07:14:25 +01001680static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
Yann Collet89db5e02015-11-13 11:27:46 +01001681 const void* src, size_t srcSize)
1682{
Yann Collet1ad7c822017-05-22 17:06:04 -07001683 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet89db5e02015-11-13 11:27:46 +01001684 switch(mls)
1685 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001686 default: /* includes case 3 */
Yann Collet89db5e02015-11-13 11:27:46 +01001687 case 4 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001688 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001689 case 5 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001690 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001691 case 6 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001692 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001693 case 7 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001694 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001695 }
1696}
1697
1698
/*-*************************************
*  Double Fast
***************************************/
1702static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
1703{
1704 U32* const hashLarge = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001705 U32 const hBitsL = cctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001706 U32* const hashSmall = cctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001707 U32 const hBitsS = cctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001708 const BYTE* const base = cctx->base;
1709 const BYTE* ip = base + cctx->nextToUpdate;
Yann Collet731ef162016-07-27 21:05:12 +02001710 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
Yann Collet45dc3562016-07-12 09:47:31 +02001711 const size_t fastHashFillStep = 3;
1712
1713 while(ip <= iend) {
1714 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
1715 hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
1716 ip += fastHashFillStep;
1717 }
1718}
1719
1720
1721FORCE_INLINE
1722void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1723 const void* src, size_t srcSize,
1724 const U32 mls)
1725{
1726 U32* const hashLong = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001727 const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001728 U32* const hashSmall = cctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001729 const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001730 seqStore_t* seqStorePtr = &(cctx->seqStore);
1731 const BYTE* const base = cctx->base;
1732 const BYTE* const istart = (const BYTE*)src;
1733 const BYTE* ip = istart;
1734 const BYTE* anchor = istart;
1735 const U32 lowestIndex = cctx->dictLimit;
1736 const BYTE* const lowest = base + lowestIndex;
1737 const BYTE* const iend = istart + srcSize;
Yann Collet731ef162016-07-27 21:05:12 +02001738 const BYTE* const ilimit = iend - HASH_READ_SIZE;
Yann Collet45dc3562016-07-12 09:47:31 +02001739 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1740 U32 offsetSaved = 0;
1741
1742 /* init */
1743 ip += (ip==lowest);
1744 { U32 const maxRep = (U32)(ip-lowest);
1745 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1746 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
1747 }
1748
1749 /* Main Search Loop */
1750 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
1751 size_t mLength;
1752 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
1753 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
1754 U32 const current = (U32)(ip-base);
1755 U32 const matchIndexL = hashLong[h2];
1756 U32 const matchIndexS = hashSmall[h];
1757 const BYTE* matchLong = base + matchIndexL;
1758 const BYTE* match = base + matchIndexS;
1759 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
1760
Yann Colletc17e0202017-04-20 12:50:02 -07001761 assert(offset_1 <= current); /* supposed guaranteed by construction */
1762 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
Yann Collete6fa70a2017-04-20 17:28:31 -07001763 /* favor repcode */
Yann Collet45dc3562016-07-12 09:47:31 +02001764 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1765 ip++;
1766 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1767 } else {
Yann Colleteed20812016-07-12 15:11:40 +02001768 U32 offset;
Yann Collet45dc3562016-07-12 09:47:31 +02001769 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
1770 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
Yann Colleteed20812016-07-12 15:11:40 +02001771 offset = (U32)(ip-matchLong);
Yann Collet45dc3562016-07-12 09:47:31 +02001772 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1773 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
Yann Collete6fa70a2017-04-20 17:28:31 -07001774 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1775 U32 const matchIndexL3 = hashLong[hl3];
1776 const BYTE* matchL3 = base + matchIndexL3;
1777 hashLong[hl3] = current + 1;
1778 if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
1779 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
Yann Colletc54692f2016-08-24 01:10:42 +02001780 ip++;
Yann Collete6fa70a2017-04-20 17:28:31 -07001781 offset = (U32)(ip-matchL3);
1782 while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
Yann Colletc54692f2016-08-24 01:10:42 +02001783 } else {
1784 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1785 offset = (U32)(ip-match);
1786 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1787 }
Yann Collet45dc3562016-07-12 09:47:31 +02001788 } else {
1789 ip += ((ip-anchor) >> g_searchStrength) + 1;
1790 continue;
1791 }
1792
1793 offset_2 = offset_1;
1794 offset_1 = offset;
1795
1796 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1797 }
1798
1799 /* match found */
1800 ip += mLength;
1801 anchor = ip;
1802
1803 if (ip <= ilimit) {
1804 /* Fill Table */
1805 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
1806 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
1807 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
1808 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1809
1810 /* check immediate repcode */
1811 while ( (ip <= ilimit)
1812 && ( (offset_2>0)
1813 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1814 /* store sequence */
1815 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Colleteed20812016-07-12 15:11:40 +02001816 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
Yann Collet45dc3562016-07-12 09:47:31 +02001817 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
1818 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
1819 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1820 ip += rLength;
1821 anchor = ip;
1822 continue; /* faster when present ... (?) */
1823 } } }
1824
1825 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001826 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1827 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
Yann Collet45dc3562016-07-12 09:47:31 +02001828
1829 /* Last Literals */
1830 { size_t const lastLLSize = iend - anchor;
1831 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1832 seqStorePtr->lit += lastLLSize;
1833 }
1834}
1835
1836
1837static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1838{
Yann Collet1ad7c822017-05-22 17:06:04 -07001839 const U32 mls = ctx->appliedParams.cParams.searchLength;
Yann Collet45dc3562016-07-12 09:47:31 +02001840 switch(mls)
1841 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001842 default: /* includes case 3 */
Yann Collet45dc3562016-07-12 09:47:31 +02001843 case 4 :
1844 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
1845 case 5 :
1846 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
1847 case 6 :
1848 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
1849 case 7 :
1850 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
1851 }
1852}
1853
1854
1855static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1856 const void* src, size_t srcSize,
1857 const U32 mls)
1858{
1859 U32* const hashLong = ctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001860 U32 const hBitsL = ctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001861 U32* const hashSmall = ctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001862 U32 const hBitsS = ctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001863 seqStore_t* seqStorePtr = &(ctx->seqStore);
1864 const BYTE* const base = ctx->base;
1865 const BYTE* const dictBase = ctx->dictBase;
1866 const BYTE* const istart = (const BYTE*)src;
1867 const BYTE* ip = istart;
1868 const BYTE* anchor = istart;
1869 const U32 lowestIndex = ctx->lowLimit;
1870 const BYTE* const dictStart = dictBase + lowestIndex;
1871 const U32 dictLimit = ctx->dictLimit;
1872 const BYTE* const lowPrefixPtr = base + dictLimit;
1873 const BYTE* const dictEnd = dictBase + dictLimit;
1874 const BYTE* const iend = istart + srcSize;
1875 const BYTE* const ilimit = iend - 8;
1876 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1877
1878 /* Search Loop */
1879 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1880 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
1881 const U32 matchIndex = hashSmall[hSmall];
1882 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1883 const BYTE* match = matchBase + matchIndex;
1884
1885 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
1886 const U32 matchLongIndex = hashLong[hLong];
1887 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
1888 const BYTE* matchLong = matchLongBase + matchLongIndex;
1889
1890 const U32 current = (U32)(ip-base);
1891 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1892 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1893 const BYTE* repMatch = repBase + repIndex;
1894 size_t mLength;
1895 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
1896
1897 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1898 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1899 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1900 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
1901 ip++;
1902 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1903 } else {
1904 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
1905 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
1906 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
1907 U32 offset;
1908 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
1909 offset = current - matchLongIndex;
1910 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1911 offset_2 = offset_1;
1912 offset_1 = offset;
1913 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletc54692f2016-08-24 01:10:42 +02001914
Yann Collet73d74a02016-07-12 13:03:48 +02001915 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
Yann Colletc54692f2016-08-24 01:10:42 +02001916 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1917 U32 const matchIndex3 = hashLong[h3];
1918 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
1919 const BYTE* match3 = match3Base + matchIndex3;
Yann Collet45dc3562016-07-12 09:47:31 +02001920 U32 offset;
Yann Colletc54692f2016-08-24 01:10:42 +02001921 hashLong[h3] = current + 1;
1922 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1923 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
1924 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
1925 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
1926 ip++;
1927 offset = current+1 - matchIndex3;
1928 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1929 } else {
1930 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1931 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1932 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
1933 offset = current - matchIndex;
1934 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1935 }
Yann Collet45dc3562016-07-12 09:47:31 +02001936 offset_2 = offset_1;
1937 offset_1 = offset;
1938 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletc54692f2016-08-24 01:10:42 +02001939
Yann Collet45dc3562016-07-12 09:47:31 +02001940 } else {
1941 ip += ((ip-anchor) >> g_searchStrength) + 1;
1942 continue;
1943 } }
1944
1945 /* found a match : store it */
1946 ip += mLength;
1947 anchor = ip;
1948
1949 if (ip <= ilimit) {
1950 /* Fill Table */
Nick Terrellf35ef5c2017-03-09 12:51:33 -08001951 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
1952 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
Yann Collet45dc3562016-07-12 09:47:31 +02001953 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1954 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
1955 /* check immediate repcode */
1956 while (ip <= ilimit) {
1957 U32 const current2 = (U32)(ip-base);
1958 U32 const repIndex2 = current2 - offset_2;
1959 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1960 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1961 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1962 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07001963 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
Yann Collet45dc3562016-07-12 09:47:31 +02001964 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1965 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1966 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
1967 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
1968 ip += repLength2;
1969 anchor = ip;
1970 continue;
1971 }
1972 break;
1973 } } }
1974
1975 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001976 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet45dc3562016-07-12 09:47:31 +02001977
1978 /* Last Literals */
1979 { size_t const lastLLSize = iend - anchor;
1980 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1981 seqStorePtr->lit += lastLLSize;
1982 }
1983}
1984
1985
1986static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
1987 const void* src, size_t srcSize)
1988{
Yann Collet1ad7c822017-05-22 17:06:04 -07001989 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet45dc3562016-07-12 09:47:31 +02001990 switch(mls)
1991 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001992 default: /* includes case 3 */
Yann Collet45dc3562016-07-12 09:47:31 +02001993 case 4 :
1994 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
1995 case 5 :
1996 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
1997 case 6 :
1998 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
1999 case 7 :
2000 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
2001 }
2002}
2003
2004
/*-*************************************
*  Binary Tree search
***************************************/
Yann Collet04b12d82016-02-11 06:23:24 +01002008/** ZSTD_insertBt1() : add one or multiple positions to tree.
2009* ip : assumed <= iend-8 .
Yann Collet06eade52015-11-23 14:23:47 +01002010* @return : nb of positions added */
Yann Collet1358f912016-01-01 07:29:39 +01002011static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
2012 U32 extDict)
Yann Collet96b9f0b2015-11-04 03:52:54 +01002013{
Yann Collet731ef162016-07-27 21:05:12 +02002014 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002015 U32 const hashLog = zc->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +02002016 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
2017 U32* const bt = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002018 U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
Yann Collet731ef162016-07-27 21:05:12 +02002019 U32 const btMask = (1 << btLog) - 1;
2020 U32 matchIndex = hashTable[h];
Yann Collet96b9f0b2015-11-04 03:52:54 +01002021 size_t commonLengthSmaller=0, commonLengthLarger=0;
2022 const BYTE* const base = zc->base;
Yann Collet1358f912016-01-01 07:29:39 +01002023 const BYTE* const dictBase = zc->dictBase;
2024 const U32 dictLimit = zc->dictLimit;
2025 const BYTE* const dictEnd = dictBase + dictLimit;
2026 const BYTE* const prefixStart = base + dictLimit;
Yann Collet2b361cf2016-10-14 16:03:34 -07002027 const BYTE* match;
Yann Collet6c3e2e72015-12-11 10:44:07 +01002028 const U32 current = (U32)(ip-base);
Yann Collete9eba602015-11-08 15:08:03 +01002029 const U32 btLow = btMask >= current ? 0 : current - btMask;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002030 U32* smallerPtr = bt + 2*(current&btMask);
Yann Colleta87278a2016-01-17 00:12:55 +01002031 U32* largerPtr = smallerPtr + 1;
Yann Collet59d70632015-11-04 12:05:27 +01002032 U32 dummy32; /* to be nullified at the end */
Yann Collet731ef162016-07-27 21:05:12 +02002033 U32 const windowLow = zc->lowLimit;
Yann Collet72e84cf2015-12-31 19:08:44 +01002034 U32 matchEndIdx = current+8;
Yann Colletb8a6f682016-02-15 17:06:29 +01002035 size_t bestLength = 8;
Yann Colletc0932082016-06-30 14:07:30 +02002036#ifdef ZSTD_C_PREDICT
Yann Collet7beaa052016-01-21 11:57:45 +01002037 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
2038 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
2039 predictedSmall += (predictedSmall>0);
2040 predictedLarge += (predictedLarge>0);
Yann Colletc0932082016-06-30 14:07:30 +02002041#endif /* ZSTD_C_PREDICT */
Yann Colletf48e35c2015-11-07 01:13:31 +01002042
Yann Collet6c3e2e72015-12-11 10:44:07 +01002043 hashTable[h] = current; /* Update Hash Table */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002044
Yann Colletfb810d62016-01-28 00:18:06 +01002045 while (nbCompares-- && (matchIndex > windowLow)) {
Yann Collet25f46dc2016-11-29 16:59:27 -08002046 U32* const nextPtr = bt + 2*(matchIndex & btMask);
Yann Collet96b9f0b2015-11-04 03:52:54 +01002047 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
Yann Collet25f46dc2016-11-29 16:59:27 -08002048
Yann Colletc0932082016-06-30 14:07:30 +02002049#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
Yann Collet70e8c382016-02-10 13:37:52 +01002050 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
Yann Colletfb810d62016-01-28 00:18:06 +01002051 if (matchIndex == predictedSmall) {
2052 /* no need to check length, result known */
Yann Colleta87278a2016-01-17 00:12:55 +01002053 *smallerPtr = matchIndex;
2054 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2055 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
2056 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Collet7beaa052016-01-21 11:57:45 +01002057 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
Yann Colleta87278a2016-01-17 00:12:55 +01002058 continue;
2059 }
Yann Colletfb810d62016-01-28 00:18:06 +01002060 if (matchIndex == predictedLarge) {
Yann Colleta87278a2016-01-17 00:12:55 +01002061 *largerPtr = matchIndex;
2062 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2063 largerPtr = nextPtr;
2064 matchIndex = nextPtr[0];
Yann Collet7beaa052016-01-21 11:57:45 +01002065 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
Yann Colleta87278a2016-01-17 00:12:55 +01002066 continue;
2067 }
Yann Collet04b12d82016-02-11 06:23:24 +01002068#endif
Yann Colletfb810d62016-01-28 00:18:06 +01002069 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
Yann Collet1358f912016-01-01 07:29:39 +01002070 match = base + matchIndex;
2071 if (match[matchLength] == ip[matchLength])
2072 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
Yann Colletfb810d62016-01-28 00:18:06 +01002073 } else {
Yann Collet1358f912016-01-01 07:29:39 +01002074 match = dictBase + matchIndex;
2075 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
2076 if (matchIndex+matchLength >= dictLimit)
Nick Terrellf35ef5c2017-03-09 12:51:33 -08002077 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
Yann Collet1358f912016-01-01 07:29:39 +01002078 }
Yann Collet96b9f0b2015-11-04 03:52:54 +01002079
Yann Colletb8a6f682016-02-15 17:06:29 +01002080 if (matchLength > bestLength) {
2081 bestLength = matchLength;
2082 if (matchLength > matchEndIdx - matchIndex)
2083 matchEndIdx = matchIndex + (U32)matchLength;
2084 }
Yann Colletee3f4512015-12-29 22:26:09 +01002085
Yann Collet59d70632015-11-04 12:05:27 +01002086 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
Yann Collet1358f912016-01-01 07:29:39 +01002087 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002088
Yann Colletfb810d62016-01-28 00:18:06 +01002089 if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002090 /* match is smaller than current */
2091 *smallerPtr = matchIndex; /* update smaller idx */
2092 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
Yann Colletf48e35c2015-11-07 01:13:31 +01002093 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002094 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
Yann Colletf48e35c2015-11-07 01:13:31 +01002095 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Colletfb810d62016-01-28 00:18:06 +01002096 } else {
Yann Collet96b9f0b2015-11-04 03:52:54 +01002097 /* match is larger than current */
2098 *largerPtr = matchIndex;
2099 commonLengthLarger = matchLength;
Yann Colletf48e35c2015-11-07 01:13:31 +01002100 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002101 largerPtr = nextPtr;
Yann Colletf48e35c2015-11-07 01:13:31 +01002102 matchIndex = nextPtr[0];
Yann Colletfb810d62016-01-28 00:18:06 +01002103 } }
Yann Collet96b9f0b2015-11-04 03:52:54 +01002104
Yann Collet59d70632015-11-04 12:05:27 +01002105 *smallerPtr = *largerPtr = 0;
Yann Colleta436a522016-06-20 23:34:04 +02002106 if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
Yann Colletb8a6f682016-02-15 17:06:29 +01002107 if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
2108 return 1;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002109}
2110
2111
Yann Collet82260dd2016-02-11 07:14:25 +01002112static size_t ZSTD_insertBtAndFindBestMatch (
Yann Collet03526e12015-11-23 15:29:15 +01002113 ZSTD_CCtx* zc,
2114 const BYTE* const ip, const BYTE* const iend,
2115 size_t* offsetPtr,
Yann Collet2cc12cb2016-01-01 07:47:58 +01002116 U32 nbCompares, const U32 mls,
2117 U32 extDict)
Yann Collet03526e12015-11-23 15:29:15 +01002118{
Yann Collet731ef162016-07-27 21:05:12 +02002119 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002120 U32 const hashLog = zc->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +02002121 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
2122 U32* const bt = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002123 U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
Yann Collet731ef162016-07-27 21:05:12 +02002124 U32 const btMask = (1 << btLog) - 1;
Yann Collet03526e12015-11-23 15:29:15 +01002125 U32 matchIndex = hashTable[h];
2126 size_t commonLengthSmaller=0, commonLengthLarger=0;
2127 const BYTE* const base = zc->base;
2128 const BYTE* const dictBase = zc->dictBase;
2129 const U32 dictLimit = zc->dictLimit;
2130 const BYTE* const dictEnd = dictBase + dictLimit;
2131 const BYTE* const prefixStart = base + dictLimit;
2132 const U32 current = (U32)(ip-base);
2133 const U32 btLow = btMask >= current ? 0 : current - btMask;
2134 const U32 windowLow = zc->lowLimit;
2135 U32* smallerPtr = bt + 2*(current&btMask);
2136 U32* largerPtr = bt + 2*(current&btMask) + 1;
Yann Collet72e84cf2015-12-31 19:08:44 +01002137 U32 matchEndIdx = current+8;
Yann Collet03526e12015-11-23 15:29:15 +01002138 U32 dummy32; /* to be nullified at the end */
inikep64d7bcb2016-04-07 19:14:09 +02002139 size_t bestLength = 0;
Yann Collet03526e12015-11-23 15:29:15 +01002140
Yann Collet6c3e2e72015-12-11 10:44:07 +01002141 hashTable[h] = current; /* Update Hash Table */
Yann Collet03526e12015-11-23 15:29:15 +01002142
Yann Colletfb810d62016-01-28 00:18:06 +01002143 while (nbCompares-- && (matchIndex > windowLow)) {
Yann Collet25f46dc2016-11-29 16:59:27 -08002144 U32* const nextPtr = bt + 2*(matchIndex & btMask);
Yann Collet03526e12015-11-23 15:29:15 +01002145 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
2146 const BYTE* match;
2147
Yann Colletfb810d62016-01-28 00:18:06 +01002148 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
Yann Collet03526e12015-11-23 15:29:15 +01002149 match = base + matchIndex;
2150 if (match[matchLength] == ip[matchLength])
2151 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
Yann Colletfb810d62016-01-28 00:18:06 +01002152 } else {
Yann Collet03526e12015-11-23 15:29:15 +01002153 match = dictBase + matchIndex;
2154 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
Yann Collet225179d2015-11-23 16:52:22 +01002155 if (matchIndex+matchLength >= dictLimit)
Nick Terrellf35ef5c2017-03-09 12:51:33 -08002156 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
Yann Collet03526e12015-11-23 15:29:15 +01002157 }
2158
Yann Colletfb810d62016-01-28 00:18:06 +01002159 if (matchLength > bestLength) {
Yann Colletee3f4512015-12-29 22:26:09 +01002160 if (matchLength > matchEndIdx - matchIndex)
Yann Collet48da1642015-12-29 23:40:02 +01002161 matchEndIdx = matchIndex + (U32)matchLength;
Yann Collet49bb0042016-06-04 20:17:38 +02002162 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
inikep75716852016-04-06 12:34:42 +02002163 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
Yann Collet03526e12015-11-23 15:29:15 +01002164 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
2165 break; /* drop, to guarantee consistency (miss a little bit of compression) */
2166 }
2167
Yann Colletfb810d62016-01-28 00:18:06 +01002168 if (match[matchLength] < ip[matchLength]) {
Yann Collet03526e12015-11-23 15:29:15 +01002169 /* match is smaller than current */
2170 *smallerPtr = matchIndex; /* update smaller idx */
2171 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
2172 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2173 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
2174 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Colletfb810d62016-01-28 00:18:06 +01002175 } else {
Yann Collet03526e12015-11-23 15:29:15 +01002176 /* match is larger than current */
2177 *largerPtr = matchIndex;
2178 commonLengthLarger = matchLength;
2179 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2180 largerPtr = nextPtr;
2181 matchIndex = nextPtr[0];
Yann Collet768c6bc2016-02-10 14:01:49 +01002182 } }
Yann Collet03526e12015-11-23 15:29:15 +01002183
2184 *smallerPtr = *largerPtr = 0;
2185
Yann Collet72e84cf2015-12-31 19:08:44 +01002186 zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
inikep64d7bcb2016-04-07 19:14:09 +02002187 return bestLength;
Yann Collet03526e12015-11-23 15:29:15 +01002188}
2189
Yann Collet2cc12cb2016-01-01 07:47:58 +01002190
Yann Colletb8a6f682016-02-15 17:06:29 +01002191static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
Yann Collet82260dd2016-02-11 07:14:25 +01002192{
2193 const BYTE* const base = zc->base;
2194 const U32 target = (U32)(ip - base);
2195 U32 idx = zc->nextToUpdate;
Yann Colletb8a6f682016-02-15 17:06:29 +01002196
2197 while(idx < target)
2198 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
Yann Collet82260dd2016-02-11 07:14:25 +01002199}
2200
Yann Collet52447382016-03-20 16:00:00 +01002201/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
Yann Collet82260dd2016-02-11 07:14:25 +01002202static size_t ZSTD_BtFindBestMatch (
Yann Collet2cc12cb2016-01-01 07:47:58 +01002203 ZSTD_CCtx* zc,
2204 const BYTE* const ip, const BYTE* const iLimit,
2205 size_t* offsetPtr,
2206 const U32 maxNbAttempts, const U32 mls)
2207{
2208 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
Yann Colletb8a6f682016-02-15 17:06:29 +01002209 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
Yann Collet2cc12cb2016-01-01 07:47:58 +01002210 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
2211}
2212
2213
Yann Collet768c6bc2016-02-10 14:01:49 +01002214static size_t ZSTD_BtFindBestMatch_selectMLS (
Yann Collet2cc12cb2016-01-01 07:47:58 +01002215 ZSTD_CCtx* zc, /* Index table will be updated */
2216 const BYTE* ip, const BYTE* const iLimit,
2217 size_t* offsetPtr,
2218 const U32 maxNbAttempts, const U32 matchLengthSearch)
2219{
2220 switch(matchLengthSearch)
2221 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002222 default : /* includes case 3 */
Yann Collet2cc12cb2016-01-01 07:47:58 +01002223 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
2224 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
Yann Collet933ce4a2017-03-29 14:32:15 -07002225 case 7 :
Yann Collet2cc12cb2016-01-01 07:47:58 +01002226 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
2227 }
2228}
2229
2230
Yann Colletb8a6f682016-02-15 17:06:29 +01002231static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
2232{
2233 const BYTE* const base = zc->base;
2234 const U32 target = (U32)(ip - base);
2235 U32 idx = zc->nextToUpdate;
2236
2237 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
2238}
2239
inikep64d7bcb2016-04-07 19:14:09 +02002240
Yann Collet03526e12015-11-23 15:29:15 +01002241/** Tree updater, providing best match */
Yann Collet82260dd2016-02-11 07:14:25 +01002242static size_t ZSTD_BtFindBestMatch_extDict (
Yann Collet03526e12015-11-23 15:29:15 +01002243 ZSTD_CCtx* zc,
2244 const BYTE* const ip, const BYTE* const iLimit,
2245 size_t* offsetPtr,
2246 const U32 maxNbAttempts, const U32 mls)
2247{
Yann Colletee3f4512015-12-29 22:26:09 +01002248 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
Yann Colletb8a6f682016-02-15 17:06:29 +01002249 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
Yann Collet2cc12cb2016-01-01 07:47:58 +01002250 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
Yann Collet03526e12015-11-23 15:29:15 +01002251}
2252
2253
Yann Collet82260dd2016-02-11 07:14:25 +01002254static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
Yann Collet03526e12015-11-23 15:29:15 +01002255 ZSTD_CCtx* zc, /* Index table will be updated */
2256 const BYTE* ip, const BYTE* const iLimit,
2257 size_t* offsetPtr,
2258 const U32 maxNbAttempts, const U32 matchLengthSearch)
2259{
2260 switch(matchLengthSearch)
2261 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002262 default : /* includes case 3 */
Yann Collet03526e12015-11-23 15:29:15 +01002263 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
2264 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
Yann Collet933ce4a2017-03-29 14:32:15 -07002265 case 7 :
Yann Collet03526e12015-11-23 15:29:15 +01002266 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
2267 }
2268}
2269
2270
Yann Collet5106a762015-11-05 15:00:24 +01002271
/* *********************************
*  Hash Chain
***********************************/
/* NEXT_IN_CHAIN() : chain-table slot associated with index `d`.
 * Expands to an lvalue; relies on a variable named `chainTable` being in scope at the point of use.
 * Both arguments are parenthesized so that compound expressions (e.g. `a|b`) can be passed safely. */
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
2276
2277/* Update chains up to ip (excluded)
Anders Oleson517577b2017-02-20 12:08:59 -08002278 Assumption : always within prefix (i.e. not within extDict) */
inikep64d7bcb2016-04-07 19:14:09 +02002279FORCE_INLINE
2280U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
2281{
2282 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002283 const U32 hashLog = zc->appliedParams.cParams.hashLog;
inikep64d7bcb2016-04-07 19:14:09 +02002284 U32* const chainTable = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002285 const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
inikep64d7bcb2016-04-07 19:14:09 +02002286 const BYTE* const base = zc->base;
2287 const U32 target = (U32)(ip - base);
2288 U32 idx = zc->nextToUpdate;
2289
Yann Collet22d76322016-06-21 08:01:51 +02002290 while(idx < target) { /* catch up */
inikep64d7bcb2016-04-07 19:14:09 +02002291 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
2292 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
2293 hashTable[h] = idx;
2294 idx++;
2295 }
2296
2297 zc->nextToUpdate = target;
2298 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
2299}
2300
2301
Nick Terrell55fc1f92017-05-24 13:50:10 -07002302/* inlining is important to hardwire a hot branch (template emulation) */
2303FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002304size_t ZSTD_HcFindBestMatch_generic (
2305 ZSTD_CCtx* zc, /* Index table will be updated */
2306 const BYTE* const ip, const BYTE* const iLimit,
2307 size_t* offsetPtr,
2308 const U32 maxNbAttempts, const U32 mls, const U32 extDict)
2309{
2310 U32* const chainTable = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002311 const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
inikep64d7bcb2016-04-07 19:14:09 +02002312 const U32 chainMask = chainSize-1;
2313 const BYTE* const base = zc->base;
2314 const BYTE* const dictBase = zc->dictBase;
2315 const U32 dictLimit = zc->dictLimit;
2316 const BYTE* const prefixStart = base + dictLimit;
2317 const BYTE* const dictEnd = dictBase + dictLimit;
2318 const U32 lowLimit = zc->lowLimit;
2319 const U32 current = (U32)(ip-base);
2320 const U32 minChain = current > chainSize ? current - chainSize : 0;
2321 int nbAttempts=maxNbAttempts;
Yann Collete42afbc2017-04-26 11:39:35 -07002322 size_t ml=4-1;
inikep64d7bcb2016-04-07 19:14:09 +02002323
2324 /* HC4 match finder */
2325 U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
2326
Yann Collet22d76322016-06-21 08:01:51 +02002327 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
inikep64d7bcb2016-04-07 19:14:09 +02002328 const BYTE* match;
2329 size_t currentMl=0;
2330 if ((!extDict) || matchIndex >= dictLimit) {
2331 match = base + matchIndex;
2332 if (match[ml] == ip[ml]) /* potentially better */
2333 currentMl = ZSTD_count(ip, match, iLimit);
2334 } else {
2335 match = dictBase + matchIndex;
2336 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
Yann Collete42afbc2017-04-26 11:39:35 -07002337 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002338 }
2339
2340 /* save best solution */
Yann Colletc17e0202017-04-20 12:50:02 -07002341 if (currentMl > ml) {
2342 ml = currentMl;
2343 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
2344 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
2345 }
inikep64d7bcb2016-04-07 19:14:09 +02002346
2347 if (matchIndex <= minChain) break;
2348 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
2349 }
2350
2351 return ml;
2352}
2353
2354
2355FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
2356 ZSTD_CCtx* zc,
2357 const BYTE* ip, const BYTE* const iLimit,
2358 size_t* offsetPtr,
2359 const U32 maxNbAttempts, const U32 matchLengthSearch)
2360{
2361 switch(matchLengthSearch)
2362 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002363 default : /* includes case 3 */
inikep64d7bcb2016-04-07 19:14:09 +02002364 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
2365 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
Yann Collet933ce4a2017-03-29 14:32:15 -07002366 case 7 :
inikep64d7bcb2016-04-07 19:14:09 +02002367 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
2368 }
2369}
2370
2371
2372FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
2373 ZSTD_CCtx* zc,
2374 const BYTE* ip, const BYTE* const iLimit,
2375 size_t* offsetPtr,
2376 const U32 maxNbAttempts, const U32 matchLengthSearch)
2377{
2378 switch(matchLengthSearch)
2379 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002380 default : /* includes case 3 */
inikep64d7bcb2016-04-07 19:14:09 +02002381 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
2382 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
Yann Collet933ce4a2017-03-29 14:32:15 -07002383 case 7 :
inikep64d7bcb2016-04-07 19:14:09 +02002384 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
2385 }
2386}
2387
inikep64d7bcb2016-04-07 19:14:09 +02002388
/* *******************************
*  Common parser - lazy strategy
*********************************/
Yann Collet96b9f0b2015-11-04 03:52:54 +01002392FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002393void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
2394 const void* src, size_t srcSize,
2395 const U32 searchMethod, const U32 depth)
Yann Collet96b9f0b2015-11-04 03:52:54 +01002396{
inikepfaa8d8a2016-04-05 19:01:10 +02002397 seqStore_t* seqStorePtr = &(ctx->seqStore);
2398 const BYTE* const istart = (const BYTE*)src;
2399 const BYTE* ip = istart;
2400 const BYTE* anchor = istart;
2401 const BYTE* const iend = istart + srcSize;
2402 const BYTE* const ilimit = iend - 8;
2403 const BYTE* const base = ctx->base + ctx->dictLimit;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002404
Yann Collet1ad7c822017-05-22 17:06:04 -07002405 U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
2406 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002407
inikep64d7bcb2016-04-07 19:14:09 +02002408 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2409 size_t* offsetPtr,
2410 U32 maxNbAttempts, U32 matchLengthSearch);
Yann Collet43dfe012016-06-13 21:43:06 +02002411 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
Yann Collet9634f672016-07-03 01:23:58 +02002412 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
inikep64d7bcb2016-04-07 19:14:09 +02002413
inikepfaa8d8a2016-04-05 19:01:10 +02002414 /* init */
Yann Collet4266c0a2016-06-14 01:49:25 +02002415 ip += (ip==base);
inikep64d7bcb2016-04-07 19:14:09 +02002416 ctx->nextToUpdate3 = ctx->nextToUpdate;
Yann Collet9634f672016-07-03 01:23:58 +02002417 { U32 const maxRep = (U32)(ip-base);
2418 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
2419 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
2420 }
Yann Collet96b9f0b2015-11-04 03:52:54 +01002421
inikepfaa8d8a2016-04-05 19:01:10 +02002422 /* Match Loop */
2423 while (ip < ilimit) {
2424 size_t matchLength=0;
2425 size_t offset=0;
2426 const BYTE* start=ip+1;
Yann Collet5106a762015-11-05 15:00:24 +01002427
inikepfaa8d8a2016-04-05 19:01:10 +02002428 /* check repCode */
Yann Collet9634f672016-07-03 01:23:58 +02002429 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
inikepfaa8d8a2016-04-05 19:01:10 +02002430 /* repcode : we take it */
Yann Collete42afbc2017-04-26 11:39:35 -07002431 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002432 if (depth==0) goto _storeSequence;
Yann Collet5106a762015-11-05 15:00:24 +01002433 }
Yann Collet5be2dd22015-11-11 13:43:58 +01002434
inikepfaa8d8a2016-04-05 19:01:10 +02002435 /* first search (depth 0) */
2436 { size_t offsetFound = 99999999;
inikep64d7bcb2016-04-07 19:14:09 +02002437 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
inikepfaa8d8a2016-04-05 19:01:10 +02002438 if (ml2 > matchLength)
inikep75716852016-04-06 12:34:42 +02002439 matchLength = ml2, start = ip, offset=offsetFound;
inikepfaa8d8a2016-04-05 19:01:10 +02002440 }
Yann Collet5106a762015-11-05 15:00:24 +01002441
Yann Collete42afbc2017-04-26 11:39:35 -07002442 if (matchLength < 4) {
inikepfaa8d8a2016-04-05 19:01:10 +02002443 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2444 continue;
2445 }
2446
inikep64d7bcb2016-04-07 19:14:09 +02002447 /* let's try to find a better solution */
2448 if (depth>=1)
2449 while (ip<ilimit) {
2450 ip ++;
Yann Collet9634f672016-07-03 01:23:58 +02002451 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
Yann Collete42afbc2017-04-26 11:39:35 -07002452 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002453 int const gain2 = (int)(mlRep * 3);
Yann Collet49bb0042016-06-04 20:17:38 +02002454 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002455 if ((mlRep >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002456 matchLength = mlRep, offset = 0, start = ip;
2457 }
2458 { size_t offset2=99999999;
2459 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002460 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2461 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
Yann Collete42afbc2017-04-26 11:39:35 -07002462 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002463 matchLength = ml2, offset = offset2, start = ip;
2464 continue; /* search a better one */
2465 } }
inikepfaa8d8a2016-04-05 19:01:10 +02002466
inikep64d7bcb2016-04-07 19:14:09 +02002467 /* let's find an even better one */
2468 if ((depth==2) && (ip<ilimit)) {
2469 ip ++;
Yann Collet9634f672016-07-03 01:23:58 +02002470 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
Yann Collete42afbc2017-04-26 11:39:35 -07002471 size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002472 int const gain2 = (int)(ml2 * 4);
Yann Collet49bb0042016-06-04 20:17:38 +02002473 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002474 if ((ml2 >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002475 matchLength = ml2, offset = 0, start = ip;
2476 }
2477 { size_t offset2=99999999;
2478 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002479 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2480 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
Yann Collete42afbc2017-04-26 11:39:35 -07002481 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002482 matchLength = ml2, offset = offset2, start = ip;
2483 continue;
2484 } } }
2485 break; /* nothing found : store previous solution */
2486 }
2487
Nick Terrell55f9cd42017-06-19 15:12:28 -07002488 /* NOTE:
2489 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
2490 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
2491 * overflows the pointer, which is undefined behavior.
2492 */
inikep64d7bcb2016-04-07 19:14:09 +02002493 /* catch up */
2494 if (offset) {
Yann Colletc17e0202017-04-20 12:50:02 -07002495 while ( (start > anchor)
2496 && (start > base+offset-ZSTD_REP_MOVE)
Nick Terrell55f9cd42017-06-19 15:12:28 -07002497 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */
inikep64d7bcb2016-04-07 19:14:09 +02002498 { start--; matchLength++; }
Yann Collet9634f672016-07-03 01:23:58 +02002499 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
inikep64d7bcb2016-04-07 19:14:09 +02002500 }
inikepfaa8d8a2016-04-05 19:01:10 +02002501 /* store sequence */
inikep64d7bcb2016-04-07 19:14:09 +02002502_storeSequence:
inikepfaa8d8a2016-04-05 19:01:10 +02002503 { size_t const litLength = start - anchor;
Yann Colletd57dffb2016-07-03 01:48:26 +02002504 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
inikepfaa8d8a2016-04-05 19:01:10 +02002505 anchor = ip = start + matchLength;
2506 }
Yann Collet48537162016-04-07 15:24:29 +02002507
inikepfaa8d8a2016-04-05 19:01:10 +02002508 /* check immediate repcode */
2509 while ( (ip <= ilimit)
Yann Collet9634f672016-07-03 01:23:58 +02002510 && ((offset_2>0)
2511 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
inikepfaa8d8a2016-04-05 19:01:10 +02002512 /* store sequence */
Yann Collete42afbc2017-04-26 11:39:35 -07002513 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Collet9634f672016-07-03 01:23:58 +02002514 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
inikep7bc19b62016-04-06 09:46:01 +02002515 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2516 ip += matchLength;
inikepfaa8d8a2016-04-05 19:01:10 +02002517 anchor = ip;
2518 continue; /* faster when present ... (?) */
inikep64d7bcb2016-04-07 19:14:09 +02002519 } }
inikepfaa8d8a2016-04-05 19:01:10 +02002520
Yann Collet4266c0a2016-06-14 01:49:25 +02002521 /* Save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08002522 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
2523 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
Yann Collet4266c0a2016-06-14 01:49:25 +02002524
inikepfaa8d8a2016-04-05 19:01:10 +02002525 /* Last Literals */
2526 { size_t const lastLLSize = iend - anchor;
2527 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2528 seqStorePtr->lit += lastLLSize;
Yann Collet5106a762015-11-05 15:00:24 +01002529 }
Yann Collet5106a762015-11-05 15:00:24 +01002530}
2531
Yann Collet5be2dd22015-11-11 13:43:58 +01002532
inikep64d7bcb2016-04-07 19:14:09 +02002533static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2534{
2535 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
2536}
2537
2538static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2539{
2540 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
2541}
2542
2543static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2544{
2545 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
2546}
2547
2548static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2549{
2550 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
2551}
2552
2553
inikepfaa8d8a2016-04-05 19:01:10 +02002554FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002555void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
2556 const void* src, size_t srcSize,
2557 const U32 searchMethod, const U32 depth)
Yann Collet5be2dd22015-11-11 13:43:58 +01002558{
inikepfaa8d8a2016-04-05 19:01:10 +02002559 seqStore_t* seqStorePtr = &(ctx->seqStore);
2560 const BYTE* const istart = (const BYTE*)src;
2561 const BYTE* ip = istart;
2562 const BYTE* anchor = istart;
2563 const BYTE* const iend = istart + srcSize;
2564 const BYTE* const ilimit = iend - 8;
2565 const BYTE* const base = ctx->base;
2566 const U32 dictLimit = ctx->dictLimit;
Yann Collet43dfe012016-06-13 21:43:06 +02002567 const U32 lowestIndex = ctx->lowLimit;
inikepfaa8d8a2016-04-05 19:01:10 +02002568 const BYTE* const prefixStart = base + dictLimit;
2569 const BYTE* const dictBase = ctx->dictBase;
2570 const BYTE* const dictEnd = dictBase + dictLimit;
2571 const BYTE* const dictStart = dictBase + ctx->lowLimit;
2572
Yann Collet1ad7c822017-05-22 17:06:04 -07002573 const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
2574 const U32 mls = ctx->appliedParams.cParams.searchLength;
inikepfaa8d8a2016-04-05 19:01:10 +02002575
inikep64d7bcb2016-04-07 19:14:09 +02002576 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2577 size_t* offsetPtr,
2578 U32 maxNbAttempts, U32 matchLengthSearch);
2579 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2580
Yann Collet302ff032016-07-03 01:28:16 +02002581 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
inikepfaa8d8a2016-04-05 19:01:10 +02002582
Yann Collet302ff032016-07-03 01:28:16 +02002583 /* init */
inikep64d7bcb2016-04-07 19:14:09 +02002584 ctx->nextToUpdate3 = ctx->nextToUpdate;
Yann Collet4266c0a2016-06-14 01:49:25 +02002585 ip += (ip == prefixStart);
inikepfaa8d8a2016-04-05 19:01:10 +02002586
2587 /* Match Loop */
2588 while (ip < ilimit) {
2589 size_t matchLength=0;
2590 size_t offset=0;
2591 const BYTE* start=ip+1;
inikep64d7bcb2016-04-07 19:14:09 +02002592 U32 current = (U32)(ip-base);
inikepfaa8d8a2016-04-05 19:01:10 +02002593
2594 /* check repCode */
Yann Collet302ff032016-07-03 01:28:16 +02002595 { const U32 repIndex = (U32)(current+1 - offset_1);
inikepfaa8d8a2016-04-05 19:01:10 +02002596 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2597 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002598 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002599 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
inikepfaa8d8a2016-04-05 19:01:10 +02002600 /* repcode detected we should take it */
2601 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002602 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002603 if (depth==0) goto _storeSequence;
inikepfaa8d8a2016-04-05 19:01:10 +02002604 } }
2605
2606 /* first search (depth 0) */
2607 { size_t offsetFound = 99999999;
inikep64d7bcb2016-04-07 19:14:09 +02002608 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
inikepfaa8d8a2016-04-05 19:01:10 +02002609 if (ml2 > matchLength)
inikep75716852016-04-06 12:34:42 +02002610 matchLength = ml2, start = ip, offset=offsetFound;
inikepfaa8d8a2016-04-05 19:01:10 +02002611 }
2612
Yann Collete42afbc2017-04-26 11:39:35 -07002613 if (matchLength < 4) {
inikepfaa8d8a2016-04-05 19:01:10 +02002614 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2615 continue;
2616 }
2617
inikep64d7bcb2016-04-07 19:14:09 +02002618 /* let's try to find a better solution */
2619 if (depth>=1)
2620 while (ip<ilimit) {
2621 ip ++;
2622 current++;
2623 /* check repCode */
2624 if (offset) {
Yann Collet302ff032016-07-03 01:28:16 +02002625 const U32 repIndex = (U32)(current - offset_1);
inikep64d7bcb2016-04-07 19:14:09 +02002626 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2627 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002628 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002629 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2630 /* repcode detected */
2631 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002632 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002633 int const gain2 = (int)(repLength * 3);
Yann Collet49bb0042016-06-04 20:17:38 +02002634 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002635 if ((repLength >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002636 matchLength = repLength, offset = 0, start = ip;
2637 } }
2638
2639 /* search match, depth 1 */
2640 { size_t offset2=99999999;
2641 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002642 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2643 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
Yann Collete42afbc2017-04-26 11:39:35 -07002644 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002645 matchLength = ml2, offset = offset2, start = ip;
2646 continue; /* search a better one */
2647 } }
2648
2649 /* let's find an even better one */
2650 if ((depth==2) && (ip<ilimit)) {
2651 ip ++;
2652 current++;
2653 /* check repCode */
2654 if (offset) {
Yann Collet302ff032016-07-03 01:28:16 +02002655 const U32 repIndex = (U32)(current - offset_1);
inikep64d7bcb2016-04-07 19:14:09 +02002656 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2657 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002658 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002659 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2660 /* repcode detected */
2661 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002662 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
Yann Colletc17e0202017-04-20 12:50:02 -07002663 int const gain2 = (int)(repLength * 4);
2664 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002665 if ((repLength >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002666 matchLength = repLength, offset = 0, start = ip;
2667 } }
2668
2669 /* search match, depth 2 */
2670 { size_t offset2=99999999;
2671 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002672 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2673 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
Yann Collete42afbc2017-04-26 11:39:35 -07002674 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002675 matchLength = ml2, offset = offset2, start = ip;
2676 continue;
2677 } } }
2678 break; /* nothing found : store previous solution */
2679 }
2680
inikepfaa8d8a2016-04-05 19:01:10 +02002681 /* catch up */
inikep64d7bcb2016-04-07 19:14:09 +02002682 if (offset) {
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002683 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
inikepfaa8d8a2016-04-05 19:01:10 +02002684 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2685 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
inikep64d7bcb2016-04-07 19:14:09 +02002686 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
Yann Collet302ff032016-07-03 01:28:16 +02002687 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
Yann Collet48537162016-04-07 15:24:29 +02002688 }
inikepfaa8d8a2016-04-05 19:01:10 +02002689
inikepfaa8d8a2016-04-05 19:01:10 +02002690 /* store sequence */
inikep64d7bcb2016-04-07 19:14:09 +02002691_storeSequence:
inikepfaa8d8a2016-04-05 19:01:10 +02002692 { size_t const litLength = start - anchor;
Yann Colletd57dffb2016-07-03 01:48:26 +02002693 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
inikepfaa8d8a2016-04-05 19:01:10 +02002694 anchor = ip = start + matchLength;
2695 }
2696
2697 /* check immediate repcode */
2698 while (ip <= ilimit) {
Yann Collet302ff032016-07-03 01:28:16 +02002699 const U32 repIndex = (U32)((ip-base) - offset_2);
inikepfaa8d8a2016-04-05 19:01:10 +02002700 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2701 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002702 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikepfaa8d8a2016-04-05 19:01:10 +02002703 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2704 /* repcode detected we should take it */
2705 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002706 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
Yann Collet302ff032016-07-03 01:28:16 +02002707 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
inikepfaa8d8a2016-04-05 19:01:10 +02002708 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2709 ip += matchLength;
2710 anchor = ip;
2711 continue; /* faster when present ... (?) */
2712 }
2713 break;
2714 } }
2715
Yann Collet4266c0a2016-06-14 01:49:25 +02002716 /* Save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08002717 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet4266c0a2016-06-14 01:49:25 +02002718
inikepfaa8d8a2016-04-05 19:01:10 +02002719 /* Last Literals */
2720 { size_t const lastLLSize = iend - anchor;
2721 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2722 seqStorePtr->lit += lastLLSize;
Yann Collet5106a762015-11-05 15:00:24 +01002723 }
2724}
2725
2726
Yann Collet59d1f792016-01-23 19:28:41 +01002727void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Collet9a24e592015-11-22 02:53:43 +01002728{
inikep64d7bcb2016-04-07 19:14:09 +02002729 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
Yann Collet9a24e592015-11-22 02:53:43 +01002730}
2731
Yann Collet59d1f792016-01-23 19:28:41 +01002732static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Colletb7fc88e2015-11-22 03:12:28 +01002733{
Yann Colleta1249dc2016-01-25 04:22:03 +01002734 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
Yann Colletb7fc88e2015-11-22 03:12:28 +01002735}
Yann Collet9a24e592015-11-22 02:53:43 +01002736
Yann Collet59d1f792016-01-23 19:28:41 +01002737static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Colleta85c77b2015-11-22 12:22:04 +01002738{
Yann Colleta1249dc2016-01-25 04:22:03 +01002739 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
Yann Colleta85c77b2015-11-22 12:22:04 +01002740}
2741
Yann Collet59d1f792016-01-23 19:28:41 +01002742static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Collet5054ee02015-11-23 13:34:21 +01002743{
Yann Colleta1249dc2016-01-25 04:22:03 +01002744 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
Yann Collet5054ee02015-11-23 13:34:21 +01002745}
2746
inikepef519412016-04-21 11:08:43 +02002747
inikepef519412016-04-21 11:08:43 +02002748/* The optimal parser */
2749#include "zstd_opt.h"
2750
2751static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2752{
Yann Colletd4f4e582016-06-27 01:31:35 +02002753#ifdef ZSTD_OPT_H_91842398743
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002754 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
2755#else
2756 (void)ctx; (void)src; (void)srcSize;
2757 return;
2758#endif
2759}
2760
Nick Terrelleeb31ee2017-03-09 11:44:25 -08002761static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002762{
2763#ifdef ZSTD_OPT_H_91842398743
2764 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
Yann Colletd4f4e582016-06-27 01:31:35 +02002765#else
2766 (void)ctx; (void)src; (void)srcSize;
2767 return;
2768#endif
inikepef519412016-04-21 11:08:43 +02002769}
2770
inikepd3b8d7a2016-02-22 10:06:17 +01002771static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
inikepe2bfe242016-01-31 11:25:48 +01002772{
Yann Colletd4f4e582016-06-27 01:31:35 +02002773#ifdef ZSTD_OPT_H_91842398743
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002774 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
2775#else
2776 (void)ctx; (void)src; (void)srcSize;
2777 return;
2778#endif
2779}
2780
Nick Terrelleeb31ee2017-03-09 11:44:25 -08002781static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002782{
2783#ifdef ZSTD_OPT_H_91842398743
2784 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
Yann Colletd4f4e582016-06-27 01:31:35 +02002785#else
2786 (void)ctx; (void)src; (void)srcSize;
2787 return;
2788#endif
inikepe2bfe242016-01-31 11:25:48 +01002789}
2790
Yann Collet7a231792015-11-21 15:27:35 +01002791
Yann Colletb44ab822017-06-20 14:11:49 -07002792/* ZSTD_selectBlockCompressor() :
2793 * assumption : strat is a valid strategy */
Yann Collet59d1f792016-01-23 19:28:41 +01002794typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
Yann Colletb923f652016-01-26 03:14:20 +01002795static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
Yann Collet59d70632015-11-04 12:05:27 +01002796{
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002797 static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
Yann Colletb44ab822017-06-20 14:11:49 -07002798 { ZSTD_compressBlock_fast /* default for 0 */,
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002799 ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
Yann Colletc17e0202017-04-20 12:50:02 -07002800 ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
Nick Terrell5f2c7212017-05-10 16:49:58 -07002801 ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
Yann Colletb44ab822017-06-20 14:11:49 -07002802 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002803 ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
Yann Colletc17e0202017-04-20 12:50:02 -07002804 ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
Nick Terrell5f2c7212017-05-10 16:49:58 -07002805 ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
Yann Collet7fe531e2015-11-29 02:38:09 +01002806 };
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002807 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
Yann Colletb44ab822017-06-20 14:11:49 -07002808 assert((U32)strat >= (U32)ZSTD_fast);
2809 assert((U32)strat <= (U32)ZSTD_btultra);
Yann Collet7fe531e2015-11-29 02:38:09 +01002810
Yann Colletb44ab822017-06-20 14:11:49 -07002811 return blockCompressor[extDict!=0][(U32)strat];
Yann Collet59d70632015-11-04 12:05:27 +01002812}
2813
2814
Yann Colletd1b26842016-03-15 01:24:33 +01002815static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Colletbe2010e2015-10-31 12:57:14 +01002816{
Yann Collet1ad7c822017-05-22 17:06:04 -07002817 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit);
inikep98e08cb2016-08-10 15:00:30 +02002818 const BYTE* const base = zc->base;
2819 const BYTE* const istart = (const BYTE*)src;
2820 const U32 current = (U32)(istart-base);
Yann Collet2ce49232016-02-02 14:36:49 +01002821 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
Yann Collet19cab462016-06-17 12:54:52 +02002822 ZSTD_resetSeqStore(&(zc->seqStore));
inikep98e08cb2016-08-10 15:00:30 +02002823 if (current > zc->nextToUpdate + 384)
Yann Colletc17e0202017-04-20 12:50:02 -07002824 zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */
Yann Collet59d1f792016-01-23 19:28:41 +01002825 blockCompressor(zc, src, srcSize);
Yann Colletd1b26842016-03-15 01:24:33 +01002826 return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
Yann Colletbe2010e2015-10-31 12:57:14 +01002827}
2828
2829
Yann Colletdb8e21d2017-05-12 13:46:49 -07002830/*! ZSTD_compress_frameChunk() :
Yann Colletc991cc12016-07-28 00:55:43 +02002831* Compress a chunk of data into one or multiple blocks.
2832* All blocks will be terminated, all input will be consumed.
2833* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
2834* Frame is supposed already started (header already produced)
2835* @return : compressed size, or an error code
2836*/
Yann Colletdb8e21d2017-05-12 13:46:49 -07002837static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002838 void* dst, size_t dstCapacity,
Yann Colletc991cc12016-07-28 00:55:43 +02002839 const void* src, size_t srcSize,
2840 U32 lastFrameChunk)
Yann Colletf3eca252015-10-22 15:31:46 +01002841{
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002842 size_t blockSize = cctx->blockSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002843 size_t remaining = srcSize;
2844 const BYTE* ip = (const BYTE*)src;
2845 BYTE* const ostart = (BYTE*)dst;
2846 BYTE* op = ostart;
Yann Collet1ad7c822017-05-22 17:06:04 -07002847 U32 const maxDist = 1 << cctx->appliedParams.cParams.windowLog;
Yann Collet9b11b462015-11-01 12:40:22 +01002848
Yann Collet1ad7c822017-05-22 17:06:04 -07002849 if (cctx->appliedParams.fParams.checksumFlag && srcSize)
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002850 XXH64_update(&cctx->xxhState, src, srcSize);
2851
Yann Collet2ce49232016-02-02 14:36:49 +01002852 while (remaining) {
Yann Colletc991cc12016-07-28 00:55:43 +02002853 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
Yann Collet3e358272015-11-04 18:19:39 +01002854 size_t cSize;
2855
Yann Colletc17e0202017-04-20 12:50:02 -07002856 if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
2857 return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
Yann Collet3e358272015-11-04 18:19:39 +01002858 if (remaining < blockSize) blockSize = remaining;
Yann Collet89db5e02015-11-13 11:27:46 +01002859
Yann Collet346efcc2016-08-02 14:26:00 +02002860 /* preemptive overflow correction */
Sean Purcell881abe42017-03-07 16:52:23 -08002861 if (cctx->lowLimit > (3U<<29)) {
Yann Collet1ad7c822017-05-22 17:06:04 -07002862 U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->appliedParams.cParams.hashLog, cctx->appliedParams.cParams.strategy)) - 1;
Yann Colletc261f712016-12-12 00:25:07 +01002863 U32 const current = (U32)(ip - cctx->base);
Yann Collet1ad7c822017-05-22 17:06:04 -07002864 U32 const newCurrent = (current & cycleMask) + (1 << cctx->appliedParams.cParams.windowLog);
Yann Colletc261f712016-12-12 00:25:07 +01002865 U32 const correction = current - newCurrent;
2866 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
Yann Collet346efcc2016-08-02 14:26:00 +02002867 ZSTD_reduceIndex(cctx, correction);
2868 cctx->base += correction;
2869 cctx->dictBase += correction;
Yann Colletc261f712016-12-12 00:25:07 +01002870 cctx->lowLimit -= correction;
Yann Collet346efcc2016-08-02 14:26:00 +02002871 cctx->dictLimit -= correction;
2872 if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
2873 else cctx->nextToUpdate -= correction;
2874 }
2875
Yann Collet06e76972017-01-25 16:39:03 -08002876 if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
Yann Collet70e45772016-03-19 18:08:32 +01002877 /* enforce maxDist */
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002878 U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
2879 if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
2880 if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
Yann Colletc3652152015-11-24 14:06:07 +01002881 }
Yann Collet89db5e02015-11-13 11:27:46 +01002882
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002883 cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
inikepfb5df612016-05-24 15:36:37 +02002884 if (ZSTD_isError(cSize)) return cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002885
Yann Collet2ce49232016-02-02 14:36:49 +01002886 if (cSize == 0) { /* block is not compressible */
Yann Colletc991cc12016-07-28 00:55:43 +02002887 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
2888 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
2889 MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
2890 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
2891 cSize = ZSTD_blockHeaderSize+blockSize;
Yann Collet2ce49232016-02-02 14:36:49 +01002892 } else {
Yann Colletc991cc12016-07-28 00:55:43 +02002893 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
Yann Collet6fa05a22016-07-20 14:58:49 +02002894 MEM_writeLE24(op, cBlockHeader24);
Yann Colletc991cc12016-07-28 00:55:43 +02002895 cSize += ZSTD_blockHeaderSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002896 }
2897
2898 remaining -= blockSize;
Yann Colletd1b26842016-03-15 01:24:33 +01002899 dstCapacity -= cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002900 ip += blockSize;
2901 op += cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002902 }
2903
Yann Collet62470b42016-07-28 15:29:08 +02002904 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
Yann Colletf3eca252015-10-22 15:31:46 +01002905 return op-ostart;
2906}
2907
2908
Yann Collet6236eba2016-04-12 15:52:33 +02002909static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
Yann Colletc46fb922016-05-29 05:01:04 +02002910 ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
Yann Collet6236eba2016-04-12 15:52:33 +02002911{ BYTE* const op = (BYTE*)dst;
Yann Collet31533ba2017-04-27 00:29:04 -07002912 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2913 U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
Yann Collet731ef162016-07-27 21:05:12 +02002914 U32 const checksumFlag = params.fParams.checksumFlag>0;
2915 U32 const windowSize = 1U << params.cParams.windowLog;
Sean Purcell2db72492017-02-09 10:50:43 -08002916 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
Yann Collet731ef162016-07-27 21:05:12 +02002917 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2918 U32 const fcsCode = params.fParams.contentSizeFlag ?
Nick Terrell55fc1f92017-05-24 13:50:10 -07002919 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
Yann Collet731ef162016-07-27 21:05:12 +02002920 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
Yann Colletc46fb922016-05-29 05:01:04 +02002921 size_t pos;
2922
2923 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
Yann Collet009d6042017-05-19 10:17:59 -07002924 DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
Yann Collet0be6fd32017-05-08 16:08:01 -07002925 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
Yann Collet6236eba2016-04-12 15:52:33 +02002926
2927 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
Yann Collet673f0d72016-06-06 00:26:38 +02002928 op[4] = frameHeaderDecriptionByte; pos=5;
Eric Biggerse4d02652016-07-26 10:42:19 -07002929 if (!singleSegment) op[pos++] = windowLogByte;
Yann Colletc46fb922016-05-29 05:01:04 +02002930 switch(dictIDSizeCode)
2931 {
Yann Colletcd2892f2017-06-01 09:44:54 -07002932 default: assert(0); /* impossible */
Yann Colletc46fb922016-05-29 05:01:04 +02002933 case 0 : break;
2934 case 1 : op[pos] = (BYTE)(dictID); pos++; break;
Yann Colletd4180ca2016-07-27 21:21:36 +02002935 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
Yann Colletc46fb922016-05-29 05:01:04 +02002936 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
2937 }
Yann Collet673f0d72016-06-06 00:26:38 +02002938 switch(fcsCode)
Yann Collet6236eba2016-04-12 15:52:33 +02002939 {
Yann Colletcd2892f2017-06-01 09:44:54 -07002940 default: assert(0); /* impossible */
Eric Biggerse4d02652016-07-26 10:42:19 -07002941 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
Yann Collet673f0d72016-06-06 00:26:38 +02002942 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
2943 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
Yann Colletc46fb922016-05-29 05:01:04 +02002944 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
Yann Collet6236eba2016-04-12 15:52:33 +02002945 }
Yann Colletc46fb922016-05-29 05:01:04 +02002946 return pos;
Yann Collet6236eba2016-04-12 15:52:33 +02002947}
2948
2949
Yann Collet346efcc2016-08-02 14:26:00 +02002950static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
Yann Collet7cbe79a2016-03-23 22:31:57 +01002951 void* dst, size_t dstCapacity,
Yann Colletbf42c8e2016-01-09 01:08:23 +01002952 const void* src, size_t srcSize,
Yann Colletc991cc12016-07-28 00:55:43 +02002953 U32 frame, U32 lastFrameChunk)
Yann Colletf3eca252015-10-22 15:31:46 +01002954{
Yann Collet2acb5d32015-10-29 16:49:43 +01002955 const BYTE* const ip = (const BYTE*) src;
Yann Collet6236eba2016-04-12 15:52:33 +02002956 size_t fhSize = 0;
Yann Colletecd651b2016-01-07 15:35:18 +01002957
Yann Colleta3d99262017-06-29 14:44:49 -07002958 DEBUGLOG(5, "ZSTD_compressContinue_internal");
2959 DEBUGLOG(5, "stage: %u", cctx->stage);
Yann Collet346efcc2016-08-02 14:26:00 +02002960 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
Yann Colletd4180ca2016-07-27 21:21:36 +02002961
Yann Collet346efcc2016-08-02 14:26:00 +02002962 if (frame && (cctx->stage==ZSTDcs_init)) {
Yann Colleta0ba8492017-06-16 13:29:17 -07002963 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
2964 cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
Yann Collet6236eba2016-04-12 15:52:33 +02002965 if (ZSTD_isError(fhSize)) return fhSize;
2966 dstCapacity -= fhSize;
2967 dst = (char*)dst + fhSize;
Yann Collet346efcc2016-08-02 14:26:00 +02002968 cctx->stage = ZSTDcs_ongoing;
Yann Colletecd651b2016-01-07 15:35:18 +01002969 }
Yann Colletf3eca252015-10-22 15:31:46 +01002970
Yann Collet417890c2015-12-04 17:16:37 +01002971 /* Check if blocks follow each other */
Yann Collet346efcc2016-08-02 14:26:00 +02002972 if (src != cctx->nextSrc) {
Yann Collet417890c2015-12-04 17:16:37 +01002973 /* not contiguous */
Yann Collet346efcc2016-08-02 14:26:00 +02002974 ptrdiff_t const delta = cctx->nextSrc - ip;
2975 cctx->lowLimit = cctx->dictLimit;
2976 cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
2977 cctx->dictBase = cctx->base;
2978 cctx->base -= delta;
2979 cctx->nextToUpdate = cctx->dictLimit;
2980 if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
Yann Collet417890c2015-12-04 17:16:37 +01002981 }
2982
Yann Collet346efcc2016-08-02 14:26:00 +02002983 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
2984 if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
2985 ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
2986 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
2987 cctx->lowLimit = lowLimitMax;
Yann Colletf3eca252015-10-22 15:31:46 +01002988 }
2989
Yann Collet346efcc2016-08-02 14:26:00 +02002990 cctx->nextSrc = ip + srcSize;
Yann Collet89db5e02015-11-13 11:27:46 +01002991
Yann Collet5eb749e2017-01-11 18:21:25 +01002992 if (srcSize) {
2993 size_t const cSize = frame ?
Yann Colletdb8e21d2017-05-12 13:46:49 -07002994 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
Yann Collet346efcc2016-08-02 14:26:00 +02002995 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
Yann Colletecd651b2016-01-07 15:35:18 +01002996 if (ZSTD_isError(cSize)) return cSize;
Yann Collet20d5e032017-04-11 18:34:02 -07002997 cctx->consumedSrcSize += srcSize;
Yann Collet6236eba2016-04-12 15:52:33 +02002998 return cSize + fhSize;
Yann Collet5eb749e2017-01-11 18:21:25 +01002999 } else
3000 return fhSize;
Yann Colletf3eca252015-10-22 15:31:46 +01003001}
3002
Yann Colletbf42c8e2016-01-09 01:08:23 +01003003
Yann Collet5b567392016-07-28 01:17:22 +02003004size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
Yann Collet7cbe79a2016-03-23 22:31:57 +01003005 void* dst, size_t dstCapacity,
Yann Colletbf42c8e2016-01-09 01:08:23 +01003006 const void* src, size_t srcSize)
3007{
Yann Collet20d5e032017-04-11 18:34:02 -07003008 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
Yann Collet5b567392016-07-28 01:17:22 +02003009}
3010
3011
Yann Colletfa3671e2017-05-19 10:51:30 -07003012size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
Yann Colletbf42c8e2016-01-09 01:08:23 +01003013{
Yann Colletfa3671e2017-05-19 10:51:30 -07003014 U32 const cLevel = cctx->compressionLevel;
3015 ZSTD_compressionParameters cParams = (cLevel == ZSTD_CLEVEL_CUSTOM) ?
Yann Collet1ad7c822017-05-22 17:06:04 -07003016 cctx->appliedParams.cParams :
Yann Colletfa3671e2017-05-19 10:51:30 -07003017 ZSTD_getCParams(cLevel, 0, 0);
3018 return MIN (ZSTD_BLOCKSIZE_MAX, 1 << cParams.windowLog);
Yann Colletcf05b9d2016-07-18 16:52:10 +02003019}
3020
3021size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
3022{
Yann Colletfa3671e2017-05-19 10:51:30 -07003023 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
Yann Collet961b6a02016-07-15 11:56:53 +02003024 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
Yann Collet20d5e032017-04-11 18:34:02 -07003025 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
Yann Colletbf42c8e2016-01-09 01:08:23 +01003026}
3027
Yann Collet16a0b102017-03-24 12:46:46 -07003028/*! ZSTD_loadDictionaryContent() :
3029 * @return : 0, or an error code
3030 */
Yann Colletb923f652016-01-26 03:14:20 +01003031static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
Yann Collet417890c2015-12-04 17:16:37 +01003032{
3033 const BYTE* const ip = (const BYTE*) src;
3034 const BYTE* const iend = ip + srcSize;
Yann Colletf3eca252015-10-22 15:31:46 +01003035
Yann Collet417890c2015-12-04 17:16:37 +01003036 /* input becomes current prefix */
3037 zc->lowLimit = zc->dictLimit;
3038 zc->dictLimit = (U32)(zc->nextSrc - zc->base);
3039 zc->dictBase = zc->base;
3040 zc->base += ip - zc->nextSrc;
3041 zc->nextToUpdate = zc->dictLimit;
Yann Collet06e76972017-01-25 16:39:03 -08003042 zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
Yann Collet417890c2015-12-04 17:16:37 +01003043
3044 zc->nextSrc = iend;
Yann Collet731ef162016-07-27 21:05:12 +02003045 if (srcSize <= HASH_READ_SIZE) return 0;
Yann Collet417890c2015-12-04 17:16:37 +01003046
Yann Collet1ad7c822017-05-22 17:06:04 -07003047 switch(zc->appliedParams.cParams.strategy)
Yann Collet417890c2015-12-04 17:16:37 +01003048 {
3049 case ZSTD_fast:
Yann Collet1ad7c822017-05-22 17:06:04 -07003050 ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01003051 break;
3052
Yann Collet45dc3562016-07-12 09:47:31 +02003053 case ZSTD_dfast:
Yann Collet1ad7c822017-05-22 17:06:04 -07003054 ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
Yann Collet45dc3562016-07-12 09:47:31 +02003055 break;
3056
Yann Collet417890c2015-12-04 17:16:37 +01003057 case ZSTD_greedy:
3058 case ZSTD_lazy:
3059 case ZSTD_lazy2:
Yann Collet16a0b102017-03-24 12:46:46 -07003060 if (srcSize >= HASH_READ_SIZE)
Yann Collet1ad7c822017-05-22 17:06:04 -07003061 ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01003062 break;
3063
3064 case ZSTD_btlazy2:
Yann Colletcefef8c2016-02-15 07:21:54 +01003065 case ZSTD_btopt:
Nick Terrelleeb31ee2017-03-09 11:44:25 -08003066 case ZSTD_btultra:
Yann Collet16a0b102017-03-24 12:46:46 -07003067 if (srcSize >= HASH_READ_SIZE)
Yann Collet1ad7c822017-05-22 17:06:04 -07003068 ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01003069 break;
3070
3071 default:
Yann Colletcd2892f2017-06-01 09:44:54 -07003072 assert(0); /* not possible : not a valid strategy id */
Yann Collet417890c2015-12-04 17:16:37 +01003073 }
3074
Nick Terrellecf90ca2017-02-13 18:27:34 -08003075 zc->nextToUpdate = (U32)(iend - zc->base);
Yann Collet417890c2015-12-04 17:16:37 +01003076 return 0;
3077}
3078
3079
Nick Terrellf9c9af32016-10-19 17:22:08 -07003080/* Dictionaries that assign zero probability to symbols that show up causes problems
3081 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
3082 that we may encounter during compression.
3083 NOTE: This behavior is not standard and could be improved in the future. */
3084static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
3085 U32 s;
3086 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
3087 for (s = 0; s <= maxSymbolValue; ++s) {
3088 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
3089 }
3090 return 0;
3091}
3092
3093
Yann Colletb923f652016-01-26 03:14:20 +01003094/* Dictionary format :
Yann Colletbea78e82017-03-22 18:09:11 -07003095 * See :
3096 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
3097 */
Yann Collet16a0b102017-03-24 12:46:46 -07003098/*! ZSTD_loadZstdDictionary() :
3099 * @return : 0, or an error code
3100 * assumptions : magic number supposed already checked
3101 * dictSize supposed > 8
Yann Colletbea78e82017-03-22 18:09:11 -07003102 */
Yann Collet16a0b102017-03-24 12:46:46 -07003103static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
Yann Colletb923f652016-01-26 03:14:20 +01003104{
Yann Collet52a06222016-06-15 13:53:34 +02003105 const BYTE* dictPtr = (const BYTE*)dict;
3106 const BYTE* const dictEnd = dictPtr + dictSize;
Nick Terrellf9c9af32016-10-19 17:22:08 -07003107 short offcodeNCount[MaxOff+1];
3108 unsigned offcodeMaxValue = MaxOff;
Yann Collet643d9a22016-12-01 16:24:04 -08003109 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
Yann Colletfb810d62016-01-28 00:18:06 +01003110
Yann Colletbea78e82017-03-22 18:09:11 -07003111 dictPtr += 4; /* skip magic number */
Yann Collet1ad7c822017-05-22 17:06:04 -07003112 cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
Yann Colletbea78e82017-03-22 18:09:11 -07003113 dictPtr += 4;
3114
Yann Collet71ddeb62017-04-20 22:54:54 -07003115 { size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003116 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003117 dictPtr += hufHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003118 }
Yann Colletfb810d62016-01-28 00:18:06 +01003119
Nick Terrellf9c9af32016-10-19 17:22:08 -07003120 { unsigned offcodeLog;
Yann Collet52a06222016-06-15 13:53:34 +02003121 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003122 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003123 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003124 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
Yann Colletc17e0202017-04-20 12:50:02 -07003125 CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
3126 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003127 dictPtr += offcodeHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003128 }
Yann Colletfb810d62016-01-28 00:18:06 +01003129
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003130 { short matchlengthNCount[MaxML+1];
Nick Terrellbfd943a2016-10-17 16:55:52 -07003131 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
Yann Collet52a06222016-06-15 13:53:34 +02003132 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003133 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003134 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003135 /* Every match length code must have non-zero probability */
Yann Colletc17e0202017-04-20 12:50:02 -07003136 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
3137 CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
3138 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003139 dictPtr += matchlengthHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003140 }
Yann Colletfb810d62016-01-28 00:18:06 +01003141
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003142 { short litlengthNCount[MaxLL+1];
Nick Terrellbfd943a2016-10-17 16:55:52 -07003143 unsigned litlengthMaxValue = MaxLL, litlengthLog;
Yann Collet52a06222016-06-15 13:53:34 +02003144 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003145 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003146 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003147 /* Every literal length code must have non-zero probability */
Yann Colletc17e0202017-04-20 12:50:02 -07003148 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
3149 CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
3150 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003151 dictPtr += litlengthHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003152 }
Yann Colletfb810d62016-01-28 00:18:06 +01003153
Yann Collet52a06222016-06-15 13:53:34 +02003154 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
Yann Collet16a0b102017-03-24 12:46:46 -07003155 cctx->rep[0] = MEM_readLE32(dictPtr+0);
3156 cctx->rep[1] = MEM_readLE32(dictPtr+4);
3157 cctx->rep[2] = MEM_readLE32(dictPtr+8);
Yann Collet52a06222016-06-15 13:53:34 +02003158 dictPtr += 12;
3159
Yann Colletbea78e82017-03-22 18:09:11 -07003160 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
3161 U32 offcodeMax = MaxOff;
3162 if (dictContentSize <= ((U32)-1) - 128 KB) {
3163 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
3164 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
Nick Terrellb2c39a22016-10-24 14:11:27 -07003165 }
Yann Colletbea78e82017-03-22 18:09:11 -07003166 /* All offset values <= dictContentSize + 128 KB must be representable */
Nick Terrellf9c9af32016-10-19 17:22:08 -07003167 CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
Yann Colletbea78e82017-03-22 18:09:11 -07003168 /* All repCodes must be <= dictContentSize and != 0*/
3169 { U32 u;
3170 for (u=0; u<3; u++) {
3171 if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
3172 if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
Yann Collet16a0b102017-03-24 12:46:46 -07003173 } }
Nick Terrellf9c9af32016-10-19 17:22:08 -07003174
Yann Collet71ddeb62017-04-20 22:54:54 -07003175 cctx->fseCTables_ready = 1;
3176 cctx->hufCTable_repeatMode = HUF_repeat_valid;
Yann Collet16a0b102017-03-24 12:46:46 -07003177 return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
3178 }
Yann Colletb923f652016-01-26 03:14:20 +01003179}
3180
Yann Colletd1b26842016-03-15 01:24:33 +01003181/** ZSTD_compress_insertDictionary() :
3182* @return : 0, or an error code */
Yann Collet7bd1a292017-06-21 11:50:33 -07003183static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
3184 const void* dict, size_t dictSize,
3185 ZSTD_dictMode_e dictMode)
Yann Colletb923f652016-01-26 03:14:20 +01003186{
Yann Collet204b6b72017-06-21 15:13:00 -07003187 DEBUGLOG(5, "ZSTD_compress_insertDictionary");
Yann Colletc46fb922016-05-29 05:01:04 +02003188 if ((dict==NULL) || (dictSize<=8)) return 0;
Yann Colletb923f652016-01-26 03:14:20 +01003189
Yann Collet7bd1a292017-06-21 11:50:33 -07003190 /* dict restricted modes */
3191 if (dictMode==ZSTD_dm_rawContent)
Yann Collet16a0b102017-03-24 12:46:46 -07003192 return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
Yann Colletd1b26842016-03-15 01:24:33 +01003193
Yann Collet7d381612017-06-27 13:50:34 -07003194 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
Yann Collet204b6b72017-06-21 15:13:00 -07003195 if (dictMode == ZSTD_dm_auto) {
3196 DEBUGLOG(5, "raw content dictionary detected");
Yann Collet7bd1a292017-06-21 11:50:33 -07003197 return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
Yann Collet204b6b72017-06-21 15:13:00 -07003198 }
Yann Collet7bd1a292017-06-21 11:50:33 -07003199 if (dictMode == ZSTD_dm_fullDict)
3200 return ERROR(dictionary_wrong);
3201 assert(0); /* impossible */
3202 }
3203
3204 /* dict as full zstd dictionary */
Yann Collet16a0b102017-03-24 12:46:46 -07003205 return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
Yann Colletecd651b2016-01-07 15:35:18 +01003206}
3207
Yann Collet27caf2a2016-04-01 15:48:48 +02003208/*! ZSTD_compressBegin_internal() :
Yann Colletc3bce242017-06-20 16:09:11 -07003209 * @return : 0, or an error code */
Yann Colleta7737f62016-09-06 09:44:59 +02003210static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
Yann Collet1c8e1942016-01-26 16:31:22 +01003211 const void* dict, size_t dictSize,
Yann Collet7bd1a292017-06-21 11:50:33 -07003212 ZSTD_dictMode_e dictMode,
Yann Collet18803372017-05-22 18:21:51 -07003213 const ZSTD_CDict* cdict,
Yann Collet5ac72b42017-05-23 11:18:24 -07003214 ZSTD_parameters params, U64 pledgedSrcSize,
3215 ZSTD_buffered_policy_e zbuff)
Yann Colletf3eca252015-10-22 15:31:46 +01003216{
Yann Colleta3d99262017-06-29 14:44:49 -07003217 DEBUGLOG(4, "ZSTD_compressBegin_internal");
Yann Collet2084b042017-07-03 15:52:19 -07003218 DEBUGLOG(4, "dict ? %s", dict ? "dict" : (cdict ? "cdict" : "none"));
Yann Colleta3d99262017-06-29 14:44:49 -07003219 DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
Yann Collet5ac72b42017-05-23 11:18:24 -07003220 /* params are supposed to be fully validated at this point */
Yann Colletab9162e2017-04-11 10:46:20 -07003221 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet18803372017-05-22 18:21:51 -07003222 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3223
Yann Collet204b6b72017-06-21 15:13:00 -07003224 if (cdict && cdict->dictContentSize>0) {
Yann Collet5ac72b42017-05-23 11:18:24 -07003225 return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
Yann Collet204b6b72017-06-21 15:13:00 -07003226 params.fParams, pledgedSrcSize,
3227 zbuff);
3228 }
Yann Collet18803372017-05-22 18:21:51 -07003229
Yann Collet204b6b72017-06-21 15:13:00 -07003230 CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
3231 ZSTDcrp_continue, zbuff) );
Yann Collet7bd1a292017-06-21 11:50:33 -07003232 return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
Yann Collet88fcd292015-11-25 14:42:45 +01003233}
3234
3235
Yann Collet27caf2a2016-04-01 15:48:48 +02003236/*! ZSTD_compressBegin_advanced() :
3237* @return : 0, or an error code */
Yann Collet81e13ef2016-06-07 00:51:51 +02003238size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
Yann Collet27caf2a2016-04-01 15:48:48 +02003239 const void* dict, size_t dictSize,
Yann Collet52c04fe2016-07-07 11:53:18 +02003240 ZSTD_parameters params, unsigned long long pledgedSrcSize)
Yann Collet27caf2a2016-04-01 15:48:48 +02003241{
3242 /* compression parameters verification and optimization */
Yann Colletcf409a72016-09-26 16:41:05 +02003243 CHECK_F(ZSTD_checkCParams(params.cParams));
Yann Collet7bd1a292017-06-21 11:50:33 -07003244 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
Yann Collet5ac72b42017-05-23 11:18:24 -07003245 params, pledgedSrcSize, ZSTDb_not_buffered);
Yann Collet27caf2a2016-04-01 15:48:48 +02003246}
3247
3248
Yann Collet81e13ef2016-06-07 00:51:51 +02003249size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
Yann Colletb923f652016-01-26 03:14:20 +01003250{
Yann Collet6c6e1752016-06-27 15:28:45 +02003251 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
Yann Collet7bd1a292017-06-21 11:50:33 -07003252 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
Yann Collet5ac72b42017-05-23 11:18:24 -07003253 params, 0, ZSTDb_not_buffered);
Yann Collet1c8e1942016-01-26 16:31:22 +01003254}
Yann Collet083fcc82015-10-25 14:06:35 +01003255
inikep19bd48f2016-04-04 12:10:00 +02003256
Yann Colletb05c4822017-01-12 02:01:28 +01003257size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
Yann Collet083fcc82015-10-25 14:06:35 +01003258{
Yann Colletb05c4822017-01-12 02:01:28 +01003259 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
Yann Collet083fcc82015-10-25 14:06:35 +01003260}
3261
3262
Yann Collet62470b42016-07-28 15:29:08 +02003263/*! ZSTD_writeEpilogue() :
3264* Ends a frame.
Yann Collet88fcd292015-11-25 14:42:45 +01003265* @return : nb of bytes written into dst (or an error code) */
Yann Collet62470b42016-07-28 15:29:08 +02003266static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
Yann Collet2acb5d32015-10-29 16:49:43 +01003267{
Yann Colletc991cc12016-07-28 00:55:43 +02003268 BYTE* const ostart = (BYTE*)dst;
3269 BYTE* op = ostart;
Yann Collet6236eba2016-04-12 15:52:33 +02003270 size_t fhSize = 0;
Yann Collet2acb5d32015-10-29 16:49:43 +01003271
Yann Collet009d6042017-05-19 10:17:59 -07003272 DEBUGLOG(5, "ZSTD_writeEpilogue");
Yann Collet87c18b22016-08-26 01:43:47 +02003273 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
Yann Collet887e7da2016-04-11 20:12:27 +02003274
3275 /* special case : empty frame */
Yann Colletc991cc12016-07-28 00:55:43 +02003276 if (cctx->stage == ZSTDcs_init) {
Yann Collet1ad7c822017-05-22 17:06:04 -07003277 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
Yann Collet6236eba2016-04-12 15:52:33 +02003278 if (ZSTD_isError(fhSize)) return fhSize;
3279 dstCapacity -= fhSize;
3280 op += fhSize;
Yann Collet731ef162016-07-27 21:05:12 +02003281 cctx->stage = ZSTDcs_ongoing;
Yann Colletecd651b2016-01-07 15:35:18 +01003282 }
3283
Yann Colletc991cc12016-07-28 00:55:43 +02003284 if (cctx->stage != ZSTDcs_ending) {
3285 /* write one last empty block, make it the "last" block */
3286 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
3287 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3288 MEM_writeLE32(op, cBlockHeader24);
3289 op += ZSTD_blockHeaderSize;
3290 dstCapacity -= ZSTD_blockHeaderSize;
3291 }
3292
Yann Collet1ad7c822017-05-22 17:06:04 -07003293 if (cctx->appliedParams.fParams.checksumFlag) {
Yann Colletc991cc12016-07-28 00:55:43 +02003294 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
3295 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3296 MEM_writeLE32(op, checksum);
3297 op += 4;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003298 }
Yann Collet2acb5d32015-10-29 16:49:43 +01003299
Yann Collet731ef162016-07-27 21:05:12 +02003300 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
Yann Colletc991cc12016-07-28 00:55:43 +02003301 return op-ostart;
Yann Collet2acb5d32015-10-29 16:49:43 +01003302}
3303
Yann Colletfd416f12016-01-30 03:14:15 +01003304
Yann Collet62470b42016-07-28 15:29:08 +02003305size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
3306 void* dst, size_t dstCapacity,
3307 const void* src, size_t srcSize)
3308{
3309 size_t endResult;
Yann Collet009d6042017-05-19 10:17:59 -07003310 size_t const cSize = ZSTD_compressContinue_internal(cctx,
3311 dst, dstCapacity, src, srcSize,
3312 1 /* frame mode */, 1 /* last chunk */);
Yann Collet62470b42016-07-28 15:29:08 +02003313 if (ZSTD_isError(cSize)) return cSize;
3314 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
3315 if (ZSTD_isError(endResult)) return endResult;
Yann Collet1ad7c822017-05-22 17:06:04 -07003316 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
Yann Collet2cf77552017-06-16 12:34:41 -07003317 DEBUGLOG(5, "end of frame : controlling src size");
Yann Colleta0ba8492017-06-16 13:29:17 -07003318 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
Yann Collet2cf77552017-06-16 12:34:41 -07003319 DEBUGLOG(5, "error : pledgedSrcSize = %u, while realSrcSize = %u",
Yann Colletaee916e2017-06-16 17:01:46 -07003320 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
Yann Collet0be6fd32017-05-08 16:08:01 -07003321 return ERROR(srcSize_wrong);
Yann Collet9e73f2f2017-06-16 12:24:01 -07003322 } }
Yann Collet62470b42016-07-28 15:29:08 +02003323 return cSize + endResult;
3324}
3325
3326
Yann Collet19c10022016-07-28 01:25:46 +02003327static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
Yann Colletd1b26842016-03-15 01:24:33 +01003328 void* dst, size_t dstCapacity,
Yann Collet88fcd292015-11-25 14:42:45 +01003329 const void* src, size_t srcSize,
Yann Collet31683c02015-12-18 01:26:48 +01003330 const void* dict,size_t dictSize,
Yann Collet88fcd292015-11-25 14:42:45 +01003331 ZSTD_parameters params)
Yann Colletf3eca252015-10-22 15:31:46 +01003332{
Yann Collet7bd1a292017-06-21 11:50:33 -07003333 CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
3334 params, srcSize, ZSTDb_not_buffered) );
Yann Collet62470b42016-07-28 15:29:08 +02003335 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
Yann Colletf3eca252015-10-22 15:31:46 +01003336}
3337
Yann Collet21588e32016-03-30 16:50:44 +02003338size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
3339 void* dst, size_t dstCapacity,
3340 const void* src, size_t srcSize,
3341 const void* dict,size_t dictSize,
3342 ZSTD_parameters params)
3343{
Yann Colletcf409a72016-09-26 16:41:05 +02003344 CHECK_F(ZSTD_checkCParams(params.cParams));
Yann Collet21588e32016-03-30 16:50:44 +02003345 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
3346}
3347
Yann Colletc17e0202017-04-20 12:50:02 -07003348size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
3349 const void* dict, size_t dictSize, int compressionLevel)
Yann Collet31683c02015-12-18 01:26:48 +01003350{
Yann Collet407a11f2016-11-03 15:52:01 -07003351 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
Yann Collet3b719252016-03-30 19:48:05 +02003352 params.fParams.contentSizeFlag = 1;
Yann Collet21588e32016-03-30 16:50:44 +02003353 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
Yann Collet31683c02015-12-18 01:26:48 +01003354}
3355
Yann Colletd1b26842016-03-15 01:24:33 +01003356size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
Yann Collet083fcc82015-10-25 14:06:35 +01003357{
Yann Collet21588e32016-03-30 16:50:44 +02003358 return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
Yann Collet083fcc82015-10-25 14:06:35 +01003359}
3360
Yann Colletd1b26842016-03-15 01:24:33 +01003361size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
Yann Colletf3eca252015-10-22 15:31:46 +01003362{
Yann Collet44fe9912015-10-29 22:02:40 +01003363 size_t result;
Yann Collet5be2dd22015-11-11 13:43:58 +01003364 ZSTD_CCtx ctxBody;
Yann Collet712def92015-10-29 18:41:45 +01003365 memset(&ctxBody, 0, sizeof(ctxBody));
Yann Colletae728a42017-05-30 17:11:39 -07003366 ctxBody.customMem = ZSTD_defaultCMem;
Yann Colletd1b26842016-03-15 01:24:33 +01003367 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
Yann Colletae728a42017-05-30 17:11:39 -07003368 ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
Yann Collet44fe9912015-10-29 22:02:40 +01003369 return result;
Yann Colletf3eca252015-10-22 15:31:46 +01003370}
Yann Colletfdcad6d2015-12-17 23:50:15 +01003371
Yann Colletfd416f12016-01-30 03:14:15 +01003372
Yann Collet81e13ef2016-06-07 00:51:51 +02003373/* ===== Dictionary API ===== */
3374
Yann Collet09ae03a2017-06-26 16:47:32 -07003375/*! ZSTD_estimateCDictSize_advanced() :
Yann Colleta1d67042017-05-08 17:51:49 -07003376 * Estimate amount of memory that will be needed to create a dictionary with following arguments */
Yann Collet09ae03a2017-06-26 16:47:32 -07003377size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, unsigned byReference)
Yann Colleta1d67042017-05-08 17:51:49 -07003378{
Yann Collet7bd1a292017-06-21 11:50:33 -07003379 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
Yann Collet31af8292017-06-26 15:52:39 -07003380 DEBUGLOG(5, "CCtx estimate : %u", (U32)ZSTD_estimateCCtxSize_advanced(cParams));
3381 return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_advanced(cParams)
Yann Collet25989e32017-05-25 15:07:37 -07003382 + (byReference ? 0 : dictSize);
Yann Colleta1d67042017-05-08 17:51:49 -07003383}
3384
Yann Collet09ae03a2017-06-26 16:47:32 -07003385size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
3386{
3387 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3388 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, 0);
3389}
3390
Yann Colletd7c65892016-09-15 02:50:27 +02003391size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3392{
3393 if (cdict==NULL) return 0; /* support sizeof on NULL */
Yann Collet7bd1a292017-06-21 11:50:33 -07003394 DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
3395 DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
Yann Colletaca113f2016-12-23 22:25:03 +01003396 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
Yann Colletd7c65892016-09-15 02:50:27 +02003397}
3398
Yann Collet1c3ab0c2017-04-27 12:57:11 -07003399static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
3400{
3401 ZSTD_parameters params;
3402 params.cParams = cParams;
3403 params.fParams = fParams;
3404 return params;
3405}
3406
Yann Colletcdf7e822017-05-25 18:05:49 -07003407static size_t ZSTD_initCDict_internal(
3408 ZSTD_CDict* cdict,
Yann Collet7bd1a292017-06-21 11:50:33 -07003409 const void* dictBuffer, size_t dictSize,
3410 unsigned byReference, ZSTD_dictMode_e dictMode,
Yann Colletcdf7e822017-05-25 18:05:49 -07003411 ZSTD_compressionParameters cParams)
3412{
Yann Collet204b6b72017-06-21 15:13:00 -07003413 DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
Yann Colletcdf7e822017-05-25 18:05:49 -07003414 if ((byReference) || (!dictBuffer) || (!dictSize)) {
3415 cdict->dictBuffer = NULL;
3416 cdict->dictContent = dictBuffer;
3417 } else {
3418 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
Yann Colletcdf7e822017-05-25 18:05:49 -07003419 cdict->dictBuffer = internalBuffer;
3420 cdict->dictContent = internalBuffer;
Yann Colletc3bce242017-06-20 16:09:11 -07003421 if (!internalBuffer) return ERROR(memory_allocation);
3422 memcpy(internalBuffer, dictBuffer, dictSize);
Yann Colletcdf7e822017-05-25 18:05:49 -07003423 }
3424 cdict->dictContentSize = dictSize;
3425
3426 { ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */,
3427 0 /* checksumFlag */, 0 /* noDictIDFlag */ }; /* dummy */
3428 ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
Yann Collet7bd1a292017-06-21 11:50:33 -07003429 CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
3430 cdict->dictContent, dictSize, dictMode,
3431 NULL,
3432 params, ZSTD_CONTENTSIZE_UNKNOWN,
3433 ZSTDb_not_buffered) );
Yann Colletcdf7e822017-05-25 18:05:49 -07003434 }
3435
3436 return 0;
3437}
3438
Yann Collet7bd1a292017-06-21 11:50:33 -07003439ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
3440 unsigned byReference, ZSTD_dictMode_e dictMode,
Yann Collet31533ba2017-04-27 00:29:04 -07003441 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
Yann Collet81e13ef2016-06-07 00:51:51 +02003442{
Yann Collet204b6b72017-06-21 15:13:00 -07003443 DEBUGLOG(5, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
Yann Colletae728a42017-05-30 17:11:39 -07003444 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
Yann Collet81e13ef2016-06-07 00:51:51 +02003445
Yann Collet466f92e2017-06-20 16:25:29 -07003446 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
Yann Collet81e13ef2016-06-07 00:51:51 +02003447 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
3448
Yann Collet1f57c2e2016-12-21 16:20:11 +01003449 if (!cdict || !cctx) {
Yann Collet23b6e052016-08-28 21:05:43 -07003450 ZSTD_free(cdict, customMem);
Przemyslaw Skibinskid8114e52017-02-21 18:59:56 +01003451 ZSTD_freeCCtx(cctx);
Yann Collet81e13ef2016-06-07 00:51:51 +02003452 return NULL;
3453 }
Yann Colletcdf7e822017-05-25 18:05:49 -07003454 cdict->refContext = cctx;
Yann Collet81e13ef2016-06-07 00:51:51 +02003455
Yann Colletcdf7e822017-05-25 18:05:49 -07003456 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
Yann Collet7bd1a292017-06-21 11:50:33 -07003457 dictBuffer, dictSize,
3458 byReference, dictMode,
Yann Colletcdf7e822017-05-25 18:05:49 -07003459 cParams) )) {
3460 ZSTD_freeCDict(cdict);
3461 return NULL;
Nick Terrell3b9cdf92016-10-12 20:54:42 -07003462 }
Yann Collet1f57c2e2016-12-21 16:20:11 +01003463
Yann Collet81e13ef2016-06-07 00:51:51 +02003464 return cdict;
3465 }
3466}
3467
3468ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3469{
Yann Collet31533ba2017-04-27 00:29:04 -07003470 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
Yann Collet7bd1a292017-06-21 11:50:33 -07003471 return ZSTD_createCDict_advanced(dict, dictSize,
3472 0 /* byReference */, ZSTD_dm_auto,
3473 cParams, ZSTD_defaultCMem);
Yann Collet1f57c2e2016-12-21 16:20:11 +01003474}
3475
3476ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
3477{
Yann Collet31533ba2017-04-27 00:29:04 -07003478 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
Yann Collet7bd1a292017-06-21 11:50:33 -07003479 return ZSTD_createCDict_advanced(dict, dictSize,
3480 1 /* byReference */, ZSTD_dm_auto,
3481 cParams, ZSTD_defaultCMem);
Yann Collet81e13ef2016-06-07 00:51:51 +02003482}
3483
3484size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3485{
Yann Collet23b6e052016-08-28 21:05:43 -07003486 if (cdict==NULL) return 0; /* support free on NULL */
Yann Collet993060e2016-09-21 16:46:08 +02003487 { ZSTD_customMem const cMem = cdict->refContext->customMem;
Yann Collet23b6e052016-08-28 21:05:43 -07003488 ZSTD_freeCCtx(cdict->refContext);
Yann Collet4e5eea62016-12-21 16:44:35 +01003489 ZSTD_free(cdict->dictBuffer, cMem);
Yann Collet23b6e052016-08-28 21:05:43 -07003490 ZSTD_free(cdict, cMem);
3491 return 0;
3492 }
Yann Collet81e13ef2016-06-07 00:51:51 +02003493}
3494
Yann Colletcdf7e822017-05-25 18:05:49 -07003495/*! ZSTD_initStaticCDict_advanced() :
3496 * Generate a digested dictionary in provided memory area.
3497 * workspace: The memory area to emplace the dictionary into.
3498 * Provided pointer must 8-bytes aligned.
3499 * It must outlive dictionary usage.
3500 * workspaceSize: Use ZSTD_estimateCDictSize()
3501 * to determine how large workspace must be.
3502 * cParams : use ZSTD_getCParams() to transform a compression level
3503 * into its relevants cParams.
3504 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
3505 * Note : there is no corresponding "free" function.
3506 * Since workspace was allocated externally, it must be freed externally.
3507 */
3508ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
Yann Collet7bd1a292017-06-21 11:50:33 -07003509 const void* dict, size_t dictSize,
3510 unsigned byReference, ZSTD_dictMode_e dictMode,
Yann Colletcdf7e822017-05-25 18:05:49 -07003511 ZSTD_compressionParameters cParams)
3512{
Yann Collet31af8292017-06-26 15:52:39 -07003513 size_t const cctxSize = ZSTD_estimateCCtxSize_advanced(cParams);
Yann Colletcdf7e822017-05-25 18:05:49 -07003514 size_t const neededSize = sizeof(ZSTD_CDict) + (byReference ? 0 : dictSize)
3515 + cctxSize;
3516 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
3517 void* ptr;
Yann Collet2cf77552017-06-16 12:34:41 -07003518 DEBUGLOG(5, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
Yann Colletcdf7e822017-05-25 18:05:49 -07003519 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
Yann Collet2cf77552017-06-16 12:34:41 -07003520 DEBUGLOG(5, "(workspaceSize < neededSize) : (%u < %u) => %u",
Yann Colletcdf7e822017-05-25 18:05:49 -07003521 (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
3522 if (workspaceSize < neededSize) return NULL;
3523
3524 if (!byReference) {
3525 memcpy(cdict+1, dict, dictSize);
3526 dict = cdict+1;
3527 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
3528 } else {
3529 ptr = cdict+1;
3530 }
3531 cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
3532
3533 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
Yann Collet7bd1a292017-06-21 11:50:33 -07003534 dict, dictSize,
3535 1 /* byReference */, dictMode,
3536 cParams) ))
Yann Colletcdf7e822017-05-25 18:05:49 -07003537 return NULL;
3538
3539 return cdict;
3540}
3541
Yann Collet8c910d22017-06-03 01:15:02 -07003542ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
Yann Collet95162342016-10-25 16:19:52 -07003543 return ZSTD_getParamsFromCCtx(cdict->refContext);
3544}
3545
Yann Collet715b9aa2017-04-18 13:55:53 -07003546/* ZSTD_compressBegin_usingCDict_advanced() :
Yann Collet4f818182017-04-17 17:57:35 -07003547 * cdict must be != NULL */
Yann Collet715b9aa2017-04-18 13:55:53 -07003548size_t ZSTD_compressBegin_usingCDict_advanced(
Yann Collet4f818182017-04-17 17:57:35 -07003549 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
3550 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
Yann Collet4cb21292016-09-15 14:54:07 +02003551{
Yann Collet5ac72b42017-05-23 11:18:24 -07003552 if (cdict==NULL) return ERROR(dictionary_wrong);
Yann Collet18803372017-05-22 18:21:51 -07003553 { ZSTD_parameters params = cdict->refContext->appliedParams;
Yann Collet4f818182017-04-17 17:57:35 -07003554 params.fParams = fParams;
Yann Collet18803372017-05-22 18:21:51 -07003555 DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
Yann Collet7bd1a292017-06-21 11:50:33 -07003556 return ZSTD_compressBegin_internal(cctx,
3557 NULL, 0, ZSTD_dm_auto,
3558 cdict,
3559 params, pledgedSrcSize,
3560 ZSTDb_not_buffered);
Sean Purcell2db72492017-02-09 10:50:43 -08003561 }
Yann Collet4cb21292016-09-15 14:54:07 +02003562}
3563
Yann Collet4f818182017-04-17 17:57:35 -07003564/* ZSTD_compressBegin_usingCDict() :
3565 * pledgedSrcSize=0 means "unknown"
3566 * if pledgedSrcSize>0, it will enable contentSizeFlag */
Yann Collet768df122017-04-26 15:42:10 -07003567size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
Yann Collet4f818182017-04-17 17:57:35 -07003568{
Yann Collet768df122017-04-26 15:42:10 -07003569 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
Yann Collet009d6042017-05-19 10:17:59 -07003570 DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
Yann Collet768df122017-04-26 15:42:10 -07003571 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
Yann Collet4f818182017-04-17 17:57:35 -07003572}
3573
Yann Colletf4bd8572017-04-27 11:31:55 -07003574size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
3575 void* dst, size_t dstCapacity,
3576 const void* src, size_t srcSize,
3577 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
3578{
3579 CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
3580 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
Yann Collet81e13ef2016-06-07 00:51:51 +02003581}
3582
Yann Collet07639052016-08-03 01:57:57 +02003583/*! ZSTD_compress_usingCDict() :
Yann Collet4f818182017-04-17 17:57:35 -07003584 * Compression using a digested Dictionary.
3585 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
3586 * Note that compression parameters are decided at CDict creation time
3587 * while frame parameters are hardcoded */
Yann Collet4cb21292016-09-15 14:54:07 +02003588size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
3589 void* dst, size_t dstCapacity,
3590 const void* src, size_t srcSize,
3591 const ZSTD_CDict* cdict)
Yann Collet81e13ef2016-06-07 00:51:51 +02003592{
Yann Collet4f818182017-04-17 17:57:35 -07003593 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
Yann Colletf4bd8572017-04-27 11:31:55 -07003594 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
Yann Collet81e13ef2016-06-07 00:51:51 +02003595}
3596
3597
3598
Yann Collet104e5b02016-08-12 13:04:27 +02003599/* ******************************************************************
3600* Streaming
3601********************************************************************/
Yann Collet5a0c8e22016-08-12 01:20:36 +02003602
Yann Collet5a0c8e22016-08-12 01:20:36 +02003603ZSTD_CStream* ZSTD_createCStream(void)
3604{
Yann Colletae728a42017-05-30 17:11:39 -07003605 return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003606}
3607
Yann Colletdde10b22017-06-26 17:44:26 -07003608ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
3609{
3610 return ZSTD_initStaticCCtx(workspace, workspaceSize);
3611}
3612
Yann Collet5a0c8e22016-08-12 01:20:36 +02003613ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
Yann Colletae728a42017-05-30 17:11:39 -07003614{ /* CStream and CCtx are now same object */
Yann Collet6fb2f242017-05-10 11:06:06 -07003615 return ZSTD_createCCtx_advanced(customMem);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003616}
3617
3618size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
3619{
Yann Collet78553662017-05-08 17:15:00 -07003620 return ZSTD_freeCCtx(zcs); /* same object */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003621}
3622
Yann Collet5a0c8e22016-08-12 01:20:36 +02003623
3624
Yann Collet104e5b02016-08-12 13:04:27 +02003625/*====== Initialization ======*/
3626
Yann Colletfa3671e2017-05-19 10:51:30 -07003627size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003628
Yann Colletc17e0202017-04-20 12:50:02 -07003629size_t ZSTD_CStreamOutSize(void)
3630{
Yann Colletfa3671e2017-05-19 10:51:30 -07003631 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
Yann Colletc17e0202017-04-20 12:50:02 -07003632}
Yann Collet5a0c8e22016-08-12 01:20:36 +02003633
Yann Collet1ad7c822017-05-22 17:06:04 -07003634static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
Yann Collet2e427422017-06-27 17:09:12 -07003635 const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
3636 const ZSTD_CDict* cdict,
Yann Colletb7372932017-06-27 15:49:12 -07003637 ZSTD_parameters params, unsigned long long pledgedSrcSize)
Yann Collet4cb21292016-09-15 14:54:07 +02003638{
Yann Colleta3d99262017-06-29 14:44:49 -07003639 DEBUGLOG(4, "ZSTD_resetCStream_internal");
Yann Colletb7372932017-06-27 15:49:12 -07003640 /* params are supposed to be fully validated at this point */
3641 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
3642 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
Yann Colletee5b7252016-10-27 14:20:55 -07003643
Yann Collet7bd1a292017-06-21 11:50:33 -07003644 CHECK_F( ZSTD_compressBegin_internal(zcs,
Yann Collet2e427422017-06-27 17:09:12 -07003645 dict, dictSize, dictMode,
Yann Colletb7372932017-06-27 15:49:12 -07003646 cdict,
Yann Collet7bd1a292017-06-21 11:50:33 -07003647 params, pledgedSrcSize,
3648 ZSTDb_buffered) );
Yann Collet4cb21292016-09-15 14:54:07 +02003649
3650 zcs->inToCompress = 0;
3651 zcs->inBuffPos = 0;
3652 zcs->inBuffTarget = zcs->blockSize;
3653 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
Yann Collet0be6fd32017-05-08 16:08:01 -07003654 zcs->streamStage = zcss_load;
Yann Collet4cb21292016-09-15 14:54:07 +02003655 zcs->frameEnded = 0;
3656 return 0; /* ready to go */
3657}
3658
Sean Purcell2db72492017-02-09 10:50:43 -08003659size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3660{
Yann Collet1ad7c822017-05-22 17:06:04 -07003661 ZSTD_parameters params = zcs->requestedParams;
Yann Collet0be6fd32017-05-08 16:08:01 -07003662 params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
Yann Colletb0739bc2017-05-22 17:45:15 -07003663 DEBUGLOG(5, "ZSTD_resetCStream");
Yann Collet009d6042017-05-19 10:17:59 -07003664 if (zcs->compressionLevel != ZSTD_CLEVEL_CUSTOM) {
3665 params.cParams = ZSTD_getCParams(zcs->compressionLevel, pledgedSrcSize, 0 /* dictSize */);
3666 }
Yann Collet2e427422017-06-27 17:09:12 -07003667 return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
Sean Purcell2db72492017-02-09 10:50:43 -08003668}
Sean Purcell2db72492017-02-09 10:50:43 -08003669
Yann Collet204b6b72017-06-21 15:13:00 -07003670/*! ZSTD_initCStream_internal() :
Yann Colletb7372932017-06-27 15:49:12 -07003671 * Note : not static, but hidden (not exposed). Used by zstdmt_compress.c
Yann Collet204b6b72017-06-21 15:13:00 -07003672 * Assumption 1 : params are valid
3673 * Assumption 2 : either dict, or cdict, is defined, not both */
Yann Collet8c910d22017-06-03 01:15:02 -07003674size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
3675 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3676 ZSTD_parameters params, unsigned long long pledgedSrcSize)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003677{
Yann Collet204b6b72017-06-21 15:13:00 -07003678 DEBUGLOG(5, "ZSTD_initCStream_internal");
Yann Collet4b987ad2017-04-10 17:50:44 -07003679 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet8c910d22017-06-03 01:15:02 -07003680 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
Yann Collete88034f2017-04-10 22:24:02 -07003681
Yann Collete88034f2017-04-10 22:24:02 -07003682 if (dict && dictSize >= 8) {
Yann Collet204b6b72017-06-21 15:13:00 -07003683 DEBUGLOG(5, "loading dictionary of size %u", (U32)dictSize);
Yann Colletc7fe2622017-05-23 13:16:00 -07003684 if (zcs->staticSize) { /* static CCtx : never uses malloc */
3685 /* incompatible with internal cdict creation */
3686 return ERROR(memory_allocation);
Yann Collet02d37aa2017-04-05 14:53:51 -07003687 }
Yann Collete88034f2017-04-10 22:24:02 -07003688 ZSTD_freeCDict(zcs->cdictLocal);
Yann Collet7bd1a292017-06-21 11:50:33 -07003689 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
Yann Collet2e427422017-06-27 17:09:12 -07003690 zcs->dictContentByRef, zcs->dictMode,
Yann Collet204b6b72017-06-21 15:13:00 -07003691 params.cParams, zcs->customMem);
Yann Collete88034f2017-04-10 22:24:02 -07003692 zcs->cdict = zcs->cdictLocal;
Yann Collet466f92e2017-06-20 16:25:29 -07003693 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
Yann Collet8c910d22017-06-03 01:15:02 -07003694 } else {
3695 if (cdict) {
3696 ZSTD_parameters const cdictParams = ZSTD_getParamsFromCDict(cdict);
3697 params.cParams = cdictParams.cParams; /* cParams are enforced from cdict */
3698 }
Yann Collet466f92e2017-06-20 16:25:29 -07003699 ZSTD_freeCDict(zcs->cdictLocal);
3700 zcs->cdictLocal = NULL;
Yann Collet8c910d22017-06-03 01:15:02 -07003701 zcs->cdict = cdict;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003702 }
Sean Purcell57d423c2017-01-17 11:04:08 -08003703
Yann Collet8c910d22017-06-03 01:15:02 -07003704 zcs->requestedParams = params;
3705 zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Collet2e427422017-06-27 17:09:12 -07003706 return ZSTD_resetCStream_internal(zcs, NULL, 0, zcs->dictMode, zcs->cdict, params, pledgedSrcSize);
Yann Colletee5b7252016-10-27 14:20:55 -07003707}
3708
3709/* ZSTD_initCStream_usingCDict_advanced() :
Yann Collet5a0c8e22016-08-12 01:20:36 +02003710 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
Yann Collet8c910d22017-06-03 01:15:02 -07003711size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
3712 const ZSTD_CDict* cdict,
3713 ZSTD_frameParameters fParams,
3714 unsigned long long pledgedSrcSize)
3715{ /* cannot handle NULL cdict (does not know what to do) */
3716 if (!cdict) return ERROR(dictionary_wrong);
Sean Purcell2db72492017-02-09 10:50:43 -08003717 { ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003718 params.fParams = fParams;
Yann Collet8c910d22017-06-03 01:15:02 -07003719 return ZSTD_initCStream_internal(zcs,
3720 NULL, 0, cdict,
3721 params, pledgedSrcSize);
Yann Collete88034f2017-04-10 22:24:02 -07003722 }
3723}
3724
3725/* note : cdict must outlive compression session */
3726size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
3727{
Yann Collet8c910d22017-06-03 01:15:02 -07003728 ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ };
3729 return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0); /* note : will check that cdict != NULL */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003730}
3731
Yann Collet95162342016-10-25 16:19:52 -07003732size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
3733 const void* dict, size_t dictSize,
3734 ZSTD_parameters params, unsigned long long pledgedSrcSize)
3735{
Yann Collet4b987ad2017-04-10 17:50:44 -07003736 CHECK_F( ZSTD_checkCParams(params.cParams) );
Yann Collet1ad7c822017-05-22 17:06:04 -07003737 zcs->requestedParams = params;
3738 zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Collet8c910d22017-06-03 01:15:02 -07003739 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, pledgedSrcSize);
Yann Collet95162342016-10-25 16:19:52 -07003740}
3741
Yann Collet5a0c8e22016-08-12 01:20:36 +02003742size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3743{
3744 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
Yann Collet1ad7c822017-05-22 17:06:04 -07003745 zcs->compressionLevel = compressionLevel;
Yann Collet8c910d22017-06-03 01:15:02 -07003746 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, 0);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003747}
3748
Yann Collete795c8a2016-12-13 16:39:36 +01003749size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
3750{
Yann Colletd564faa2016-12-18 21:39:15 +01003751 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
Yann Collete88034f2017-04-10 22:24:02 -07003752 params.fParams.contentSizeFlag = (pledgedSrcSize>0);
Yann Collet8c910d22017-06-03 01:15:02 -07003753 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, pledgedSrcSize);
Yann Collete795c8a2016-12-13 16:39:36 +01003754}
3755
Yann Collet5a0c8e22016-08-12 01:20:36 +02003756size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3757{
Yann Collete88034f2017-04-10 22:24:02 -07003758 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
Yann Collet8c910d22017-06-03 01:15:02 -07003759 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
Yann Colletcb327632016-08-23 00:30:31 +02003760}
Yann Collet5a0c8e22016-08-12 01:20:36 +02003761
Yann Collet104e5b02016-08-12 13:04:27 +02003762/*====== Compression ======*/
Yann Collet5a0c8e22016-08-12 01:20:36 +02003763
Yann Collet01b15492017-05-30 18:10:26 -07003764MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
3765 const void* src, size_t srcSize)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003766{
3767 size_t const length = MIN(dstCapacity, srcSize);
Yann Collet18ab5af2017-05-31 09:59:22 -07003768 if (length) memcpy(dst, src, length);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003769 return length;
3770}
3771
Yann Colleta3d99262017-06-29 14:44:49 -07003772/** ZSTD_compressStream_generic():
3773 * internal function for all *compressStream*() variants and *compress_generic()
3774 * @return : hint size for next input */
Yann Colletd5c046c2017-06-30 14:51:01 -07003775size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
3776 ZSTD_outBuffer* output,
3777 ZSTD_inBuffer* input,
3778 ZSTD_EndDirective const flushMode)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003779{
Yann Collet01b15492017-05-30 18:10:26 -07003780 const char* const istart = (const char*)input->src;
3781 const char* const iend = istart + input->size;
3782 const char* ip = istart + input->pos;
3783 char* const ostart = (char*)output->dst;
3784 char* const oend = ostart + output->size;
3785 char* op = ostart + output->pos;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003786 U32 someMoreWork = 1;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003787
Yann Collet58e8d792017-06-02 18:20:48 -07003788 /* check expectations */
Yann Collet2cb97742017-07-04 12:39:26 -07003789 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (U32)flushMode);
Yann Collet6d4fef32017-05-17 18:36:15 -07003790 assert(zcs->inBuff != NULL);
Yann Colletecb0f462017-06-21 17:25:01 -07003791 assert(zcs->inBuffSize>0);
Yann Collet6d4fef32017-05-17 18:36:15 -07003792 assert(zcs->outBuff!= NULL);
Yann Colletecb0f462017-06-21 17:25:01 -07003793 assert(zcs->outBuffSize>0);
Yann Collet58e8d792017-06-02 18:20:48 -07003794 assert(output->pos <= output->size);
3795 assert(input->pos <= input->size);
Yann Collet009d6042017-05-19 10:17:59 -07003796
Yann Collet5a0c8e22016-08-12 01:20:36 +02003797 while (someMoreWork) {
Yann Collet0be6fd32017-05-08 16:08:01 -07003798 switch(zcs->streamStage)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003799 {
Yann Collet1ad7c822017-05-22 17:06:04 -07003800 case zcss_init:
3801 /* call ZSTD_initCStream() first ! */
3802 return ERROR(init_missing);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003803
3804 case zcss_load:
Yann Colleta3d99262017-06-29 14:44:49 -07003805 if ( (flushMode == ZSTD_e_end)
Yann Collet2084b042017-07-03 15:52:19 -07003806 && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */
Yann Colleta3d99262017-06-29 14:44:49 -07003807 && (zcs->inBuffPos == 0) ) {
3808 /* shortcut to compression pass directly into output buffer */
3809 size_t const cSize = ZSTD_compressEnd(zcs,
3810 op, oend-op, ip, iend-ip);
3811 DEBUGLOG(4, "ZSTD_compressEnd : %u", (U32)cSize);
3812 if (ZSTD_isError(cSize)) return cSize;
3813 ip = iend;
3814 op += cSize;
3815 zcs->frameEnded = 1;
3816 ZSTD_startNewCompression(zcs);
3817 someMoreWork = 0; break;
Yann Collet2084b042017-07-03 15:52:19 -07003818 }
3819 /* complete loading into inBuffer */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003820 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
Yann Collet06589fe2017-05-31 10:03:20 -07003821 size_t const loaded = ZSTD_limitCopy(
3822 zcs->inBuff + zcs->inBuffPos, toLoad,
3823 ip, iend-ip);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003824 zcs->inBuffPos += loaded;
3825 ip += loaded;
Yann Collet009d6042017-05-19 10:17:59 -07003826 if ( (flushMode == ZSTD_e_continue)
3827 && (zcs->inBuffPos < zcs->inBuffTarget) ) {
3828 /* not enough input to fill full block : stop here */
3829 someMoreWork = 0; break;
3830 }
3831 if ( (flushMode == ZSTD_e_flush)
3832 && (zcs->inBuffPos == zcs->inToCompress) ) {
3833 /* empty */
3834 someMoreWork = 0; break;
Yann Collet559ee822017-06-16 11:58:21 -07003835 }
3836 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003837 /* compress current block (note : this stage cannot be stopped in the middle) */
Yann Collet009d6042017-05-19 10:17:59 -07003838 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003839 { void* cDst;
3840 size_t cSize;
3841 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
3842 size_t oSize = oend-op;
Yann Collet009d6042017-05-19 10:17:59 -07003843 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003844 if (oSize >= ZSTD_compressBound(iSize))
Yann Collet559ee822017-06-16 11:58:21 -07003845 cDst = op; /* compress into output buffer, to skip flush stage */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003846 else
3847 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
Yann Collet009d6042017-05-19 10:17:59 -07003848 cSize = lastBlock ?
3849 ZSTD_compressEnd(zcs, cDst, oSize,
3850 zcs->inBuff + zcs->inToCompress, iSize) :
3851 ZSTD_compressContinue(zcs, cDst, oSize,
3852 zcs->inBuff + zcs->inToCompress, iSize);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003853 if (ZSTD_isError(cSize)) return cSize;
Yann Collet009d6042017-05-19 10:17:59 -07003854 zcs->frameEnded = lastBlock;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003855 /* prepare next block */
3856 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
3857 if (zcs->inBuffTarget > zcs->inBuffSize)
Yann Collet009d6042017-05-19 10:17:59 -07003858 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
Yann Collet8b21ec42017-05-19 19:46:15 -07003859 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
3860 (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
3861 if (!lastBlock)
3862 assert(zcs->inBuffTarget <= zcs->inBuffSize);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003863 zcs->inToCompress = zcs->inBuffPos;
Yann Collet009d6042017-05-19 10:17:59 -07003864 if (cDst == op) { /* no need to flush */
3865 op += cSize;
3866 if (zcs->frameEnded) {
Yann Collet559ee822017-06-16 11:58:21 -07003867 DEBUGLOG(5, "Frame completed directly in outBuffer");
Yann Collet009d6042017-05-19 10:17:59 -07003868 someMoreWork = 0;
Yann Colletb26728c2017-06-16 14:00:46 -07003869 ZSTD_startNewCompression(zcs);
Yann Collet009d6042017-05-19 10:17:59 -07003870 }
3871 break;
3872 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003873 zcs->outBuffContentSize = cSize;
3874 zcs->outBuffFlushedSize = 0;
Yann Collet009d6042017-05-19 10:17:59 -07003875 zcs->streamStage = zcss_flush; /* pass-through to flush stage */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003876 }
Jos Collin7cd7a752017-05-11 13:17:20 +05303877 /* fall-through */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003878 case zcss_flush:
Yann Collet009d6042017-05-19 10:17:59 -07003879 DEBUGLOG(5, "flush stage");
Yann Collet5a0c8e22016-08-12 01:20:36 +02003880 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
Yann Collet01b15492017-05-30 18:10:26 -07003881 size_t const flushed = ZSTD_limitCopy(op, oend-op,
3882 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
Yann Collet2cb97742017-07-04 12:39:26 -07003883 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
3884 (U32)toFlush, (U32)(oend-op), (U32)flushed);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003885 op += flushed;
3886 zcs->outBuffFlushedSize += flushed;
Yann Collet01b15492017-05-30 18:10:26 -07003887 if (toFlush!=flushed) {
Yann Collet2cb97742017-07-04 12:39:26 -07003888 /* flush not fully completed, presumably because dst is too small */
3889 assert(op==oend);
Yann Collet01b15492017-05-30 18:10:26 -07003890 someMoreWork = 0;
3891 break;
3892 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003893 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
Yann Collet009d6042017-05-19 10:17:59 -07003894 if (zcs->frameEnded) {
Yann Collet559ee822017-06-16 11:58:21 -07003895 DEBUGLOG(5, "Frame completed on flush");
Yann Collet009d6042017-05-19 10:17:59 -07003896 someMoreWork = 0;
Yann Colletb26728c2017-06-16 14:00:46 -07003897 ZSTD_startNewCompression(zcs);
Yann Collet009d6042017-05-19 10:17:59 -07003898 break;
3899 }
Yann Collet0be6fd32017-05-08 16:08:01 -07003900 zcs->streamStage = zcss_load;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003901 break;
3902 }
3903
Yann Colletcd2892f2017-06-01 09:44:54 -07003904 default: /* impossible */
3905 assert(0);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003906 }
3907 }
3908
Yann Collet01b15492017-05-30 18:10:26 -07003909 input->pos = ip - istart;
3910 output->pos = op - ostart;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003911 if (zcs->frameEnded) return 0;
3912 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3913 if (hintInSize==0) hintInSize = zcs->blockSize;
3914 return hintInSize;
3915 }
3916}
3917
Yann Collet53e17fb2016-08-17 01:39:22 +02003918size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003919{
Yann Collet01b15492017-05-30 18:10:26 -07003920 /* check conditions */
3921 if (output->pos > output->size) return ERROR(GENERIC);
3922 if (input->pos > input->size) return ERROR(GENERIC);
3923
3924 return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003925}
3926
Yann Colletf35e2de2017-06-05 18:32:48 -07003927/*! ZSTDMT_initCStream_internal() :
3928 * Private use only. Init streaming operation.
3929 * expects params to be valid.
3930 * must receive dict, or cdict, or none, but not both.
3931 * @return : 0, or an error code */
3932size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
3933 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3934 ZSTD_parameters params, unsigned long long pledgedSrcSize);
3935
3936
Yann Colletdeee6e52017-05-30 17:42:00 -07003937size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
3938 ZSTD_outBuffer* output,
3939 ZSTD_inBuffer* input,
3940 ZSTD_EndDirective endOp)
Yann Collet6d4fef32017-05-17 18:36:15 -07003941{
3942 /* check conditions */
Yann Colletdeee6e52017-05-30 17:42:00 -07003943 if (output->pos > output->size) return ERROR(GENERIC);
3944 if (input->pos > input->size) return ERROR(GENERIC);
Yann Collet6d4fef32017-05-17 18:36:15 -07003945 assert(cctx!=NULL);
Yann Collet01b15492017-05-30 18:10:26 -07003946
Yann Colleta3d99262017-06-29 14:44:49 -07003947 /* transparent initialization stage */
Yann Collet6d4fef32017-05-17 18:36:15 -07003948 if (cctx->streamStage == zcss_init) {
Yann Collet33a66392017-06-28 11:09:43 -07003949 const void* const prefix = cctx->prefix;
3950 size_t const prefixSize = cctx->prefixSize;
Yann Collet1ad7c822017-05-22 17:06:04 -07003951 ZSTD_parameters params = cctx->requestedParams;
Yann Collet6d4fef32017-05-17 18:36:15 -07003952 if (cctx->compressionLevel != ZSTD_CLEVEL_CUSTOM)
3953 params.cParams = ZSTD_getCParams(cctx->compressionLevel,
Yann Collet33a66392017-06-28 11:09:43 -07003954 cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
3955 cctx->prefix = NULL; cctx->prefixSize = 0; /* single usage */
3956 assert(prefix==NULL || cctx->cdict==NULL); /* only one can be set */
Yann Colletf129fd32017-06-11 18:46:09 -07003957
3958#ifdef ZSTD_MULTITHREAD
Yann Colletf35e2de2017-06-05 18:32:48 -07003959 if (cctx->nbThreads > 1) {
Yann Collet33a66392017-06-28 11:09:43 -07003960 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", cctx->nbThreads);
3961 CHECK_F( ZSTDMT_initCStream_internal(cctx->mtctx, prefix, prefixSize, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
Yann Collet23aace92017-06-11 18:32:36 -07003962 cctx->streamStage = zcss_load;
Yann Colletf129fd32017-06-11 18:46:09 -07003963 } else
3964#endif
3965 {
Yann Collet2e427422017-06-27 17:09:12 -07003966 CHECK_F( ZSTD_resetCStream_internal(cctx, prefix, prefixSize, cctx->dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
Yann Colletf35e2de2017-06-05 18:32:48 -07003967 } }
3968
Yann Colleta3d99262017-06-29 14:44:49 -07003969 /* compression stage */
Yann Colletf129fd32017-06-11 18:46:09 -07003970#ifdef ZSTD_MULTITHREAD
Yann Colletf35e2de2017-06-05 18:32:48 -07003971 if (cctx->nbThreads > 1) {
Yann Collet23aace92017-06-11 18:32:36 -07003972 size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
Yann Collet33a66392017-06-28 11:09:43 -07003973 DEBUGLOG(5, "ZSTDMT_compressStream_generic : %u", (U32)flushMin);
Yann Colletb26728c2017-06-16 14:00:46 -07003974 if ( ZSTD_isError(flushMin)
3975 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
3976 ZSTD_startNewCompression(cctx);
Yann Collet9e73f2f2017-06-16 12:24:01 -07003977 }
Yann Collet23aace92017-06-11 18:32:36 -07003978 return flushMin;
Yann Collet6d4fef32017-05-17 18:36:15 -07003979 }
Yann Colletf129fd32017-06-11 18:46:09 -07003980#endif
Yann Collet6d4fef32017-05-17 18:36:15 -07003981
Yann Collet01b15492017-05-30 18:10:26 -07003982 CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
Yann Collet559ee822017-06-16 11:58:21 -07003983 DEBUGLOG(5, "completed ZSTD_compress_generic");
Yann Colletdeee6e52017-05-30 17:42:00 -07003984 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
Yann Collet6d4fef32017-05-17 18:36:15 -07003985}
3986
Yann Colletdeee6e52017-05-30 17:42:00 -07003987size_t ZSTD_compress_generic_simpleArgs (
3988 ZSTD_CCtx* cctx,
3989 void* dst, size_t dstCapacity, size_t* dstPos,
3990 const void* src, size_t srcSize, size_t* srcPos,
3991 ZSTD_EndDirective endOp)
Yann Collet6d4fef32017-05-17 18:36:15 -07003992{
Yann Colletdeee6e52017-05-30 17:42:00 -07003993 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
3994 ZSTD_inBuffer input = { src, srcSize, *srcPos };
Yann Collet01b15492017-05-30 18:10:26 -07003995 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
Yann Collet58e8d792017-06-02 18:20:48 -07003996 size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
Yann Colletdeee6e52017-05-30 17:42:00 -07003997 *dstPos = output.pos;
3998 *srcPos = input.pos;
Yann Collet58e8d792017-06-02 18:20:48 -07003999 return cErr;
Yann Collet5a0c8e22016-08-12 01:20:36 +02004000}
4001
4002
Yann Collet104e5b02016-08-12 13:04:27 +02004003/*====== Finalize ======*/
Yann Collet5a0c8e22016-08-12 01:20:36 +02004004
4005/*! ZSTD_flushStream() :
4006* @return : amount of data remaining to flush */
Yann Collet53e17fb2016-08-17 01:39:22 +02004007size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
Yann Collet5a0c8e22016-08-12 01:20:36 +02004008{
Yann Collet18ab5af2017-05-31 09:59:22 -07004009 ZSTD_inBuffer input = { NULL, 0, 0 };
Yann Collet01b15492017-05-30 18:10:26 -07004010 if (output->pos > output->size) return ERROR(GENERIC);
4011 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
4012 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
Yann Collet5a0c8e22016-08-12 01:20:36 +02004013}
4014
4015
Yann Collet53e17fb2016-08-17 01:39:22 +02004016size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
Yann Collet5a0c8e22016-08-12 01:20:36 +02004017{
Yann Collet18ab5af2017-05-31 09:59:22 -07004018 ZSTD_inBuffer input = { NULL, 0, 0 };
Yann Collet01b15492017-05-30 18:10:26 -07004019 if (output->pos > output->size) return ERROR(GENERIC);
4020 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
Yann Collet2cb97742017-07-04 12:39:26 -07004021 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
4022 size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
4023 size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize;
4024 DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u",
4025 (unsigned)toFlush);
4026 return toFlush;
4027 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02004028}
4029
4030
Yann Collet70e8c382016-02-10 13:37:52 +01004031/*-===== Pre-defined compression levels =====-*/
Yann Colletfd416f12016-01-30 03:14:15 +01004032
/* highest pre-defined compression level */
#define ZSTD_MAX_CLEVEL     22

/*! ZSTD_maxCLevel() :
 *  @return : ZSTD_MAX_CLEVEL */
int ZSTD_maxCLevel(void)
{
    return ZSTD_MAX_CLEVEL;
}
Yann Collet7d968c72016-02-03 02:11:32 +01004035
Yann Collet3b719252016-03-30 19:48:05 +02004036static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
Yann Colletadbe74a2017-06-28 13:22:37 -07004037{ /* "default" - guarantees a monotonically increasing memory budget */
Yann Collet793c6492016-04-09 20:32:00 +02004038 /* W, C, H, S, L, TL, strat */
Yann Collete19a9ef2016-08-26 20:02:49 +02004039 { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
Yann Collet3c242e72016-07-13 14:56:24 +02004040 { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
4041 { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
Yann Colletadbe74a2017-06-28 13:22:37 -07004042 { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3 */
4043 { 20, 17, 18, 1, 5, 16, ZSTD_dfast }, /* level 4 */
4044 { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
4045 { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
4046 { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
Yann Collete19a9ef2016-08-26 20:02:49 +02004047 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
Yann Colletadbe74a2017-06-28 13:22:37 -07004048 { 21, 19, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
Yann Collet3c242e72016-07-13 14:56:24 +02004049 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
4050 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
4051 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
4052 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
4053 { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
Yann Colletadbe74a2017-06-28 13:22:37 -07004054 { 22, 21, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
Yann Collet3c242e72016-07-13 14:56:24 +02004055 { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
Yann Colletadbe74a2017-06-28 13:22:37 -07004056 { 23, 22, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
Yann Collet29297c62017-04-27 17:44:01 -07004057 { 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */
Yann Collete19a9ef2016-08-26 20:02:49 +02004058 { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
Nick Terrell374f8682017-05-10 17:48:42 -07004059 { 25, 25, 23, 7, 3, 64, ZSTD_btultra }, /* level 20 */
Yann Colletadbe74a2017-06-28 13:22:37 -07004060 { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */
Nick Terrell374f8682017-05-10 17:48:42 -07004061 { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */
Yann Colletfd416f12016-01-30 03:14:15 +01004062},
4063{ /* for srcSize <= 256 KB */
Yann Collet3b719252016-03-30 19:48:05 +02004064 /* W, C, H, S, L, T, strat */
Yann Collete19a9ef2016-08-26 20:02:49 +02004065 { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
Yann Colleta2cdffe2016-08-24 19:42:15 +02004066 { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
Yann Collet24b68a52016-08-24 14:22:26 +02004067 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
4068 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
4069 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
4070 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
4071 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
4072 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
4073 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
4074 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
4075 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
4076 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
4077 { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
4078 { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
Yann Collet78267d12016-04-08 12:36:19 +02004079 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
Yann Collet24b68a52016-08-24 14:22:26 +02004080 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
4081 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
4082 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
Yann Collet78267d12016-04-08 12:36:19 +02004083 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
4084 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07004085 { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/
4086 { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/
4087 { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01004088},
4089{ /* for srcSize <= 128 KB */
Yann Collet3b719252016-03-30 19:48:05 +02004090 /* W, C, H, S, L, T, strat */
Yann Collet5894ea82016-07-22 14:36:46 +02004091 { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
4092 { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
4093 { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
4094 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
4095 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
4096 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
4097 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
4098 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
4099 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
4100 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
4101 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
4102 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
4103 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
4104 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
Yann Collet3b719252016-03-30 19:48:05 +02004105 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
4106 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
4107 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
4108 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
4109 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
4110 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07004111 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
4112 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
4113 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01004114},
4115{ /* for srcSize <= 16 KB */
Yann Collet3b719252016-03-30 19:48:05 +02004116 /* W, C, H, S, L, T, strat */
Yann Collet2b1a3632016-07-13 15:16:00 +02004117 { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
Yann Collete557fd52016-07-17 16:21:37 +02004118 { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
Yann Collet2b1a3632016-07-13 15:16:00 +02004119 { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
4120 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
4121 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
4122 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
4123 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
4124 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
4125 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
4126 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
Yann Collet3b719252016-03-30 19:48:05 +02004127 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
4128 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
4129 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
4130 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
4131 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
4132 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
4133 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
4134 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
4135 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
4136 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07004137 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
4138 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
4139 { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01004140},
4141};
4142
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
/* Combined size (in cells) of a hash table of log `h` and a chain table of log `c` */
#define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))

/*! ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget() :
 *  Debug-only sanity check of ZSTD_defaultCParameters[0].
 *  Walks every pair of consecutive compression levels and asserts that,
 *  from one level to the next, neither `windowLog`
 *  nor the combined hash + chain table cost ever decreases.
 *  Run once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled.
 */
MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
{
    int lower = 1;
    while (lower < ZSTD_maxCLevel()) {
        const ZSTD_compressionParameters* const cur  = &ZSTD_defaultCParameters[0][lower];
        const ZSTD_compressionParameters* const next = &ZSTD_defaultCParameters[0][lower+1];
        /* window size must not shrink when level increases */
        assert(cur->windowLog <= next->windowLog);
        /* table memory must not shrink when level increases */
        assert(ZSTD_TABLECOST(cur->hashLog, cur->chainLog) <= ZSTD_TABLECOST(next->hashLog, next->chainLog));
        lower++;
    }
}
#endif
Yann Collet81353512017-06-28 15:34:56 -07004160
Yann Collet236d94f2016-05-18 12:06:33 +02004161/*! ZSTD_getCParams() :
4162* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
4163* Size values are optional, provide 0 if not known or unused */
Yann Collet009d6042017-05-19 10:17:59 -07004164ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
Yann Colletfd416f12016-01-30 03:14:15 +01004165{
Yann Collet009d6042017-05-19 10:17:59 -07004166 size_t const addedSize = srcSizeHint ? 0 : 500;
4167 U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
Yann Collet3b719252016-03-30 19:48:05 +02004168 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
Yann Collet81353512017-06-28 15:34:56 -07004169
4170#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
4171 static int g_monotonicTest = 1;
4172 if (g_monotonicTest) {
4173 ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
4174 g_monotonicTest=0;
4175 }
4176#endif
4177
Yann Collet6d4fef32017-05-17 18:36:15 -07004178 if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
Yann Colletfd416f12016-01-30 03:14:15 +01004179 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
Yann Collete51d51b2017-06-20 17:44:55 -07004180 { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
4181 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
Yann Colletfd416f12016-01-30 03:14:15 +01004182}
Yann Collet3d2cd7f2016-06-27 15:12:26 +02004183
4184/*! ZSTD_getParams() :
Yann Colleta43a8542016-07-12 13:42:10 +02004185* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
Yann Collet3d2cd7f2016-06-27 15:12:26 +02004186* All fields of `ZSTD_frameParameters` are set to default (0) */
Yann Collet009d6042017-05-19 10:17:59 -07004187ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
Yann Collet3d2cd7f2016-06-27 15:12:26 +02004188 ZSTD_parameters params;
Yann Collet009d6042017-05-19 10:17:59 -07004189 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
Yann Collet3d2cd7f2016-06-27 15:12:26 +02004190 memset(&params, 0, sizeof(params));
4191 params.cParams = cParams;
4192 return params;
4193}