blob: 1d4f4f5a1cb7eea80bec38ba0649bae42148b7b2 [file] [log] [blame]
Yann Collet4ded9e52016-08-30 10:04:33 -07001/**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
Yann Colletf3eca252015-10-22 15:31:46 +01009
Yann Colletf3eca252015-10-22 15:31:46 +010010
Yann Collet7d360282016-02-12 00:07:30 +010011/*-*************************************
Yann Colletb0edb7f2017-05-12 15:31:53 -070012* Tuning parameters
13***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
   /* compression level given to a freshly created context;
    * a build can override it by pre-defining this macro */
#  define ZSTD_CLEVEL_DEFAULT 3
#endif
17
18/*-*************************************
Yann Colletae7aa062016-02-03 02:46:46 +010019* Dependencies
Yann Colletf3eca252015-10-22 15:31:46 +010020***************************************/
Yann Colletd3b7f8d2016-06-04 19:47:02 +020021#include <string.h> /* memset */
Yann Collet14983e72015-11-11 21:38:21 +010022#include "mem.h"
Yann Collet5a0c8e22016-08-12 01:20:36 +020023#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
Yann Colletd0e2cd12016-06-05 00:58:01 +020024#include "fse.h"
Yann Collet130fe112016-06-05 00:42:28 +020025#define HUF_STATIC_LINKING_ONLY
26#include "huf.h"
Yann Colletd3b7f8d2016-06-04 19:47:02 +020027#include "zstd_internal.h" /* includes zstd.h */
Yann Colletc4a5a212017-06-01 17:56:14 -070028#include "zstdmt_compress.h"
Yann Colletf3eca252015-10-22 15:31:46 +010029
30
Yann Collet7d360282016-02-12 00:07:30 +010031/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010032* Constants
Yann Colletf3eca252015-10-22 15:31:46 +010033***************************************/
Yann Colletbb604482016-03-19 15:18:42 +010034static const U32 g_searchStrength = 8; /* control skip over incompressible data */
Yann Collet731ef162016-07-27 21:05:12 +020035#define HASH_READ_SIZE 8
36typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
Yann Colletf3eca252015-10-22 15:31:46 +010037
Yann Collet71ddeb62017-04-20 22:54:54 -070038/* entropy tables always have same size */
39static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
Yann Collete42afbc2017-04-26 11:39:35 -070040static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
41static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
42static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
Yann Collet72712032017-04-20 23:21:19 -070043static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
Yann Colleta34a39c2017-04-20 18:17:58 -070044
Yann Colletf3eca252015-10-22 15:31:46 +010045
Yann Collet7d360282016-02-12 00:07:30 +010046/*-*************************************
Yann Collet59d1f792016-01-23 19:28:41 +010047* Helper functions
48***************************************/
/*! ZSTD_compressBound() :
 *  @return : srcSize + srcSize/256, plus an extra margin for inputs smaller than 256 KB.
 *            The margin shrinks linearly from 64 bytes (srcSize==0) down to 0 (srcSize>=256 KB). */
size_t ZSTD_compressBound(size_t srcSize)
{
    size_t const lowLimit = (size_t)256 << 10;   /* 256 KB */
    size_t bound = srcSize + (srcSize >> 8);
    if (srcSize < lowLimit)
        bound += (lowLimit - srcSize) >> 12;   /* from 64 to 0 */
    return bound;
}
Yann Collet59d1f792016-01-23 19:28:41 +010054
55
Yann Collet7d360282016-02-12 00:07:30 +010056/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010057* Sequence storage
Yann Colletf3eca252015-10-22 15:31:46 +010058***************************************/
Yann Collet14983e72015-11-11 21:38:21 +010059static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
60{
Yann Collet14983e72015-11-11 21:38:21 +010061 ssPtr->lit = ssPtr->litStart;
Yann Colletc0ce4f12016-07-30 00:55:13 +020062 ssPtr->sequences = ssPtr->sequencesStart;
Yann Collet5d393572016-04-07 17:19:00 +020063 ssPtr->longLengthID = 0;
Yann Collet14983e72015-11-11 21:38:21 +010064}
65
66
Yann Collet7d360282016-02-12 00:07:30 +010067/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010068* Context memory management
69***************************************/
Yann Collet6d4fef32017-05-17 18:36:15 -070070typedef enum { zcss_init=0, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
Yann Collet0be6fd32017-05-08 16:08:01 -070071
Yann Collet18803372017-05-22 18:21:51 -070072struct ZSTD_CDict_s {
73 void* dictBuffer;
74 const void* dictContent;
75 size_t dictContentSize;
76 ZSTD_CCtx* refContext;
Yann Collet8c910d22017-06-03 01:15:02 -070077}; /* typedef'd to ZSTD_CDict within "zstd.h" */
Yann Collet18803372017-05-22 18:21:51 -070078
Yann Colletaca113f2016-12-23 22:25:03 +010079struct ZSTD_CCtx_s {
Yann Collet89db5e02015-11-13 11:27:46 +010080 const BYTE* nextSrc; /* next block here to continue on current prefix */
Yann Colleteeb8ba12015-10-22 16:55:40 +010081 const BYTE* base; /* All regular indexes relative to this position */
82 const BYTE* dictBase; /* extDict indexes relative to this position */
Yann Colletf3eca252015-10-22 15:31:46 +010083 U32 dictLimit; /* below that point, need extDict */
Yann Colleteeb8ba12015-10-22 16:55:40 +010084 U32 lowLimit; /* below that point, no more data */
Yann Colletf3eca252015-10-22 15:31:46 +010085 U32 nextToUpdate; /* index from which to continue dictionary update */
inikepcc52a972016-02-19 10:09:35 +010086 U32 nextToUpdate3; /* index from which to continue dictionary update */
inikep7adceef2016-03-23 15:53:38 +010087 U32 hashLog3; /* dispatch table : larger == faster, more memory */
Yann Colletbb002742017-01-25 16:25:38 -080088 U32 loadedDictEnd; /* index of end of dictionary */
89 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
Yann Collet14312d82017-02-23 23:42:12 -080090 U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
Yann Collet731ef162016-07-27 21:05:12 +020091 ZSTD_compressionStage_e stage;
Yann Collet4266c0a2016-06-14 01:49:25 +020092 U32 rep[ZSTD_REP_NUM];
Yann Colletb459aad2017-01-19 17:33:37 -080093 U32 repToConfirm[ZSTD_REP_NUM];
Yann Colletc46fb922016-05-29 05:01:04 +020094 U32 dictID;
Yann Colletb0edb7f2017-05-12 15:31:53 -070095 int compressionLevel;
Yann Collet1ad7c822017-05-22 17:06:04 -070096 ZSTD_parameters requestedParams;
97 ZSTD_parameters appliedParams;
Yann Collet712def92015-10-29 18:41:45 +010098 void* workSpace;
99 size_t workSpaceSize;
Yann Collet120230b2015-12-02 14:00:45 +0100100 size_t blockSize;
Yann Collet673f0d72016-06-06 00:26:38 +0200101 U64 frameContentSize;
Yann Collet20d5e032017-04-11 18:34:02 -0700102 U64 consumedSrcSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +0200103 XXH64_state_t xxhState;
inikep28669512016-06-02 13:04:18 +0200104 ZSTD_customMem customMem;
Yann Colletc7fe2622017-05-23 13:16:00 -0700105 size_t staticSize;
Yann Colletecd651b2016-01-07 15:35:18 +0100106
Yann Collet712def92015-10-29 18:41:45 +0100107 seqStore_t seqStore; /* sequences storage ptrs */
Yann Collet083fcc82015-10-25 14:06:35 +0100108 U32* hashTable;
inikepcc52a972016-02-19 10:09:35 +0100109 U32* hashTable3;
Yann Collet8a57b922016-04-04 13:49:18 +0200110 U32* chainTable;
Yann Collet71ddeb62017-04-20 22:54:54 -0700111 HUF_repeat hufCTable_repeatMode;
112 HUF_CElt* hufCTable;
113 U32 fseCTables_ready;
Yann Collet71aaa322017-04-20 23:03:38 -0700114 FSE_CTable* offcodeCTable;
115 FSE_CTable* matchlengthCTable;
116 FSE_CTable* litlengthCTable;
Yann Collete42afbc2017-04-26 11:39:35 -0700117 unsigned* entropyScratchSpace;
Yann Collet0be6fd32017-05-08 16:08:01 -0700118
119 /* streaming */
120 ZSTD_CDict* cdictLocal;
121 const ZSTD_CDict* cdict;
122 char* inBuff;
123 size_t inBuffSize;
124 size_t inToCompress;
125 size_t inBuffPos;
126 size_t inBuffTarget;
127 char* outBuff;
128 size_t outBuffSize;
129 size_t outBuffContentSize;
130 size_t outBuffFlushedSize;
131 ZSTD_cStreamStage streamStage;
132 U32 frameEnded;
Yann Colletc4a5a212017-06-01 17:56:14 -0700133
134 /* Multi-threading */
Yann Colletc35e5352017-06-01 18:44:06 -0700135 U32 nbThreads;
Yann Colletc4a5a212017-06-01 17:56:14 -0700136 ZSTDMT_CCtx* mtctx;
Yann Colletf3eca252015-10-22 15:31:46 +0100137};
138
Yann Colletc4a5a212017-06-01 17:56:14 -0700139
Yann Collet5be2dd22015-11-11 13:43:58 +0100140ZSTD_CCtx* ZSTD_createCCtx(void)
Yann Colletf3eca252015-10-22 15:31:46 +0100141{
Yann Colletae728a42017-05-30 17:11:39 -0700142 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
inikep50e82c02016-05-23 15:49:09 +0200143}
144
145ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
146{
Yann Collet69c2cdb2016-07-14 16:52:45 +0200147 ZSTD_CCtx* cctx;
inikep50e82c02016-05-23 15:49:09 +0200148
Yann Colletae728a42017-05-30 17:11:39 -0700149 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
inikep107e2432016-05-23 16:24:52 +0200150
Yann Colletc4f46b92017-05-30 17:45:37 -0700151 cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
Yann Collet69c2cdb2016-07-14 16:52:45 +0200152 if (!cctx) return NULL;
Yann Colletbb002742017-01-25 16:25:38 -0800153 cctx->customMem = customMem;
Yann Collet6d4fef32017-05-17 18:36:15 -0700154 cctx->compressionLevel = ZSTD_CLEVEL_DEFAULT;
Yann Collet69c2cdb2016-07-14 16:52:45 +0200155 return cctx;
Yann Colletf3eca252015-10-22 15:31:46 +0100156}
157
Yann Colletc7fe2622017-05-23 13:16:00 -0700158ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
159{
160 ZSTD_CCtx* cctx = (ZSTD_CCtx*) workspace;
161 if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
162 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
163 memset(workspace, 0, workspaceSize);
164 cctx->staticSize = workspaceSize;
165 cctx->workSpace = (void*)(cctx+1);
166 cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
167
168 /* entropy space (never moves) */
169 /* note : this code should be shared with resetCCtx, instead of copied */
170 { void* ptr = cctx->workSpace;
171 cctx->hufCTable = (HUF_CElt*)ptr;
Yann Collet0fdc71c2017-05-24 17:41:41 -0700172 ptr = (char*)cctx->hufCTable + hufCTable_size;
Yann Colletc7fe2622017-05-23 13:16:00 -0700173 cctx->offcodeCTable = (FSE_CTable*) ptr;
174 ptr = (char*)ptr + offcodeCTable_size;
175 cctx->matchlengthCTable = (FSE_CTable*) ptr;
176 ptr = (char*)ptr + matchlengthCTable_size;
177 cctx->litlengthCTable = (FSE_CTable*) ptr;
178 ptr = (char*)ptr + litlengthCTable_size;
179 assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
180 cctx->entropyScratchSpace = (unsigned*) ptr;
181 }
182
183 return cctx;
184}
185
Yann Collet5be2dd22015-11-11 13:43:58 +0100186size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
Yann Colletf3eca252015-10-22 15:31:46 +0100187{
inikep36403962016-06-03 16:36:50 +0200188 if (cctx==NULL) return 0; /* support free on NULL */
Yann Colletc4a5a212017-06-01 17:56:14 -0700189 if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
Yann Collet23b6e052016-08-28 21:05:43 -0700190 ZSTD_free(cctx->workSpace, cctx->customMem);
Yann Collet78553662017-05-08 17:15:00 -0700191 cctx->workSpace = NULL;
192 ZSTD_freeCDict(cctx->cdictLocal);
193 cctx->cdictLocal = NULL;
Yann Colletc4a5a212017-06-01 17:56:14 -0700194 ZSTDMT_freeCCtx(cctx->mtctx);
195 cctx->mtctx = NULL;
Yann Collet23b6e052016-08-28 21:05:43 -0700196 ZSTD_free(cctx, cctx->customMem);
Yann Collet982ffc72016-02-05 02:33:10 +0100197 return 0; /* reserved as a potential error code in the future */
Yann Collet083fcc82015-10-25 14:06:35 +0100198}
199
Yann Collet70e3b312016-08-23 01:18:06 +0200200size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
Yann Collet3ae543c2016-07-11 03:12:17 +0200201{
Yann Colletd7c65892016-09-15 02:50:27 +0200202 if (cctx==NULL) return 0; /* support sizeof on NULL */
Yann Collet791d7442017-05-08 16:17:30 -0700203 return sizeof(*cctx) + cctx->workSpaceSize
204 + ZSTD_sizeof_CDict(cctx->cdictLocal)
Yann Colletc4a5a212017-06-01 17:56:14 -0700205 + cctx->outBuffSize + cctx->inBuffSize
206 + ZSTDMT_sizeof_CCtx(cctx->mtctx);
Yann Collet3ae543c2016-07-11 03:12:17 +0200207}
208
Yann Collet009d6042017-05-19 10:17:59 -0700209size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
210{
211 return ZSTD_sizeof_CCtx(zcs); /* same object */
212}
213
Yann Colletb0edb7f2017-05-12 15:31:53 -0700214/* private API call, for dictBuilder only */
215const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
216
Yann Collet1ad7c822017-05-22 17:06:04 -0700217static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) { return cctx->appliedParams; }
Yann Colletb0edb7f2017-05-12 15:31:53 -0700218
Yann Colletef738c12017-05-12 13:53:46 -0700219/* older variant; will be deprecated */
Yann Colletbb002742017-01-25 16:25:38 -0800220size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
221{
222 switch(param)
223 {
Yann Collet06e76972017-01-25 16:39:03 -0800224 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
Yann Collet14312d82017-02-23 23:42:12 -0800225 case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
Yann Colletbb002742017-01-25 16:25:38 -0800226 default: return ERROR(parameter_unknown);
227 }
228}
229
Yann Colletadd66f82017-05-12 15:59:48 -0700230
Yann Collet6d4fef32017-05-17 18:36:15 -0700231#define ZSTD_CLEVEL_CUSTOM 999
Yann Colletadd66f82017-05-12 15:59:48 -0700232static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
233{
Yann Collet1ad7c822017-05-22 17:06:04 -0700234 if (cctx->compressionLevel==ZSTD_CLEVEL_CUSTOM) return;
235 cctx->requestedParams.cParams = ZSTD_getCParams(cctx->compressionLevel,
236 cctx->frameContentSize, 0);
Yann Collet6d4fef32017-05-17 18:36:15 -0700237 cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Colletadd66f82017-05-12 15:59:48 -0700238}
239
Yann Colletb0edb7f2017-05-12 15:31:53 -0700240size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
Yann Collet7d360282016-02-12 00:07:30 +0100241{
Yann Collet334a2882017-05-19 11:04:41 -0700242# define CLAMPCHECK(val,min,max) { \
243 if ((val<min) | (val>max)) { \
244 return ERROR(compressionParameter_unsupported); \
245 } }
246
Yann Collet24de7b02017-05-22 13:05:45 -0700247 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700248
249 switch(param)
250 {
251 case ZSTD_p_compressionLevel :
Yann Colletcd2892f2017-06-01 09:44:54 -0700252 if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); /* cap max compression level */
253 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
254 cctx->compressionLevel = value;
255 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700256
257 case ZSTD_p_windowLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700258 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
259 CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
260 ZSTD_cLevelToCParams(cctx);
261 cctx->requestedParams.cParams.windowLog = value;
262 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700263
264 case ZSTD_p_hashLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700265 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
266 CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
267 ZSTD_cLevelToCParams(cctx);
268 cctx->requestedParams.cParams.hashLog = value;
269 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700270
271 case ZSTD_p_chainLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700272 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
273 CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
274 ZSTD_cLevelToCParams(cctx);
275 cctx->requestedParams.cParams.chainLog = value;
276 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700277
278 case ZSTD_p_searchLog :
Yann Colletcd2892f2017-06-01 09:44:54 -0700279 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
280 CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
281 ZSTD_cLevelToCParams(cctx);
282 cctx->requestedParams.cParams.searchLog = value;
283 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700284
Yann Collet6d4fef32017-05-17 18:36:15 -0700285 case ZSTD_p_minMatch :
Yann Colletcd2892f2017-06-01 09:44:54 -0700286 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
287 CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
288 ZSTD_cLevelToCParams(cctx);
289 cctx->requestedParams.cParams.searchLength = value;
290 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700291
292 case ZSTD_p_targetLength :
Yann Colletcd2892f2017-06-01 09:44:54 -0700293 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
294 CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
295 ZSTD_cLevelToCParams(cctx);
296 cctx->requestedParams.cParams.targetLength = value;
297 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700298
299 case ZSTD_p_compressionStrategy :
Yann Colletcd2892f2017-06-01 09:44:54 -0700300 if (value == 0) return 0; /* special value : 0 means "don't change anything" */
301 CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
302 ZSTD_cLevelToCParams(cctx);
303 cctx->requestedParams.cParams.strategy = (ZSTD_strategy)value;
304 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700305
306#if 0
307 case ZSTD_p_windowSize : /* to be done later */
Yann Colletcd2892f2017-06-01 09:44:54 -0700308 return ERROR(compressionParameter_unsupported);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700309#endif
310
Yann Colletcd2892f2017-06-01 09:44:54 -0700311 case ZSTD_p_contentSizeFlag :
312 /* Content size written in frame header _when known_ (default:1) */
313 cctx->requestedParams.fParams.contentSizeFlag = value>0;
314 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700315
Yann Colletcd2892f2017-06-01 09:44:54 -0700316 case ZSTD_p_checksumFlag :
317 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
318 cctx->requestedParams.fParams.checksumFlag = value>0;
319 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700320
Yann Collet1ad7c822017-05-22 17:06:04 -0700321 case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
Yann Colletcd2892f2017-06-01 09:44:54 -0700322 DEBUGLOG(5, "set dictIDFlag = %u", (value>0));
323 cctx->requestedParams.fParams.noDictIDFlag = (value==0);
324 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700325
326 case ZSTD_p_refDictContent : /* to be done later */
Yann Colletcd2892f2017-06-01 09:44:54 -0700327 return ERROR(compressionParameter_unsupported);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700328
329 case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize,
330 * even when referencing into Dictionary content
331 * default : 0 when using a CDict, 1 when using a Prefix */
Yann Colletc35e5352017-06-01 18:44:06 -0700332 cctx->forceWindow = value>0;
333 cctx->loadedDictEnd = 0;
334 return 0;
335
336 case ZSTD_p_nbThreads:
337 if (value==0) return 0;
338#ifndef ZSTD_MULTITHREAD
339 if (value > 1) return ERROR(compressionParameter_unsupported);
340#endif
341 if ((value>1) && (cctx->nbThreads != value)) {
Yann Collet05ae4b22017-06-15 18:03:34 -0700342 if (cctx->staticSize) /* MT not compatible with static alloc */
343 return ERROR(compressionParameter_unsupported);
Yann Colletc35e5352017-06-01 18:44:06 -0700344 ZSTDMT_freeCCtx(cctx->mtctx);
345 cctx->nbThreads = value;
346 cctx->mtctx = ZSTDMT_createCCtx(value);
347 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
348 }
349 cctx->nbThreads = 1;
350 return 0;
351
352 case ZSTDMT_p_jobSize:
353 if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
354 assert(cctx->mtctx != NULL);
355 return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);
356
357 case ZSTDMT_p_overlapSizeLog:
358 if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
359 assert(cctx->mtctx != NULL);
360 return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700361
362 case ZSTD_p_rawContentDict : /* load dictionary in "content-only" mode (no header analysis) (default:0) */
Yann Colletcd2892f2017-06-01 09:44:54 -0700363 cctx->forceRawDict = value>0;
364 return 0;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700365
366 default: return ERROR(parameter_unknown);
367 }
Yann Collet7d360282016-02-12 00:07:30 +0100368}
369
Yann Colletb0edb7f2017-05-12 15:31:53 -0700370ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
Yann Collet95162342016-10-25 16:19:52 -0700371{
Yann Collet24de7b02017-05-22 13:05:45 -0700372 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700373 cctx->frameContentSize = pledgedSrcSize;
374 return 0;
375}
376
Yann Collet6d4fef32017-05-17 18:36:15 -0700377ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
378{
Yann Collet24de7b02017-05-22 13:05:45 -0700379 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletc7fe2622017-05-23 13:16:00 -0700380 if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */
Yann Collet6d4fef32017-05-17 18:36:15 -0700381 ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */
382 if (dict==NULL || dictSize==0) { /* no dictionary mode */
383 cctx->cdictLocal = NULL;
384 cctx->cdict = NULL;
385 } else {
Yann Collet8b21ec42017-05-19 19:46:15 -0700386 ZSTD_compressionParameters const cParams =
387 cctx->compressionLevel == ZSTD_CLEVEL_CUSTOM ?
Yann Collet1ad7c822017-05-22 17:06:04 -0700388 cctx->requestedParams.cParams :
Yann Collet8b21ec42017-05-19 19:46:15 -0700389 ZSTD_getCParams(cctx->compressionLevel, 0, dictSize);
Yann Collet6d4fef32017-05-17 18:36:15 -0700390 cctx->cdictLocal = ZSTD_createCDict_advanced(
391 dict, dictSize,
392 0 /* byReference */,
Yann Collet8b21ec42017-05-19 19:46:15 -0700393 cParams, cctx->customMem);
Yann Collet6d4fef32017-05-17 18:36:15 -0700394 cctx->cdict = cctx->cdictLocal;
395 if (cctx->cdictLocal == NULL)
396 return ERROR(memory_allocation);
397 }
398 return 0;
399}
400
Yann Colletb0edb7f2017-05-12 15:31:53 -0700401/* Not ready yet ! */
402ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
403{
404 (void)cctx; (void)prefix; (void)prefixSize; /* to be done later */
Yann Collet24de7b02017-05-22 13:05:45 -0700405 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
Yann Colletb0edb7f2017-05-12 15:31:53 -0700406 return ERROR(compressionParameter_unsupported);
407}
408
Yann Colletb0edb7f2017-05-12 15:31:53 -0700409ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
410{
Yann Collet24de7b02017-05-22 13:05:45 -0700411 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
412 cctx->cdict = cdict;
Yann Colletb0edb7f2017-05-12 15:31:53 -0700413 return ERROR(compressionParameter_unsupported);
Yann Collet95162342016-10-25 16:19:52 -0700414}
415
Yann Collet59d70632015-11-04 12:05:27 +0100416
Yann Collet21588e32016-03-30 16:50:44 +0200417/** ZSTD_checkParams() :
418 ensure param values remain within authorized range.
419 @return : 0, or an error code if one value is beyond authorized range */
Yann Collet3b719252016-03-30 19:48:05 +0200420size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
Yann Collet21588e32016-03-30 16:50:44 +0200421{
Yann Collet15354142016-04-04 04:22:53 +0200422 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
Yann Collet8a57b922016-04-04 13:49:18 +0200423 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
Yann Collet3b719252016-03-30 19:48:05 +0200424 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
425 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
Yann Collet2e2e78d2017-03-29 16:02:47 -0700426 CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
Yann Collet3b719252016-03-30 19:48:05 +0200427 CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
Nick Terrelleeb31ee2017-03-09 11:44:25 -0800428 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
Yann Collet21588e32016-03-30 16:50:44 +0200429 return 0;
430}
431
Yann Colletc3a5c4b2016-12-12 00:47:30 +0100432/** ZSTD_cycleLog() :
433 * condition for correct operation : hashLog > 1 */
434static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
435{
436 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
437 return hashLog - btScale;
438}
439
Yann Collet70d13012016-06-01 18:45:34 +0200440/** ZSTD_adjustCParams() :
Yann Colletcf409a72016-09-26 16:41:05 +0200441 optimize `cPar` for a given input (`srcSize` and `dictSize`).
Yann Collet21588e32016-03-30 16:50:44 +0200442 mostly downsizing to reduce memory consumption and initialization.
443 Both `srcSize` and `dictSize` are optional (use 0 if unknown),
444 but if both are 0, no optimization can be done.
Yann Collet70d13012016-06-01 18:45:34 +0200445 Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
Yann Collet52c04fe2016-07-07 11:53:18 +0200446ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
Yann Collet59d70632015-11-04 12:05:27 +0100447{
Yann Collet70d13012016-06-01 18:45:34 +0200448 if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
Yann Collet59d70632015-11-04 12:05:27 +0100449
Yann Collet70e45772016-03-19 18:08:32 +0100450 /* resize params, to use less memory when necessary */
Yann Colletdd6466a2016-03-30 20:06:26 +0200451 { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
452 U64 const rSize = srcSize + dictSize + minSrcSize;
Yann Colletb59bf962016-04-04 14:53:16 +0200453 if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
Yann Colletcf409a72016-09-26 16:41:05 +0200454 U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
Yann Collet70d13012016-06-01 18:45:34 +0200455 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
Yann Collet21588e32016-03-30 16:50:44 +0200456 } }
Yann Collet70d13012016-06-01 18:45:34 +0200457 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
Yann Colletc3a5c4b2016-12-12 00:47:30 +0100458 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
459 if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
460 }
Yann Colletc6eea2b2016-03-19 17:18:00 +0100461
Yann Collet70d13012016-06-01 18:45:34 +0200462 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
Yann Collet70d13012016-06-01 18:45:34 +0200463
464 return cPar;
Yann Collet59d70632015-11-04 12:05:27 +0100465}
466
467
Yann Collet88472382016-07-14 17:05:38 +0200468size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
Yann Collete74215e2016-03-19 16:09:09 +0100469{
Yann Colletfa3671e2017-05-19 10:51:30 -0700470 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
Yann Collet731ef162016-07-27 21:05:12 +0200471 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
472 size_t const maxNbSeq = blockSize / divider;
473 size_t const tokenSpace = blockSize + 11*maxNbSeq;
Yann Collet3ae543c2016-07-11 03:12:17 +0200474
Yann Collet731ef162016-07-27 21:05:12 +0200475 size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
476 size_t const hSize = ((size_t)1) << cParams.hashLog;
477 U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
478 size_t const h3Size = ((size_t)1) << hashLog3;
Yann Collet71ddeb62017-04-20 22:54:54 -0700479 size_t const entropySpace = hufCTable_size + litlengthCTable_size
Yann Colleta4086452017-04-20 23:09:39 -0700480 + offcodeCTable_size + matchlengthCTable_size
Yann Collet72712032017-04-20 23:21:19 -0700481 + entropyScratchSpace_size;
Yann Collet731ef162016-07-27 21:05:12 +0200482 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collet3ae543c2016-07-11 03:12:17 +0200483
Yann Colletfc514592017-05-08 17:07:59 -0700484 size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
Yann Collet3ae543c2016-07-11 03:12:17 +0200485 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
Nick Terrell5f2c7212017-05-10 16:49:58 -0700486 size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
Yann Colletfc514592017-05-08 17:07:59 -0700487 size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace;
Yann Collet3ae543c2016-07-11 03:12:17 +0200488
489 return sizeof(ZSTD_CCtx) + neededSpace;
Yann Collet2e91dde2016-03-08 12:22:11 +0100490}
491
Yann Colletc7fe2622017-05-23 13:16:00 -0700492size_t ZSTD_estimateCStreamSize(ZSTD_compressionParameters cParams)
493{
494 size_t const CCtxSize = ZSTD_estimateCCtxSize(cParams);
495 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
496 size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
497 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
498 size_t const streamingSize = inBuffSize + outBuffSize;
499
500 return CCtxSize + streamingSize;
501}
502
Yann Colleta7737f62016-09-06 09:44:59 +0200503
Yann Collet009d6042017-05-19 10:17:59 -0700504static U32 ZSTD_equivalentParams(ZSTD_compressionParameters cParams1,
505 ZSTD_compressionParameters cParams2)
Yann Colleta7737f62016-09-06 09:44:59 +0200506{
Yann Colletfa3671e2017-05-19 10:51:30 -0700507 U32 bslog1 = MIN(cParams1.windowLog, ZSTD_BLOCKSIZELOG_MAX);
508 U32 bslog2 = MIN(cParams2.windowLog, ZSTD_BLOCKSIZELOG_MAX);
Yann Collet009d6042017-05-19 10:17:59 -0700509 return (bslog1 == bslog2) /* same block size */
510 & (cParams1.hashLog == cParams2.hashLog)
511 & (cParams1.chainLog == cParams2.chainLog)
512 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
513 & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */
Yann Colleta7737f62016-09-06 09:44:59 +0200514}
515
516/*! ZSTD_continueCCtx() :
517 reuse CCtx without reset (note : requires no dictionary) */
518static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
519{
520 U32 const end = (U32)(cctx->nextSrc - cctx->base);
Yann Collet1ad7c822017-05-22 17:06:04 -0700521 cctx->appliedParams = params;
Yann Colleta7737f62016-09-06 09:44:59 +0200522 cctx->frameContentSize = frameContentSize;
Yann Collet20d5e032017-04-11 18:34:02 -0700523 cctx->consumedSrcSize = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200524 cctx->lowLimit = end;
525 cctx->dictLimit = end;
526 cctx->nextToUpdate = end+1;
527 cctx->stage = ZSTDcs_init;
528 cctx->dictID = 0;
529 cctx->loadedDictEnd = 0;
530 { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
Yann Colletb6249222016-09-06 09:54:22 +0200531 cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
532 XXH64_reset(&cctx->xxhState, 0);
Yann Colleta7737f62016-09-06 09:44:59 +0200533 return 0;
534}
535
/* reset policy handed to ZSTD_resetCCtx_internal() */
typedef enum {
    ZSTDcrp_continue,   /* context may be continued when the new parameters are equivalent to the applied ones */
    ZSTDcrp_noMemset
} ZSTD_compResetPolicy_e;
Yann Colleta7737f62016-09-06 09:44:59 +0200537
Yann Collet30fb4992017-04-18 14:08:50 -0700538/*! ZSTD_resetCCtx_internal() :
Yann Collet5ac72b42017-05-23 11:18:24 -0700539 note : `params` are assumed fully validated at this stage */
540static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
541 ZSTD_parameters params, U64 frameContentSize,
542 ZSTD_compResetPolicy_e const crp,
543 ZSTD_buffered_policy_e const zbuff)
Yann Colleta7737f62016-09-06 09:44:59 +0200544{
Yann Collet5ac72b42017-05-23 11:18:24 -0700545 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet0be6fd32017-05-08 16:08:01 -0700546
Yann Colletb0739bc2017-05-22 17:45:15 -0700547 if (crp == ZSTDcrp_continue) {
Yann Collet1ad7c822017-05-22 17:06:04 -0700548 if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
Yann Collet009d6042017-05-19 10:17:59 -0700549 DEBUGLOG(5, "ZSTD_equivalentParams()==1");
Yann Collet71ddeb62017-04-20 22:54:54 -0700550 zc->fseCTables_ready = 0;
551 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta7737f62016-09-06 09:44:59 +0200552 return ZSTD_continueCCtx(zc, params, frameContentSize);
Yann Colletb0739bc2017-05-22 17:45:15 -0700553 } }
inikep87d4f3d2016-03-02 15:56:24 +0100554
Yann Colletfa3671e2017-05-19 10:51:30 -0700555 { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200556 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
557 size_t const maxNbSeq = blockSize / divider;
558 size_t const tokenSpace = blockSize + 11*maxNbSeq;
Yann Collet5ac72b42017-05-23 11:18:24 -0700559 size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
560 0 : (1 << params.cParams.chainLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200561 size_t const hSize = ((size_t)1) << params.cParams.hashLog;
Yann Collet5ac72b42017-05-23 11:18:24 -0700562 U32 const hashLog3 = (params.cParams.searchLength>3) ?
563 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
Yann Colleta7737f62016-09-06 09:44:59 +0200564 size_t const h3Size = ((size_t)1) << hashLog3;
565 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collet5ac72b42017-05-23 11:18:24 -0700566 size_t const buffOutSize = ZSTD_compressBound(blockSize)+1;
567 size_t const buffInSize = ((size_t)1 << params.cParams.windowLog) + blockSize;
Yann Colleta7737f62016-09-06 09:44:59 +0200568 void* ptr;
Yann Collete74215e2016-03-19 16:09:09 +0100569
Yann Colleta7737f62016-09-06 09:44:59 +0200570 /* Check if workSpace is large enough, alloc a new one if needed */
Yann Collet71ddeb62017-04-20 22:54:54 -0700571 { size_t const entropySpace = hufCTable_size + litlengthCTable_size
Yann Colleta4086452017-04-20 23:09:39 -0700572 + offcodeCTable_size + matchlengthCTable_size
Yann Collet72712032017-04-20 23:21:19 -0700573 + entropyScratchSpace_size;
Yann Collet71ddeb62017-04-20 22:54:54 -0700574 size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
Yann Collete6fa70a2017-04-20 17:28:31 -0700575 + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
Yann Collet5ac72b42017-05-23 11:18:24 -0700576 size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
577 || (params.cParams.strategy == ZSTD_btultra)) ?
578 optPotentialSpace : 0;
579 size_t const bufferSpace = (zbuff==ZSTDb_buffered) ?
580 buffInSize + buffOutSize : 0;
581 size_t const neededSpace = entropySpace + optSpace + tableSpace
582 + tokenSpace + bufferSpace;
Yann Colletc7fe2622017-05-23 13:16:00 -0700583
584 if (zc->workSpaceSize < neededSpace) { /* too small : resize /*/
Yann Collet0be6fd32017-05-08 16:08:01 -0700585 DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n",
Yann Colletc7fe2622017-05-23 13:16:00 -0700586 (unsigned)zc->workSpaceSize>>10,
587 (unsigned)neededSpace>>10);
588 /* static cctx : no resize, error out */
589 if (zc->staticSize) return ERROR(memory_allocation);
590
Yann Collet0181fef2017-04-06 01:25:26 -0700591 zc->workSpaceSize = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200592 ZSTD_free(zc->workSpace, zc->customMem);
593 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
594 if (zc->workSpace == NULL) return ERROR(memory_allocation);
595 zc->workSpaceSize = neededSpace;
Yann Collet7bb60b12017-04-20 17:38:56 -0700596 ptr = zc->workSpace;
597
598 /* entropy space */
Yann Collet71ddeb62017-04-20 22:54:54 -0700599 zc->hufCTable = (HUF_CElt*)ptr;
600 ptr = (char*)zc->hufCTable + hufCTable_size; /* note : HUF_CElt* is incomplete type, size is estimated via macro */
Yann Collet71aaa322017-04-20 23:03:38 -0700601 zc->offcodeCTable = (FSE_CTable*) ptr;
602 ptr = (char*)ptr + offcodeCTable_size;
Yann Collet72712032017-04-20 23:21:19 -0700603 zc->matchlengthCTable = (FSE_CTable*) ptr;
Yann Collet71aaa322017-04-20 23:03:38 -0700604 ptr = (char*)ptr + matchlengthCTable_size;
Yann Collet72712032017-04-20 23:21:19 -0700605 zc->litlengthCTable = (FSE_CTable*) ptr;
606 ptr = (char*)ptr + litlengthCTable_size;
607 assert(((size_t)ptr & 3) == 0); /* ensure correct alignment */
Yann Collete42afbc2017-04-26 11:39:35 -0700608 zc->entropyScratchSpace = (unsigned*) ptr;
Yann Colleta7737f62016-09-06 09:44:59 +0200609 } }
Yann Collet083fcc82015-10-25 14:06:35 +0100610
Yann Collete6fa70a2017-04-20 17:28:31 -0700611 /* init params */
Yann Collet1ad7c822017-05-22 17:06:04 -0700612 zc->appliedParams = params;
Yann Collete6fa70a2017-04-20 17:28:31 -0700613 zc->frameContentSize = frameContentSize;
614 zc->consumedSrcSize = 0;
Yann Colletcc9f9b72017-06-15 18:17:10 -0700615 if (frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN)
616 zc->appliedParams.fParams.contentSizeFlag = 0;
617 zc->blockSize = blockSize;
Yann Collet70e8c382016-02-10 13:37:52 +0100618
Yann Collet083fcc82015-10-25 14:06:35 +0100619 XXH64_reset(&zc->xxhState, 0);
Yann Collete6fa70a2017-04-20 17:28:31 -0700620 zc->stage = ZSTDcs_init;
621 zc->dictID = 0;
622 zc->loadedDictEnd = 0;
Yann Collet71ddeb62017-04-20 22:54:54 -0700623 zc->fseCTables_ready = 0;
624 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta7737f62016-09-06 09:44:59 +0200625 zc->nextToUpdate = 1;
626 zc->nextSrc = NULL;
627 zc->base = NULL;
628 zc->dictBase = NULL;
629 zc->dictLimit = 0;
630 zc->lowLimit = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200631 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
Yann Collete6fa70a2017-04-20 17:28:31 -0700632 zc->hashLog3 = hashLog3;
633 zc->seqStore.litLengthSum = 0;
Yann Colleta7737f62016-09-06 09:44:59 +0200634
Yann Collet71aaa322017-04-20 23:03:38 -0700635 /* ensure entropy tables are close together at the beginning */
636 assert((void*)zc->hufCTable == zc->workSpace);
637 assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
638 assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
639 assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
Yann Collete42afbc2017-04-26 11:39:35 -0700640 assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
641 ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
Yann Collete6fa70a2017-04-20 17:28:31 -0700642
643 /* opt parser space */
Nick Terrelleeb31ee2017-03-09 11:44:25 -0800644 if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
Yann Collet009d6042017-05-19 10:17:59 -0700645 DEBUGLOG(5, "reserving optimal parser space");
Yann Collete6fa70a2017-04-20 17:28:31 -0700646 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
Yann Colleta7737f62016-09-06 09:44:59 +0200647 zc->seqStore.litFreq = (U32*)ptr;
648 zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
649 zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
650 zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
651 ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
652 zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
653 ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
654 zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
655 ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
Yann Colleta7737f62016-09-06 09:44:59 +0200656 }
Yann Collete6fa70a2017-04-20 17:28:31 -0700657
658 /* table Space */
659 if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
660 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
661 zc->hashTable = (U32*)(ptr);
662 zc->chainTable = zc->hashTable + hSize;
663 zc->hashTable3 = zc->chainTable + chainSize;
664 ptr = zc->hashTable3 + h3Size;
665
666 /* sequences storage */
Yann Colleta7737f62016-09-06 09:44:59 +0200667 zc->seqStore.sequencesStart = (seqDef*)ptr;
668 ptr = zc->seqStore.sequencesStart + maxNbSeq;
669 zc->seqStore.llCode = (BYTE*) ptr;
670 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
671 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
672 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
Yann Collet5ac72b42017-05-23 11:18:24 -0700673 ptr = zc->seqStore.litStart + blockSize;
674
675 /* buffers */
676 zc->inBuffSize = buffInSize;
677 zc->inBuff = (char*)ptr;
678 zc->outBuffSize = buffOutSize;
679 zc->outBuff = zc->inBuff + buffInSize;
Yann Colleta7737f62016-09-06 09:44:59 +0200680
Yann Colleta7737f62016-09-06 09:44:59 +0200681 return 0;
Yann Collet72d706a2016-03-23 20:44:12 +0100682 }
Yann Colletf3eca252015-10-22 15:31:46 +0100683}
684
Yann Collet32dfae62017-01-19 10:32:55 -0800685/* ZSTD_invalidateRepCodes() :
686 * ensures next compression will not use repcodes from previous block.
687 * Note : only works with regular variant;
688 * do not use with extDict variant ! */
689void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
690 int i;
691 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
692}
Yann Collet083fcc82015-10-25 14:06:35 +0100693
Yann Collet7b51a292016-01-26 15:58:49 +0100694
Yann Colleta4cab802017-04-18 14:54:54 -0700695/*! ZSTD_copyCCtx_internal() :
696 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
697 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
698 * pledgedSrcSize=0 means "empty" if fParams.contentSizeFlag=1
699 * @return : 0, or an error code */
Yann Collet1ad7c822017-05-22 17:06:04 -0700700static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
701 const ZSTD_CCtx* srcCCtx,
702 ZSTD_frameParameters fParams,
703 unsigned long long pledgedSrcSize)
Yann Collet7b51a292016-01-26 15:58:49 +0100704{
Yann Collet009d6042017-05-19 10:17:59 -0700705 DEBUGLOG(5, "ZSTD_copyCCtx_internal");
Yann Collet7b51a292016-01-26 15:58:49 +0100706 if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
Sean Purcell2db72492017-02-09 10:50:43 -0800707
inikep28669512016-06-02 13:04:18 +0200708 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
Yann Collet5ac72b42017-05-23 11:18:24 -0700709 { ZSTD_buffered_policy_e const zbuff = srcCCtx->inBuffSize ?
710 ZSTDb_buffered : ZSTDb_not_buffered;
711 ZSTD_parameters params = srcCCtx->appliedParams;
Yann Colleta4cab802017-04-18 14:54:54 -0700712 params.fParams = fParams;
Yann Collet5ac72b42017-05-23 11:18:24 -0700713 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
714 ZSTDcrp_noMemset, zbuff);
Sean Purcell2db72492017-02-09 10:50:43 -0800715 }
Yann Collet7b51a292016-01-26 15:58:49 +0100716
717 /* copy tables */
Yann Collet1ad7c822017-05-22 17:06:04 -0700718 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->appliedParams.cParams.chainLog);
719 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +0200720 size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
721 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
Yann Collete6fa70a2017-04-20 17:28:31 -0700722 assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
723 assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
724 memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
Yann Colletc6eea2b2016-03-19 17:18:00 +0100725 }
Yann Collet7b51a292016-01-26 15:58:49 +0100726
Yann Colletc46fb922016-05-29 05:01:04 +0200727 /* copy dictionary offsets */
Yann Colletc6eea2b2016-03-19 17:18:00 +0100728 dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
729 dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
730 dstCCtx->nextSrc = srcCCtx->nextSrc;
731 dstCCtx->base = srcCCtx->base;
732 dstCCtx->dictBase = srcCCtx->dictBase;
733 dstCCtx->dictLimit = srcCCtx->dictLimit;
734 dstCCtx->lowLimit = srcCCtx->lowLimit;
735 dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
Yann Colletc46fb922016-05-29 05:01:04 +0200736 dstCCtx->dictID = srcCCtx->dictID;
Yann Collet7b51a292016-01-26 15:58:49 +0100737
Yann Colletfb810d62016-01-28 00:18:06 +0100738 /* copy entropy tables */
Yann Collet71ddeb62017-04-20 22:54:54 -0700739 dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
740 if (srcCCtx->fseCTables_ready) {
Yann Colleta34a39c2017-04-20 18:17:58 -0700741 memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
742 memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
743 memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
Yann Colletfb810d62016-01-28 00:18:06 +0100744 }
Yann Collet71ddeb62017-04-20 22:54:54 -0700745 dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
746 if (srcCCtx->hufCTable_repeatMode) {
747 memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
Nick Terrella4197772017-03-01 17:51:56 -0800748 }
Yann Collet7b51a292016-01-26 15:58:49 +0100749
750 return 0;
751}
752
Yann Colleta4cab802017-04-18 14:54:54 -0700753/*! ZSTD_copyCCtx() :
754 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
755 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
756 * pledgedSrcSize==0 means "unknown".
757* @return : 0, or an error code */
758size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
759{
760 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
761 fParams.contentSizeFlag = pledgedSrcSize>0;
762
763 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, fParams, pledgedSrcSize);
764}
765
Yann Collet7b51a292016-01-26 15:58:49 +0100766
Yann Colletecabfe32016-03-20 16:20:06 +0100767/*! ZSTD_reduceTable() :
Yann Colleta4cab802017-04-18 14:54:54 -0700768 * reduce table indexes by `reducerValue` */
Yann Colletecabfe32016-03-20 16:20:06 +0100769static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
Yann Collet89db5e02015-11-13 11:27:46 +0100770{
Yann Colletecabfe32016-03-20 16:20:06 +0100771 U32 u;
772 for (u=0 ; u < size ; u++) {
773 if (table[u] < reducerValue) table[u] = 0;
774 else table[u] -= reducerValue;
Yann Collet89db5e02015-11-13 11:27:46 +0100775 }
776}
777
Yann Colletecabfe32016-03-20 16:20:06 +0100778/*! ZSTD_reduceIndex() :
779* rescale all indexes to avoid future overflow (indexes are U32) */
780static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
781{
Yann Collet1ad7c822017-05-22 17:06:04 -0700782 { U32 const hSize = 1 << zc->appliedParams.cParams.hashLog;
Yann Colletecabfe32016-03-20 16:20:06 +0100783 ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
784
Yann Collet1ad7c822017-05-22 17:06:04 -0700785 { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->appliedParams.cParams.chainLog);
Yann Collet8a57b922016-04-04 13:49:18 +0200786 ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
Yann Colletecabfe32016-03-20 16:20:06 +0100787
Yann Collet731ef162016-07-27 21:05:12 +0200788 { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
Yann Colletecabfe32016-03-20 16:20:06 +0100789 ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
790}
791
Yann Collet89db5e02015-11-13 11:27:46 +0100792
Yann Collet863ec402016-01-28 17:56:33 +0100793/*-*******************************************************
Yann Collet14983e72015-11-11 21:38:21 +0100794* Block entropic compression
795*********************************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100796
Przemyslaw Skibinski3ee94a72016-10-24 15:58:07 +0200797/* See doc/zstd_compression_format.md for detailed format description */
Yann Collet14983e72015-11-11 21:38:21 +0100798
Yann Colletd1b26842016-03-15 01:24:33 +0100799size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100800{
Yann Colletd1b26842016-03-15 01:24:33 +0100801 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
Yann Collet6fa05a22016-07-20 14:58:49 +0200802 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
803 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
Yann Collet14983e72015-11-11 21:38:21 +0100804 return ZSTD_blockHeaderSize+srcSize;
805}
806
807
Yann Colletd1b26842016-03-15 01:24:33 +0100808static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100809{
810 BYTE* const ostart = (BYTE* const)dst;
Yann Collet731ef162016-07-27 21:05:12 +0200811 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
Yann Collet14983e72015-11-11 21:38:21 +0100812
Yann Colletd1b26842016-03-15 01:24:33 +0100813 if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
Yann Collet14983e72015-11-11 21:38:21 +0100814
Yann Collet59d1f792016-01-23 19:28:41 +0100815 switch(flSize)
816 {
817 case 1: /* 2 - 1 - 5 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200818 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
Yann Collet59d1f792016-01-23 19:28:41 +0100819 break;
820 case 2: /* 2 - 2 - 12 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200821 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100822 break;
Yann Collet59d1f792016-01-23 19:28:41 +0100823 case 3: /* 2 - 2 - 20 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200824 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100825 break;
Yann Colletcd2892f2017-06-01 09:44:54 -0700826 default: /* not necessary : flSize is {1,2,3} */
827 assert(0);
Yann Collet59d1f792016-01-23 19:28:41 +0100828 }
829
830 memcpy(ostart + flSize, src, srcSize);
831 return srcSize + flSize;
Yann Collet14983e72015-11-11 21:38:21 +0100832}
833
Yann Colletd1b26842016-03-15 01:24:33 +0100834static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100835{
836 BYTE* const ostart = (BYTE* const)dst;
Yann Collet731ef162016-07-27 21:05:12 +0200837 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
Yann Collet14983e72015-11-11 21:38:21 +0100838
Yann Collet198e6aa2016-07-20 20:12:24 +0200839 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
Yann Collet59d1f792016-01-23 19:28:41 +0100840
841 switch(flSize)
842 {
843 case 1: /* 2 - 1 - 5 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200844 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
Yann Collet59d1f792016-01-23 19:28:41 +0100845 break;
846 case 2: /* 2 - 2 - 12 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200847 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100848 break;
Yann Collet59d1f792016-01-23 19:28:41 +0100849 case 3: /* 2 - 2 - 20 */
Yann Colletf8e7b532016-07-23 16:31:49 +0200850 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
Yann Collet59d1f792016-01-23 19:28:41 +0100851 break;
Yann Colletcd2892f2017-06-01 09:44:54 -0700852 default: /* not necessary : flSize is {1,2,3} */
853 assert(0);
Yann Collet59d1f792016-01-23 19:28:41 +0100854 }
855
856 ostart[flSize] = *(const BYTE*)src;
857 return flSize+1;
Yann Collet14983e72015-11-11 21:38:21 +0100858}
859
Yann Collet59d1f792016-01-23 19:28:41 +0100860
/* ZSTD_minGain() :
 * @return : (srcSize / 64) + 2.
 * Used by ZSTD_compressLiterals() as the minimum nb of bytes
 * compression must save to be preferred over raw storage. */
static size_t ZSTD_minGain(size_t srcSize)
{
    size_t const proportionalPart = srcSize >> 6;
    return proportionalPart + 2;
}
Yann Collet14983e72015-11-11 21:38:21 +0100862
Yann Colletb923f652016-01-26 03:14:20 +0100863static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
Yann Colletd1b26842016-03-15 01:24:33 +0100864 void* dst, size_t dstCapacity,
Yann Collet14983e72015-11-11 21:38:21 +0100865 const void* src, size_t srcSize)
866{
Yann Colleta910dc82016-03-18 12:37:45 +0100867 size_t const minGain = ZSTD_minGain(srcSize);
868 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
Yann Collet731ef162016-07-27 21:05:12 +0200869 BYTE* const ostart = (BYTE*)dst;
Yann Colletafe07092016-01-25 04:10:46 +0100870 U32 singleStream = srcSize < 256;
Yann Colletf8e7b532016-07-23 16:31:49 +0200871 symbolEncodingType_e hType = set_compressed;
Yann Colleta910dc82016-03-18 12:37:45 +0100872 size_t cLitSize;
Yann Collet14983e72015-11-11 21:38:21 +0100873
Yann Collet14983e72015-11-11 21:38:21 +0100874
Yann Colleta5c2c082016-03-20 01:09:18 +0100875 /* small ? don't even attempt compression (speed opt) */
876# define LITERAL_NOENTROPY 63
Yann Collet71ddeb62017-04-20 22:54:54 -0700877 { size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
Yann Colleta5c2c082016-03-20 01:09:18 +0100878 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
879 }
880
881 if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
Yann Collet71ddeb62017-04-20 22:54:54 -0700882 { HUF_repeat repeat = zc->hufCTable_repeatMode;
Yann Collet1ad7c822017-05-22 17:06:04 -0700883 int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
Nick Terrella4197772017-03-01 17:51:56 -0800884 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
Yann Collete348dad2017-04-20 11:14:13 -0700885 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
Yann Collete42afbc2017-04-26 11:39:35 -0700886 zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
Yann Collete348dad2017-04-20 11:14:13 -0700887 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
Yann Collete42afbc2017-04-26 11:39:35 -0700888 zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
Nick Terrella4197772017-03-01 17:51:56 -0800889 if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
Yann Collet71ddeb62017-04-20 22:54:54 -0700890 else { zc->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
Yann Colletb923f652016-01-26 03:14:20 +0100891 }
Yann Collet14983e72015-11-11 21:38:21 +0100892
Nick Terrella4197772017-03-01 17:51:56 -0800893 if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
Yann Collet71ddeb62017-04-20 22:54:54 -0700894 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta910dc82016-03-18 12:37:45 +0100895 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
Nick Terrella4197772017-03-01 17:51:56 -0800896 }
897 if (cLitSize==1) {
Yann Collet71ddeb62017-04-20 22:54:54 -0700898 zc->hufCTable_repeatMode = HUF_repeat_none;
Yann Colleta910dc82016-03-18 12:37:45 +0100899 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
Nick Terrella4197772017-03-01 17:51:56 -0800900 }
Yann Collet14983e72015-11-11 21:38:21 +0100901
902 /* Build header */
Yann Collet59d1f792016-01-23 19:28:41 +0100903 switch(lhSize)
Yann Collet14983e72015-11-11 21:38:21 +0100904 {
Yann Collet59d1f792016-01-23 19:28:41 +0100905 case 3: /* 2 - 2 - 10 - 10 */
Yann Colletc2e1a682016-07-22 17:30:52 +0200906 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
Yann Collet198e6aa2016-07-20 20:12:24 +0200907 MEM_writeLE24(ostart, lhc);
908 break;
909 }
Yann Collet59d1f792016-01-23 19:28:41 +0100910 case 4: /* 2 - 2 - 14 - 14 */
Yann Collet32faf6c2016-07-22 04:45:06 +0200911 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
Yann Collet198e6aa2016-07-20 20:12:24 +0200912 MEM_writeLE32(ostart, lhc);
913 break;
914 }
Yann Collet59d1f792016-01-23 19:28:41 +0100915 case 5: /* 2 - 2 - 18 - 18 */
Yann Collet32faf6c2016-07-22 04:45:06 +0200916 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
Yann Collet198e6aa2016-07-20 20:12:24 +0200917 MEM_writeLE32(ostart, lhc);
918 ostart[4] = (BYTE)(cLitSize >> 10);
919 break;
920 }
Yann Colletcd2892f2017-06-01 09:44:54 -0700921 default: /* not possible : lhSize is {3,4,5} */
922 assert(0);
Yann Collet14983e72015-11-11 21:38:21 +0100923 }
Yann Colleta910dc82016-03-18 12:37:45 +0100924 return lhSize+cLitSize;
Yann Collet14983e72015-11-11 21:38:21 +0100925}
926
Yann Collet3b2bd1d2016-07-30 13:21:41 +0200927static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
928 8, 9, 10, 11, 12, 13, 14, 15,
929 16, 16, 17, 17, 18, 18, 19, 19,
930 20, 20, 20, 20, 21, 21, 21, 21,
931 22, 22, 22, 22, 22, 22, 22, 22,
932 23, 23, 23, 23, 23, 23, 23, 23,
933 24, 24, 24, 24, 24, 24, 24, 24,
934 24, 24, 24, 24, 24, 24, 24, 24 };
Yann Collet14983e72015-11-11 21:38:21 +0100935
Yann Collet3b2bd1d2016-07-30 13:21:41 +0200936static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
937 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
938 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
939 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
940 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
941 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
942 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
943 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
Yann Colleted57d852016-07-29 21:22:17 +0200944
945
946void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
Yann Colletb44be742016-03-26 20:52:14 +0100947{
Yann Colleted57d852016-07-29 21:22:17 +0200948 BYTE const LL_deltaCode = 19;
949 BYTE const ML_deltaCode = 36;
Yann Colletc0ce4f12016-07-30 00:55:13 +0200950 const seqDef* const sequences = seqStorePtr->sequencesStart;
Yann Colleted57d852016-07-29 21:22:17 +0200951 BYTE* const llCodeTable = seqStorePtr->llCode;
952 BYTE* const ofCodeTable = seqStorePtr->ofCode;
953 BYTE* const mlCodeTable = seqStorePtr->mlCode;
Yann Colletc0ce4f12016-07-30 00:55:13 +0200954 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
Yann Colleted57d852016-07-29 21:22:17 +0200955 U32 u;
956 for (u=0; u<nbSeq; u++) {
957 U32 const llv = sequences[u].litLength;
958 U32 const mlv = sequences[u].matchLength;
Yann Collet3b2bd1d2016-07-30 13:21:41 +0200959 llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
Yann Colleted57d852016-07-29 21:22:17 +0200960 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
Yann Collet3b2bd1d2016-07-30 13:21:41 +0200961 mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
Yann Collet5d393572016-04-07 17:19:00 +0200962 }
Yann Colleted57d852016-07-29 21:22:17 +0200963 if (seqStorePtr->longLengthID==1)
964 llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
965 if (seqStorePtr->longLengthID==2)
966 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
Yann Colletb44be742016-03-26 20:52:14 +0100967}
968
Sean Purcell553f67e2017-03-02 15:15:31 -0800969MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
Yann Colletd1b26842016-03-15 01:24:33 +0100970 void* dst, size_t dstCapacity,
Sean Purcell553f67e2017-03-02 15:15:31 -0800971 size_t srcSize)
Yann Collet14983e72015-11-11 21:38:21 +0100972{
Yann Collet1ad7c822017-05-22 17:06:04 -0700973 const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
Yann Colletb923f652016-01-26 03:14:20 +0100974 const seqStore_t* seqStorePtr = &(zc->seqStore);
Yann Collet14983e72015-11-11 21:38:21 +0100975 U32 count[MaxSeq+1];
976 S16 norm[MaxSeq+1];
Yann Colletfb810d62016-01-28 00:18:06 +0100977 FSE_CTable* CTable_LitLength = zc->litlengthCTable;
978 FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
979 FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
Yann Collet14983e72015-11-11 21:38:21 +0100980 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
Yann Colletc0ce4f12016-07-30 00:55:13 +0200981 const seqDef* const sequences = seqStorePtr->sequencesStart;
Yann Colleted57d852016-07-29 21:22:17 +0200982 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
983 const BYTE* const llCodeTable = seqStorePtr->llCode;
984 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
Yann Collet5054ee02015-11-23 13:34:21 +0100985 BYTE* const ostart = (BYTE*)dst;
Yann Colletd1b26842016-03-15 01:24:33 +0100986 BYTE* const oend = ostart + dstCapacity;
Yann Colleta910dc82016-03-18 12:37:45 +0100987 BYTE* op = ostart;
Yann Colletc0ce4f12016-07-30 00:55:13 +0200988 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
Yann Collet14983e72015-11-11 21:38:21 +0100989 BYTE* seqHead;
Yann Colletd79a9a02016-11-30 15:52:20 -0800990 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
Yann Collet14983e72015-11-11 21:38:21 +0100991
Yann Collet14983e72015-11-11 21:38:21 +0100992 /* Compress literals */
Yann Colleta5c2c082016-03-20 01:09:18 +0100993 { const BYTE* const literals = seqStorePtr->litStart;
Yann Colleta910dc82016-03-18 12:37:45 +0100994 size_t const litSize = seqStorePtr->lit - literals;
Yann Colleta5c2c082016-03-20 01:09:18 +0100995 size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
Yann Collet14983e72015-11-11 21:38:21 +0100996 if (ZSTD_isError(cSize)) return cSize;
997 op += cSize;
998 }
999
1000 /* Sequences Header */
Yann Collet7cbe79a2016-03-23 22:31:57 +01001001 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
Yann Colletd409db62016-03-04 14:45:31 +01001002 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
1003 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
1004 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
Yann Collete93d6ce2016-01-31 00:58:06 +01001005 if (nbSeq==0) goto _check_compressibility;
Yann Collet14983e72015-11-11 21:38:21 +01001006
Yann Colletbe391432016-03-22 23:19:28 +01001007 /* seqHead : flags for FSE encoding type */
1008 seqHead = op++;
Yann Collet14983e72015-11-11 21:38:21 +01001009
Yann Colletfb810d62016-01-28 00:18:06 +01001010#define MIN_SEQ_FOR_DYNAMIC_FSE 64
1011#define MAX_SEQ_FOR_STATIC_FSE 1000
1012
Yann Colletb44be742016-03-26 20:52:14 +01001013 /* convert length/distances into codes */
Yann Colleted57d852016-07-29 21:22:17 +02001014 ZSTD_seqToCodes(seqStorePtr);
Yann Collet597847a2016-03-20 19:14:22 +01001015
Yann Collet14983e72015-11-11 21:38:21 +01001016 /* CTable for Literal Lengths */
Yann Colletfadda6c2016-03-22 12:14:26 +01001017 { U32 max = MaxLL;
Yann Collete42afbc2017-04-26 11:39:35 -07001018 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001019 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
1020 *op++ = llCodeTable[0];
1021 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001022 LLtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001023 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001024 LLtype = set_repeat;
Yann Colletfadda6c2016-03-22 12:14:26 +01001025 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001026 FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001027 LLtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001028 } else {
Yann Colletfadda6c2016-03-22 12:14:26 +01001029 size_t nbSeq_1 = nbSeq;
1030 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
1031 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
1032 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
Yann Colletadd08d62016-03-23 01:32:41 +01001033 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001034 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletadd08d62016-03-23 01:32:41 +01001035 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001036 FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001037 LLtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001038 } }
Yann Collet14983e72015-11-11 21:38:21 +01001039
Yann Colletb44be742016-03-26 20:52:14 +01001040 /* CTable for Offsets */
Yann Colletfadda6c2016-03-22 12:14:26 +01001041 { U32 max = MaxOff;
Yann Collete42afbc2017-04-26 11:39:35 -07001042 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001043 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
Yann Collet7cbe79a2016-03-23 22:31:57 +01001044 *op++ = ofCodeTable[0];
Yann Colletfadda6c2016-03-22 12:14:26 +01001045 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001046 Offtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001047 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001048 Offtype = set_repeat;
Yann Collet48537162016-04-07 15:24:29 +02001049 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001050 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001051 Offtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001052 } else {
Yann Colletfadda6c2016-03-22 12:14:26 +01001053 size_t nbSeq_1 = nbSeq;
1054 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
Yann Collet7cbe79a2016-03-23 22:31:57 +01001055 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
Yann Colletfadda6c2016-03-22 12:14:26 +01001056 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
Yann Colletadd08d62016-03-23 01:32:41 +01001057 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001058 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletadd08d62016-03-23 01:32:41 +01001059 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001060 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001061 Offtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001062 } }
1063
Yann Collet14983e72015-11-11 21:38:21 +01001064 /* CTable for MatchLengths */
Yann Colletfadda6c2016-03-22 12:14:26 +01001065 { U32 max = MaxML;
Yann Collete42afbc2017-04-26 11:39:35 -07001066 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
Yann Colletfadda6c2016-03-22 12:14:26 +01001067 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
Yann Collet72d706a2016-03-23 20:44:12 +01001068 *op++ = *mlCodeTable;
Yann Colletfadda6c2016-03-22 12:14:26 +01001069 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
Yann Colletf8e7b532016-07-23 16:31:49 +02001070 MLtype = set_rle;
Yann Collet71ddeb62017-04-20 22:54:54 -07001071 } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Yann Colletf8e7b532016-07-23 16:31:49 +02001072 MLtype = set_repeat;
Yann Colletfadda6c2016-03-22 12:14:26 +01001073 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
Yann Colletd79a9a02016-11-30 15:52:20 -08001074 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001075 MLtype = set_basic;
Yann Colletfadda6c2016-03-22 12:14:26 +01001076 } else {
1077 size_t nbSeq_1 = nbSeq;
1078 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
1079 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
1080 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
1081 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
Yann Collet23776ce2017-03-23 17:59:50 -07001082 if (FSE_isError(NCountSize)) return NCountSize;
Yann Colletfadda6c2016-03-22 12:14:26 +01001083 op += NCountSize; }
Yann Colletd79a9a02016-11-30 15:52:20 -08001084 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
Yann Colletf8e7b532016-07-23 16:31:49 +02001085 MLtype = set_compressed;
Yann Colletfadda6c2016-03-22 12:14:26 +01001086 } }
Yann Collet14983e72015-11-11 21:38:21 +01001087
Yann Colletbe391432016-03-22 23:19:28 +01001088 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
Yann Collet71ddeb62017-04-20 22:54:54 -07001089 zc->fseCTables_ready = 0;
Yann Collet14983e72015-11-11 21:38:21 +01001090
1091 /* Encoding Sequences */
Yann Collet70e45772016-03-19 18:08:32 +01001092 { BIT_CStream_t blockStream;
Yann Colleta910dc82016-03-18 12:37:45 +01001093 FSE_CState_t stateMatchLength;
1094 FSE_CState_t stateOffsetBits;
1095 FSE_CState_t stateLitLength;
Yann Collet14983e72015-11-11 21:38:21 +01001096
Yann Collet95d07d72016-09-06 16:38:51 +02001097 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
Yann Collet14983e72015-11-11 21:38:21 +01001098
Yann Collet597847a2016-03-20 19:14:22 +01001099 /* first symbols */
Yann Colletfadda6c2016-03-22 12:14:26 +01001100 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
Yann Collet7cbe79a2016-03-23 22:31:57 +01001101 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
Yann Collet597847a2016-03-20 19:14:22 +01001102 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
Yann Colleted57d852016-07-29 21:22:17 +02001103 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
Yann Colletb9151402016-03-26 17:18:11 +01001104 if (MEM_32bits()) BIT_flushBits(&blockStream);
Yann Colleted57d852016-07-29 21:22:17 +02001105 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
Yann Colletb9151402016-03-26 17:18:11 +01001106 if (MEM_32bits()) BIT_flushBits(&blockStream);
Sean Purcelld44703d2017-03-01 14:36:25 -08001107 if (longOffsets) {
1108 U32 const ofBits = ofCodeTable[nbSeq-1];
1109 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
1110 if (extraBits) {
1111 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
1112 BIT_flushBits(&blockStream);
1113 }
1114 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
1115 ofBits - extraBits);
1116 } else {
1117 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
1118 }
Yann Collet597847a2016-03-20 19:14:22 +01001119 BIT_flushBits(&blockStream);
1120
Yann Colletfadda6c2016-03-22 12:14:26 +01001121 { size_t n;
1122 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
Yann Collet3c6b8082016-07-30 03:20:47 +02001123 BYTE const llCode = llCodeTable[n];
Yann Collet731ef162016-07-27 21:05:12 +02001124 BYTE const ofCode = ofCodeTable[n];
1125 BYTE const mlCode = mlCodeTable[n];
Yann Collet731ef162016-07-27 21:05:12 +02001126 U32 const llBits = LL_bits[llCode];
Yann Collet731ef162016-07-27 21:05:12 +02001127 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
Yann Collet3c6b8082016-07-30 03:20:47 +02001128 U32 const mlBits = ML_bits[mlCode];
Yann Colletfadda6c2016-03-22 12:14:26 +01001129 /* (7)*/ /* (7)*/
Yann Colletb9151402016-03-26 17:18:11 +01001130 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
1131 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
1132 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
1133 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
Yann Collet582933f2016-04-11 16:25:56 +02001134 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
Yann Colletb9151402016-03-26 17:18:11 +01001135 BIT_flushBits(&blockStream); /* (7)*/
Yann Colleted57d852016-07-29 21:22:17 +02001136 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
Yann Colletb9151402016-03-26 17:18:11 +01001137 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
Yann Colleted57d852016-07-29 21:22:17 +02001138 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
Yann Colletb9151402016-03-26 17:18:11 +01001139 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
Sean Purcelld44703d2017-03-01 14:36:25 -08001140 if (longOffsets) {
1141 int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
1142 if (extraBits) {
1143 BIT_addBits(&blockStream, sequences[n].offset, extraBits);
1144 BIT_flushBits(&blockStream); /* (7)*/
1145 }
1146 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
1147 ofBits - extraBits); /* 31 */
1148 } else {
1149 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
1150 }
Yann Colletb9151402016-03-26 17:18:11 +01001151 BIT_flushBits(&blockStream); /* (7)*/
Yann Colletfadda6c2016-03-22 12:14:26 +01001152 } }
Yann Collet14983e72015-11-11 21:38:21 +01001153
1154 FSE_flushCState(&blockStream, &stateMatchLength);
1155 FSE_flushCState(&blockStream, &stateOffsetBits);
1156 FSE_flushCState(&blockStream, &stateLitLength);
1157
Yann Colletb9151402016-03-26 17:18:11 +01001158 { size_t const streamSize = BIT_closeCStream(&blockStream);
1159 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
1160 op += streamSize;
1161 } }
Yann Collet14983e72015-11-11 21:38:21 +01001162
1163 /* check compressibility */
Yann Collete93d6ce2016-01-31 00:58:06 +01001164_check_compressibility:
Nick Terrella4197772017-03-01 17:51:56 -08001165 { size_t const minGain = ZSTD_minGain(srcSize);
1166 size_t const maxCSize = srcSize - minGain;
1167 if ((size_t)(op-ostart) >= maxCSize) {
Yann Collet71ddeb62017-04-20 22:54:54 -07001168 zc->hufCTable_repeatMode = HUF_repeat_none;
Nick Terrella4197772017-03-01 17:51:56 -08001169 return 0;
1170 } }
Yann Collet14983e72015-11-11 21:38:21 +01001171
Yann Collet4266c0a2016-06-14 01:49:25 +02001172 /* confirm repcodes */
Yann Colletb459aad2017-01-19 17:33:37 -08001173 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
Yann Collet4266c0a2016-06-14 01:49:25 +02001174
Yann Collet5054ee02015-11-23 13:34:21 +01001175 return op - ostart;
Yann Collet14983e72015-11-11 21:38:21 +01001176}
1177
Yann Colletbb002742017-01-25 16:25:38 -08001178
Yann Collet95cd0c22016-03-08 18:24:21 +01001179/*! ZSTD_storeSeq() :
1180 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
1181 `offsetCode` : distance to match, or 0 == repCode.
1182 `matchCode` : matchLength - MINMATCH
Yann Collet14983e72015-11-11 21:38:21 +01001183*/
Yann Colletd57dffb2016-07-03 01:48:26 +02001184MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
Yann Collet14983e72015-11-11 21:38:21 +01001185{
Yann Collet009d6042017-05-19 10:17:59 -07001186#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
1187 static const BYTE* g_start = NULL;
1188 U32 const pos = (U32)((const BYTE*)literals - g_start);
1189 if (g_start==NULL) g_start = (const BYTE*)literals;
1190 if ((pos > 0) && (pos < 1000000000))
1191 DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
1192 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
Yann Collet14983e72015-11-11 21:38:21 +01001193#endif
Yann Collet14983e72015-11-11 21:38:21 +01001194 /* copy Literals */
Yann Collet009d6042017-05-19 10:17:59 -07001195 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
Yann Collet14983e72015-11-11 21:38:21 +01001196 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
1197 seqStorePtr->lit += litLength;
1198
1199 /* literal Length */
Yann Collete6fa70a2017-04-20 17:28:31 -07001200 if (litLength>0xFFFF) {
1201 seqStorePtr->longLengthID = 1;
1202 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1203 }
Yann Colletc0ce4f12016-07-30 00:55:13 +02001204 seqStorePtr->sequences[0].litLength = (U16)litLength;
Yann Collet14983e72015-11-11 21:38:21 +01001205
1206 /* match offset */
Yann Colletc0ce4f12016-07-30 00:55:13 +02001207 seqStorePtr->sequences[0].offset = offsetCode + 1;
Yann Collet14983e72015-11-11 21:38:21 +01001208
1209 /* match Length */
Yann Collete6fa70a2017-04-20 17:28:31 -07001210 if (matchCode>0xFFFF) {
1211 seqStorePtr->longLengthID = 2;
1212 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1213 }
Yann Colletc0ce4f12016-07-30 00:55:13 +02001214 seqStorePtr->sequences[0].matchLength = (U16)matchCode;
Yann Colleted57d852016-07-29 21:22:17 +02001215
Yann Colletc0ce4f12016-07-30 00:55:13 +02001216 seqStorePtr->sequences++;
Yann Collet14983e72015-11-11 21:38:21 +01001217}
1218
1219
Yann Collet7d360282016-02-12 00:07:30 +01001220/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +01001221* Match length counter
1222***************************************/
Yann Collet5054ee02015-11-23 13:34:21 +01001223static unsigned ZSTD_NbCommonBytes (register size_t val)
Yann Collet14983e72015-11-11 21:38:21 +01001224{
Yann Collet863ec402016-01-28 17:56:33 +01001225 if (MEM_isLittleEndian()) {
1226 if (MEM_64bits()) {
Yann Collet14983e72015-11-11 21:38:21 +01001227# if defined(_MSC_VER) && defined(_WIN64)
1228 unsigned long r = 0;
1229 _BitScanForward64( &r, (U64)val );
Yann Colletd6080882015-12-09 09:05:22 +01001230 return (unsigned)(r>>3);
Yann Collet14983e72015-11-11 21:38:21 +01001231# elif defined(__GNUC__) && (__GNUC__ >= 3)
1232 return (__builtin_ctzll((U64)val) >> 3);
1233# else
Yann Collete348dad2017-04-20 11:14:13 -07001234 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
1235 0, 3, 1, 3, 1, 4, 2, 7,
1236 0, 2, 3, 6, 1, 5, 3, 5,
1237 1, 3, 4, 4, 2, 5, 6, 7,
1238 7, 0, 1, 2, 3, 3, 4, 6,
1239 2, 6, 5, 5, 3, 4, 5, 6,
1240 7, 1, 2, 4, 6, 4, 4, 5,
1241 7, 2, 6, 5, 7, 6, 7, 7 };
Yann Collet14983e72015-11-11 21:38:21 +01001242 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
1243# endif
Yann Collet863ec402016-01-28 17:56:33 +01001244 } else { /* 32 bits */
Yann Collet14983e72015-11-11 21:38:21 +01001245# if defined(_MSC_VER)
1246 unsigned long r=0;
1247 _BitScanForward( &r, (U32)val );
Yann Colletd6080882015-12-09 09:05:22 +01001248 return (unsigned)(r>>3);
Yann Collet14983e72015-11-11 21:38:21 +01001249# elif defined(__GNUC__) && (__GNUC__ >= 3)
1250 return (__builtin_ctz((U32)val) >> 3);
1251# else
Yann Collete348dad2017-04-20 11:14:13 -07001252 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
1253 3, 2, 2, 1, 3, 2, 0, 1,
1254 3, 3, 1, 2, 2, 2, 2, 0,
1255 3, 1, 2, 0, 1, 0, 1, 1 };
Yann Collet14983e72015-11-11 21:38:21 +01001256 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
1257# endif
1258 }
Yann Collet863ec402016-01-28 17:56:33 +01001259 } else { /* Big Endian CPU */
1260 if (MEM_64bits()) {
Yann Collet14983e72015-11-11 21:38:21 +01001261# if defined(_MSC_VER) && defined(_WIN64)
1262 unsigned long r = 0;
1263 _BitScanReverse64( &r, val );
1264 return (unsigned)(r>>3);
1265# elif defined(__GNUC__) && (__GNUC__ >= 3)
1266 return (__builtin_clzll(val) >> 3);
1267# else
1268 unsigned r;
1269 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
1270 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
1271 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
1272 r += (!val);
1273 return r;
1274# endif
Yann Collet863ec402016-01-28 17:56:33 +01001275 } else { /* 32 bits */
Yann Collet14983e72015-11-11 21:38:21 +01001276# if defined(_MSC_VER)
1277 unsigned long r = 0;
1278 _BitScanReverse( &r, (unsigned long)val );
1279 return (unsigned)(r>>3);
1280# elif defined(__GNUC__) && (__GNUC__ >= 3)
1281 return (__builtin_clz((U32)val) >> 3);
1282# else
1283 unsigned r;
1284 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
1285 r += (!val);
1286 return r;
1287# endif
Yann Collet863ec402016-01-28 17:56:33 +01001288 } }
Yann Collet14983e72015-11-11 21:38:21 +01001289}
1290
1291
Yann Colleta436a522016-06-20 23:34:04 +02001292static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
Yann Collet14983e72015-11-11 21:38:21 +01001293{
1294 const BYTE* const pStart = pIn;
Yann Colleta436a522016-06-20 23:34:04 +02001295 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
Yann Collet14983e72015-11-11 21:38:21 +01001296
Yann Colleta436a522016-06-20 23:34:04 +02001297 while (pIn < pInLoopLimit) {
Yann Collet7591a7f2016-05-20 11:44:43 +02001298 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
Yann Collet14983e72015-11-11 21:38:21 +01001299 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
1300 pIn += ZSTD_NbCommonBytes(diff);
1301 return (size_t)(pIn - pStart);
1302 }
Yann Collet14983e72015-11-11 21:38:21 +01001303 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
1304 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
1305 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
1306 return (size_t)(pIn - pStart);
1307}
1308
Yann Collet04b12d82016-02-11 06:23:24 +01001309/** ZSTD_count_2segments() :
Yann Collet7d360282016-02-12 00:07:30 +01001310* can count match length with `ip` & `match` in 2 different segments.
Yann Collet5054ee02015-11-23 13:34:21 +01001311* convention : on reaching mEnd, match count continue starting from iStart
1312*/
1313static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
1314{
Yann Collet7591a7f2016-05-20 11:44:43 +02001315 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
Yann Collet731ef162016-07-27 21:05:12 +02001316 size_t const matchLength = ZSTD_count(ip, match, vEnd);
1317 if (match + matchLength != mEnd) return matchLength;
1318 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
Yann Collet5054ee02015-11-23 13:34:21 +01001319}
1320
Yann Collet14983e72015-11-11 21:38:21 +01001321
Yann Collet863ec402016-01-28 17:56:33 +01001322/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +01001323* Hashes
Yann Colletf3eca252015-10-22 15:31:46 +01001324***************************************/
inikepcc52a972016-02-19 10:09:35 +01001325static const U32 prime3bytes = 506832829U;
1326static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
Yann Collete6fa70a2017-04-20 17:28:31 -07001327MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
inikepcc52a972016-02-19 10:09:35 +01001328
Yann Collet4b100f42015-10-30 15:49:48 +01001329static const U32 prime4bytes = 2654435761U;
Yann Collet863ec402016-01-28 17:56:33 +01001330static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
Yann Collet5be2dd22015-11-11 13:43:58 +01001331static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
Yann Collet2acb5d32015-10-29 16:49:43 +01001332
Yann Collet4b100f42015-10-30 15:49:48 +01001333static const U64 prime5bytes = 889523592379ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001334static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001335static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
Yann Collet4b100f42015-10-30 15:49:48 +01001336
1337static const U64 prime6bytes = 227718039650203ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001338static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001339static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
Yann Collet4b100f42015-10-30 15:49:48 +01001340
Yann Collet14983e72015-11-11 21:38:21 +01001341static const U64 prime7bytes = 58295818150454627ULL;
Yann Collet863ec402016-01-28 17:56:33 +01001342static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
Yann Collet4f0a3932016-02-07 04:00:27 +01001343static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001344
Yann Collet45dc3562016-07-12 09:47:31 +02001345static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
1346static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
1347static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
1348
Yann Collet5be2dd22015-11-11 13:43:58 +01001349static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
Yann Collet4b100f42015-10-30 15:49:48 +01001350{
1351 switch(mls)
1352 {
1353 default:
Yann Collet5be2dd22015-11-11 13:43:58 +01001354 case 4: return ZSTD_hash4Ptr(p, hBits);
1355 case 5: return ZSTD_hash5Ptr(p, hBits);
1356 case 6: return ZSTD_hash6Ptr(p, hBits);
1357 case 7: return ZSTD_hash7Ptr(p, hBits);
Yann Collet45dc3562016-07-12 09:47:31 +02001358 case 8: return ZSTD_hash8Ptr(p, hBits);
Yann Collet4b100f42015-10-30 15:49:48 +01001359 }
1360}
Yann Collet2acb5d32015-10-29 16:49:43 +01001361
Yann Collet863ec402016-01-28 17:56:33 +01001362
Yann Collet2ce49232016-02-02 14:36:49 +01001363/*-*************************************
Yann Collet1f44b3f2015-11-05 17:32:18 +01001364* Fast Scan
1365***************************************/
Yann Collet417890c2015-12-04 17:16:37 +01001366static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
1367{
1368 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001369 U32 const hBits = zc->appliedParams.cParams.hashLog;
Yann Collet417890c2015-12-04 17:16:37 +01001370 const BYTE* const base = zc->base;
1371 const BYTE* ip = base + zc->nextToUpdate;
Yann Collet731ef162016-07-27 21:05:12 +02001372 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
Yann Collet37f3d1b2016-03-19 15:11:42 +01001373 const size_t fastHashFillStep = 3;
Yann Collet417890c2015-12-04 17:16:37 +01001374
Yann Colletfb810d62016-01-28 00:18:06 +01001375 while(ip <= iend) {
Yann Collet417890c2015-12-04 17:16:37 +01001376 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
Yann Collet37f3d1b2016-03-19 15:11:42 +01001377 ip += fastHashFillStep;
Yann Collet417890c2015-12-04 17:16:37 +01001378 }
1379}
1380
1381
Yann Collet1f44b3f2015-11-05 17:32:18 +01001382FORCE_INLINE
Yann Collet4266c0a2016-06-14 01:49:25 +02001383void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
Yann Collet280f9a82016-08-08 00:44:00 +02001384 const void* src, size_t srcSize,
1385 const U32 mls)
Yann Collet1f44b3f2015-11-05 17:32:18 +01001386{
Yann Collet4266c0a2016-06-14 01:49:25 +02001387 U32* const hashTable = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001388 U32 const hBits = cctx->appliedParams.cParams.hashLog;
Yann Collet4266c0a2016-06-14 01:49:25 +02001389 seqStore_t* seqStorePtr = &(cctx->seqStore);
1390 const BYTE* const base = cctx->base;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001391 const BYTE* const istart = (const BYTE*)src;
Yann Collet805a52a2015-11-06 10:52:17 +01001392 const BYTE* ip = istart;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001393 const BYTE* anchor = istart;
Yann Collet731ef162016-07-27 21:05:12 +02001394 const U32 lowestIndex = cctx->dictLimit;
Yann Collet4266c0a2016-06-14 01:49:25 +02001395 const BYTE* const lowest = base + lowestIndex;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001396 const BYTE* const iend = istart + srcSize;
Yann Collet731ef162016-07-27 21:05:12 +02001397 const BYTE* const ilimit = iend - HASH_READ_SIZE;
Yann Collet92d75662016-07-03 01:10:53 +02001398 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1399 U32 offsetSaved = 0;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001400
Yann Collet1f44b3f2015-11-05 17:32:18 +01001401 /* init */
Yann Collet4266c0a2016-06-14 01:49:25 +02001402 ip += (ip==lowest);
1403 { U32 const maxRep = (U32)(ip-lowest);
Yann Collet92d75662016-07-03 01:10:53 +02001404 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1405 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
Yann Collet4266c0a2016-06-14 01:49:25 +02001406 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001407
1408 /* Main Search Loop */
Yann Collet4266c0a2016-06-14 01:49:25 +02001409 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
Yann Colleta436a522016-06-20 23:34:04 +02001410 size_t mLength;
Yann Collet43dfe012016-06-13 21:43:06 +02001411 size_t const h = ZSTD_hashPtr(ip, hBits, mls);
1412 U32 const current = (U32)(ip-base);
1413 U32 const matchIndex = hashTable[h];
Yann Colletd94efbf2015-12-29 14:29:08 +01001414 const BYTE* match = base + matchIndex;
Yann Collet96ffa422016-01-02 01:16:28 +01001415 hashTable[h] = current; /* update hash table */
Yann Collet1f44b3f2015-11-05 17:32:18 +01001416
Yann Collet280f9a82016-08-08 00:44:00 +02001417 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
Yann Collet45dc3562016-07-12 09:47:31 +02001418 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
Yann Collet402fdcf2015-11-20 12:46:08 +01001419 ip++;
Yann Colleta436a522016-06-20 23:34:04 +02001420 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1421 } else {
Yann Collet92d75662016-07-03 01:10:53 +02001422 U32 offset;
Yann Colleta436a522016-06-20 23:34:04 +02001423 if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001424 ip += ((ip-anchor) >> g_searchStrength) + 1;
1425 continue;
1426 }
Yann Collet45dc3562016-07-12 09:47:31 +02001427 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
Yann Collet92d75662016-07-03 01:10:53 +02001428 offset = (U32)(ip-match);
Yann Colleta436a522016-06-20 23:34:04 +02001429 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
Yann Collet402fdcf2015-11-20 12:46:08 +01001430 offset_2 = offset_1;
1431 offset_1 = offset;
inikep59453082016-03-16 15:35:14 +01001432
Yann Colleta436a522016-06-20 23:34:04 +02001433 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Collet402fdcf2015-11-20 12:46:08 +01001434 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001435
Yann Collet402fdcf2015-11-20 12:46:08 +01001436 /* match found */
Yann Colleta436a522016-06-20 23:34:04 +02001437 ip += mLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001438 anchor = ip;
1439
Yann Colletfb810d62016-01-28 00:18:06 +01001440 if (ip <= ilimit) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001441 /* Fill Table */
Yann Colletecd651b2016-01-07 15:35:18 +01001442 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001443 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1444 /* check immediate repcode */
1445 while ( (ip <= ilimit)
Yann Collet4266c0a2016-06-14 01:49:25 +02001446 && ( (offset_2>0)
Yann Collet43dfe012016-06-13 21:43:06 +02001447 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001448 /* store sequence */
Yann Collet45dc3562016-07-12 09:47:31 +02001449 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Collet92d75662016-07-03 01:10:53 +02001450 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001451 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
Yann Colleta436a522016-06-20 23:34:04 +02001452 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1453 ip += rLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001454 anchor = ip;
1455 continue; /* faster when present ... (?) */
Yann Colletfb810d62016-01-28 00:18:06 +01001456 } } }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001457
Yann Collet4266c0a2016-06-14 01:49:25 +02001458 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001459 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1460 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
Yann Collet4266c0a2016-06-14 01:49:25 +02001461
Yann Collet70e45772016-03-19 18:08:32 +01001462 /* Last Literals */
1463 { size_t const lastLLSize = iend - anchor;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001464 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1465 seqStorePtr->lit += lastLLSize;
1466 }
Yann Collet1f44b3f2015-11-05 17:32:18 +01001467}
1468
1469
Yann Collet82260dd2016-02-11 07:14:25 +01001470static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
Yann Collet59d1f792016-01-23 19:28:41 +01001471 const void* src, size_t srcSize)
Yann Collet1f44b3f2015-11-05 17:32:18 +01001472{
Yann Collet1ad7c822017-05-22 17:06:04 -07001473 const U32 mls = ctx->appliedParams.cParams.searchLength;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001474 switch(mls)
1475 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001476 default: /* includes case 3 */
Yann Collet1f44b3f2015-11-05 17:32:18 +01001477 case 4 :
Yann Collet59d1f792016-01-23 19:28:41 +01001478 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001479 case 5 :
Yann Collet59d1f792016-01-23 19:28:41 +01001480 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001481 case 6 :
Yann Collet59d1f792016-01-23 19:28:41 +01001482 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001483 case 7 :
Yann Collet59d1f792016-01-23 19:28:41 +01001484 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
Yann Collet1f44b3f2015-11-05 17:32:18 +01001485 }
1486}
Yann Colletf3eca252015-10-22 15:31:46 +01001487
Yann Colletf3eca252015-10-22 15:31:46 +01001488
Yann Collet82260dd2016-02-11 07:14:25 +01001489static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
Yann Collet59d1f792016-01-23 19:28:41 +01001490 const void* src, size_t srcSize,
1491 const U32 mls)
Yann Collet89db5e02015-11-13 11:27:46 +01001492{
1493 U32* hashTable = ctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001494 const U32 hBits = ctx->appliedParams.cParams.hashLog;
Yann Collet89db5e02015-11-13 11:27:46 +01001495 seqStore_t* seqStorePtr = &(ctx->seqStore);
1496 const BYTE* const base = ctx->base;
1497 const BYTE* const dictBase = ctx->dictBase;
1498 const BYTE* const istart = (const BYTE*)src;
1499 const BYTE* ip = istart;
1500 const BYTE* anchor = istart;
Yann Collet43dfe012016-06-13 21:43:06 +02001501 const U32 lowestIndex = ctx->lowLimit;
1502 const BYTE* const dictStart = dictBase + lowestIndex;
Yann Collet89db5e02015-11-13 11:27:46 +01001503 const U32 dictLimit = ctx->dictLimit;
Yann Collet743402c2015-11-20 12:03:53 +01001504 const BYTE* const lowPrefixPtr = base + dictLimit;
1505 const BYTE* const dictEnd = dictBase + dictLimit;
Yann Collet89db5e02015-11-13 11:27:46 +01001506 const BYTE* const iend = istart + srcSize;
1507 const BYTE* const ilimit = iend - 8;
Yann Collet4266c0a2016-06-14 01:49:25 +02001508 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
Yann Collet89db5e02015-11-13 11:27:46 +01001509
Yann Colleta436a522016-06-20 23:34:04 +02001510 /* Search Loop */
Yann Colletfb810d62016-01-28 00:18:06 +01001511 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Yann Collet89db5e02015-11-13 11:27:46 +01001512 const size_t h = ZSTD_hashPtr(ip, hBits, mls);
Yann Collet743402c2015-11-20 12:03:53 +01001513 const U32 matchIndex = hashTable[h];
Yann Collet89db5e02015-11-13 11:27:46 +01001514 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
Yann Collet6bcdeac2015-11-26 11:43:00 +01001515 const BYTE* match = matchBase + matchIndex;
Yann Collet89db5e02015-11-13 11:27:46 +01001516 const U32 current = (U32)(ip-base);
Yann Colleta436a522016-06-20 23:34:04 +02001517 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
Yann Collet402fdcf2015-11-20 12:46:08 +01001518 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
Yann Collet89db5e02015-11-13 11:27:46 +01001519 const BYTE* repMatch = repBase + repIndex;
Yann Colleta436a522016-06-20 23:34:04 +02001520 size_t mLength;
Yann Collet89db5e02015-11-13 11:27:46 +01001521 hashTable[h] = current; /* update hash table */
1522
Yann Colleta436a522016-06-20 23:34:04 +02001523 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
Yann Collet4266c0a2016-06-14 01:49:25 +02001524 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
Yann Collet402fdcf2015-11-20 12:46:08 +01001525 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete6fa70a2017-04-20 17:28:31 -07001526 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
Yann Collet743402c2015-11-20 12:03:53 +01001527 ip++;
Yann Colleta436a522016-06-20 23:34:04 +02001528 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
Yann Colletfb810d62016-01-28 00:18:06 +01001529 } else {
Yann Collet43dfe012016-06-13 21:43:06 +02001530 if ( (matchIndex < lowestIndex) ||
Yann Collet52447382016-03-20 16:00:00 +01001531 (MEM_read32(match) != MEM_read32(ip)) ) {
1532 ip += ((ip-anchor) >> g_searchStrength) + 1;
1533 continue;
1534 }
1535 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
Yann Collet5054ee02015-11-23 13:34:21 +01001536 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
Yann Colleta436a522016-06-20 23:34:04 +02001537 U32 offset;
Yann Collete6fa70a2017-04-20 17:28:31 -07001538 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
Yann Colleta436a522016-06-20 23:34:04 +02001539 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
Yann Collet402fdcf2015-11-20 12:46:08 +01001540 offset = current - matchIndex;
1541 offset_2 = offset_1;
1542 offset_1 = offset;
Yann Colleta436a522016-06-20 23:34:04 +02001543 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletfb810d62016-01-28 00:18:06 +01001544 } }
Yann Collet89db5e02015-11-13 11:27:46 +01001545
Yann Collet5054ee02015-11-23 13:34:21 +01001546 /* found a match : store it */
Yann Colleta436a522016-06-20 23:34:04 +02001547 ip += mLength;
Yann Collet402fdcf2015-11-20 12:46:08 +01001548 anchor = ip;
1549
Yann Colletfb810d62016-01-28 00:18:06 +01001550 if (ip <= ilimit) {
Yann Collet6bcdeac2015-11-26 11:43:00 +01001551 /* Fill Table */
Yann Collet3e21ec52016-09-06 15:36:19 +02001552 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001553 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1554 /* check immediate repcode */
Yann Colletfb810d62016-01-28 00:18:06 +01001555 while (ip <= ilimit) {
Yann Collet27caf2a2016-04-01 15:48:48 +02001556 U32 const current2 = (U32)(ip-base);
1557 U32 const repIndex2 = current2 - offset_2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001558 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
Yann Collet4266c0a2016-06-14 01:49:25 +02001559 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1560 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
Yann Collet5054ee02015-11-23 13:34:21 +01001561 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
Yann Collete6fa70a2017-04-20 17:28:31 -07001562 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
Yann Collet5054ee02015-11-23 13:34:21 +01001563 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
inikep7bc19b62016-04-06 09:46:01 +02001564 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
Yann Collet5054ee02015-11-23 13:34:21 +01001565 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
inikep7bc19b62016-04-06 09:46:01 +02001566 ip += repLength2;
Yann Collet402fdcf2015-11-20 12:46:08 +01001567 anchor = ip;
1568 continue;
1569 }
Yann Collet743402c2015-11-20 12:03:53 +01001570 break;
Yann Colletfb810d62016-01-28 00:18:06 +01001571 } } }
Yann Collet89db5e02015-11-13 11:27:46 +01001572
Yann Collet4266c0a2016-06-14 01:49:25 +02001573 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001574 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet4266c0a2016-06-14 01:49:25 +02001575
Yann Collet89db5e02015-11-13 11:27:46 +01001576 /* Last Literals */
Yann Collet70e45772016-03-19 18:08:32 +01001577 { size_t const lastLLSize = iend - anchor;
Yann Collet89db5e02015-11-13 11:27:46 +01001578 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1579 seqStorePtr->lit += lastLLSize;
1580 }
Yann Collet89db5e02015-11-13 11:27:46 +01001581}
1582
1583
Yann Collet82260dd2016-02-11 07:14:25 +01001584static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
Yann Collet89db5e02015-11-13 11:27:46 +01001585 const void* src, size_t srcSize)
1586{
Yann Collet1ad7c822017-05-22 17:06:04 -07001587 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet89db5e02015-11-13 11:27:46 +01001588 switch(mls)
1589 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001590 default: /* includes case 3 */
Yann Collet89db5e02015-11-13 11:27:46 +01001591 case 4 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001592 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001593 case 5 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001594 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001595 case 6 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001596 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001597 case 7 :
Yann Colleta1249dc2016-01-25 04:22:03 +01001598 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
Yann Collet89db5e02015-11-13 11:27:46 +01001599 }
1600}
1601
1602
Yann Collet04b12d82016-02-11 06:23:24 +01001603/*-*************************************
Yann Collet45dc3562016-07-12 09:47:31 +02001604* Double Fast
1605***************************************/
1606static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
1607{
1608 U32* const hashLarge = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001609 U32 const hBitsL = cctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001610 U32* const hashSmall = cctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001611 U32 const hBitsS = cctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001612 const BYTE* const base = cctx->base;
1613 const BYTE* ip = base + cctx->nextToUpdate;
Yann Collet731ef162016-07-27 21:05:12 +02001614 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
Yann Collet45dc3562016-07-12 09:47:31 +02001615 const size_t fastHashFillStep = 3;
1616
1617 while(ip <= iend) {
1618 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
1619 hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
1620 ip += fastHashFillStep;
1621 }
1622}
1623
1624
1625FORCE_INLINE
1626void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1627 const void* src, size_t srcSize,
1628 const U32 mls)
1629{
1630 U32* const hashLong = cctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001631 const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001632 U32* const hashSmall = cctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001633 const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001634 seqStore_t* seqStorePtr = &(cctx->seqStore);
1635 const BYTE* const base = cctx->base;
1636 const BYTE* const istart = (const BYTE*)src;
1637 const BYTE* ip = istart;
1638 const BYTE* anchor = istart;
1639 const U32 lowestIndex = cctx->dictLimit;
1640 const BYTE* const lowest = base + lowestIndex;
1641 const BYTE* const iend = istart + srcSize;
Yann Collet731ef162016-07-27 21:05:12 +02001642 const BYTE* const ilimit = iend - HASH_READ_SIZE;
Yann Collet45dc3562016-07-12 09:47:31 +02001643 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1644 U32 offsetSaved = 0;
1645
1646 /* init */
1647 ip += (ip==lowest);
1648 { U32 const maxRep = (U32)(ip-lowest);
1649 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1650 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
1651 }
1652
1653 /* Main Search Loop */
1654 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
1655 size_t mLength;
1656 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
1657 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
1658 U32 const current = (U32)(ip-base);
1659 U32 const matchIndexL = hashLong[h2];
1660 U32 const matchIndexS = hashSmall[h];
1661 const BYTE* matchLong = base + matchIndexL;
1662 const BYTE* match = base + matchIndexS;
1663 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
1664
Yann Colletc17e0202017-04-20 12:50:02 -07001665 assert(offset_1 <= current); /* supposed guaranteed by construction */
1666 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
Yann Collete6fa70a2017-04-20 17:28:31 -07001667 /* favor repcode */
Yann Collet45dc3562016-07-12 09:47:31 +02001668 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1669 ip++;
1670 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1671 } else {
Yann Colleteed20812016-07-12 15:11:40 +02001672 U32 offset;
Yann Collet45dc3562016-07-12 09:47:31 +02001673 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
1674 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
Yann Colleteed20812016-07-12 15:11:40 +02001675 offset = (U32)(ip-matchLong);
Yann Collet45dc3562016-07-12 09:47:31 +02001676 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1677 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
Yann Collete6fa70a2017-04-20 17:28:31 -07001678 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1679 U32 const matchIndexL3 = hashLong[hl3];
1680 const BYTE* matchL3 = base + matchIndexL3;
1681 hashLong[hl3] = current + 1;
1682 if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
1683 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
Yann Colletc54692f2016-08-24 01:10:42 +02001684 ip++;
Yann Collete6fa70a2017-04-20 17:28:31 -07001685 offset = (U32)(ip-matchL3);
1686 while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
Yann Colletc54692f2016-08-24 01:10:42 +02001687 } else {
1688 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1689 offset = (U32)(ip-match);
1690 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1691 }
Yann Collet45dc3562016-07-12 09:47:31 +02001692 } else {
1693 ip += ((ip-anchor) >> g_searchStrength) + 1;
1694 continue;
1695 }
1696
1697 offset_2 = offset_1;
1698 offset_1 = offset;
1699
1700 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1701 }
1702
1703 /* match found */
1704 ip += mLength;
1705 anchor = ip;
1706
1707 if (ip <= ilimit) {
1708 /* Fill Table */
1709 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
1710 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
1711 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
1712 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1713
1714 /* check immediate repcode */
1715 while ( (ip <= ilimit)
1716 && ( (offset_2>0)
1717 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1718 /* store sequence */
1719 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Colleteed20812016-07-12 15:11:40 +02001720 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
Yann Collet45dc3562016-07-12 09:47:31 +02001721 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
1722 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
1723 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1724 ip += rLength;
1725 anchor = ip;
1726 continue; /* faster when present ... (?) */
1727 } } }
1728
1729 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001730 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1731 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
Yann Collet45dc3562016-07-12 09:47:31 +02001732
1733 /* Last Literals */
1734 { size_t const lastLLSize = iend - anchor;
1735 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1736 seqStorePtr->lit += lastLLSize;
1737 }
1738}
1739
1740
1741static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1742{
Yann Collet1ad7c822017-05-22 17:06:04 -07001743 const U32 mls = ctx->appliedParams.cParams.searchLength;
Yann Collet45dc3562016-07-12 09:47:31 +02001744 switch(mls)
1745 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001746 default: /* includes case 3 */
Yann Collet45dc3562016-07-12 09:47:31 +02001747 case 4 :
1748 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
1749 case 5 :
1750 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
1751 case 6 :
1752 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
1753 case 7 :
1754 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
1755 }
1756}
1757
1758
1759static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1760 const void* src, size_t srcSize,
1761 const U32 mls)
1762{
1763 U32* const hashLong = ctx->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001764 U32 const hBitsL = ctx->appliedParams.cParams.hashLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001765 U32* const hashSmall = ctx->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001766 U32 const hBitsS = ctx->appliedParams.cParams.chainLog;
Yann Collet45dc3562016-07-12 09:47:31 +02001767 seqStore_t* seqStorePtr = &(ctx->seqStore);
1768 const BYTE* const base = ctx->base;
1769 const BYTE* const dictBase = ctx->dictBase;
1770 const BYTE* const istart = (const BYTE*)src;
1771 const BYTE* ip = istart;
1772 const BYTE* anchor = istart;
1773 const U32 lowestIndex = ctx->lowLimit;
1774 const BYTE* const dictStart = dictBase + lowestIndex;
1775 const U32 dictLimit = ctx->dictLimit;
1776 const BYTE* const lowPrefixPtr = base + dictLimit;
1777 const BYTE* const dictEnd = dictBase + dictLimit;
1778 const BYTE* const iend = istart + srcSize;
1779 const BYTE* const ilimit = iend - 8;
1780 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1781
1782 /* Search Loop */
1783 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1784 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
1785 const U32 matchIndex = hashSmall[hSmall];
1786 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1787 const BYTE* match = matchBase + matchIndex;
1788
1789 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
1790 const U32 matchLongIndex = hashLong[hLong];
1791 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
1792 const BYTE* matchLong = matchLongBase + matchLongIndex;
1793
1794 const U32 current = (U32)(ip-base);
1795 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1796 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1797 const BYTE* repMatch = repBase + repIndex;
1798 size_t mLength;
1799 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
1800
1801 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1802 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1803 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1804 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
1805 ip++;
1806 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1807 } else {
1808 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
1809 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
1810 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
1811 U32 offset;
1812 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
1813 offset = current - matchLongIndex;
1814 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1815 offset_2 = offset_1;
1816 offset_1 = offset;
1817 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletc54692f2016-08-24 01:10:42 +02001818
Yann Collet73d74a02016-07-12 13:03:48 +02001819 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
Yann Colletc54692f2016-08-24 01:10:42 +02001820 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1821 U32 const matchIndex3 = hashLong[h3];
1822 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
1823 const BYTE* match3 = match3Base + matchIndex3;
Yann Collet45dc3562016-07-12 09:47:31 +02001824 U32 offset;
Yann Colletc54692f2016-08-24 01:10:42 +02001825 hashLong[h3] = current + 1;
1826 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1827 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
1828 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
1829 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
1830 ip++;
1831 offset = current+1 - matchIndex3;
1832 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1833 } else {
1834 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1835 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1836 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
1837 offset = current - matchIndex;
1838 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1839 }
Yann Collet45dc3562016-07-12 09:47:31 +02001840 offset_2 = offset_1;
1841 offset_1 = offset;
1842 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
Yann Colletc54692f2016-08-24 01:10:42 +02001843
Yann Collet45dc3562016-07-12 09:47:31 +02001844 } else {
1845 ip += ((ip-anchor) >> g_searchStrength) + 1;
1846 continue;
1847 } }
1848
1849 /* found a match : store it */
1850 ip += mLength;
1851 anchor = ip;
1852
1853 if (ip <= ilimit) {
1854 /* Fill Table */
Nick Terrellf35ef5c2017-03-09 12:51:33 -08001855 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
1856 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
Yann Collet45dc3562016-07-12 09:47:31 +02001857 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1858 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
1859 /* check immediate repcode */
1860 while (ip <= ilimit) {
1861 U32 const current2 = (U32)(ip-base);
1862 U32 const repIndex2 = current2 - offset_2;
1863 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1864 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
1865 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1866 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07001867 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
Yann Collet45dc3562016-07-12 09:47:31 +02001868 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1869 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1870 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
1871 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
1872 ip += repLength2;
1873 anchor = ip;
1874 continue;
1875 }
1876 break;
1877 } } }
1878
1879 /* save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08001880 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet45dc3562016-07-12 09:47:31 +02001881
1882 /* Last Literals */
1883 { size_t const lastLLSize = iend - anchor;
1884 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1885 seqStorePtr->lit += lastLLSize;
1886 }
1887}
1888
1889
1890static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
1891 const void* src, size_t srcSize)
1892{
Yann Collet1ad7c822017-05-22 17:06:04 -07001893 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet45dc3562016-07-12 09:47:31 +02001894 switch(mls)
1895 {
Yann Collet933ce4a2017-03-29 14:32:15 -07001896 default: /* includes case 3 */
Yann Collet45dc3562016-07-12 09:47:31 +02001897 case 4 :
1898 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
1899 case 5 :
1900 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
1901 case 6 :
1902 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
1903 case 7 :
1904 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
1905 }
1906}
1907
1908
1909/*-*************************************
Yann Collet96b9f0b2015-11-04 03:52:54 +01001910* Binary Tree search
Yann Colletf3eca252015-10-22 15:31:46 +01001911***************************************/
Yann Collet04b12d82016-02-11 06:23:24 +01001912/** ZSTD_insertBt1() : add one or multiple positions to tree.
1913* ip : assumed <= iend-8 .
Yann Collet06eade52015-11-23 14:23:47 +01001914* @return : nb of positions added */
Yann Collet1358f912016-01-01 07:29:39 +01001915static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
1916 U32 extDict)
Yann Collet96b9f0b2015-11-04 03:52:54 +01001917{
Yann Collet731ef162016-07-27 21:05:12 +02001918 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001919 U32 const hashLog = zc->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +02001920 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
1921 U32* const bt = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07001922 U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
Yann Collet731ef162016-07-27 21:05:12 +02001923 U32 const btMask = (1 << btLog) - 1;
1924 U32 matchIndex = hashTable[h];
Yann Collet96b9f0b2015-11-04 03:52:54 +01001925 size_t commonLengthSmaller=0, commonLengthLarger=0;
1926 const BYTE* const base = zc->base;
Yann Collet1358f912016-01-01 07:29:39 +01001927 const BYTE* const dictBase = zc->dictBase;
1928 const U32 dictLimit = zc->dictLimit;
1929 const BYTE* const dictEnd = dictBase + dictLimit;
1930 const BYTE* const prefixStart = base + dictLimit;
Yann Collet2b361cf2016-10-14 16:03:34 -07001931 const BYTE* match;
Yann Collet6c3e2e72015-12-11 10:44:07 +01001932 const U32 current = (U32)(ip-base);
Yann Collete9eba602015-11-08 15:08:03 +01001933 const U32 btLow = btMask >= current ? 0 : current - btMask;
Yann Collet96b9f0b2015-11-04 03:52:54 +01001934 U32* smallerPtr = bt + 2*(current&btMask);
Yann Colleta87278a2016-01-17 00:12:55 +01001935 U32* largerPtr = smallerPtr + 1;
Yann Collet59d70632015-11-04 12:05:27 +01001936 U32 dummy32; /* to be nullified at the end */
Yann Collet731ef162016-07-27 21:05:12 +02001937 U32 const windowLow = zc->lowLimit;
Yann Collet72e84cf2015-12-31 19:08:44 +01001938 U32 matchEndIdx = current+8;
Yann Colletb8a6f682016-02-15 17:06:29 +01001939 size_t bestLength = 8;
Yann Colletc0932082016-06-30 14:07:30 +02001940#ifdef ZSTD_C_PREDICT
Yann Collet7beaa052016-01-21 11:57:45 +01001941 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
1942 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
1943 predictedSmall += (predictedSmall>0);
1944 predictedLarge += (predictedLarge>0);
Yann Colletc0932082016-06-30 14:07:30 +02001945#endif /* ZSTD_C_PREDICT */
Yann Colletf48e35c2015-11-07 01:13:31 +01001946
Yann Collet6c3e2e72015-12-11 10:44:07 +01001947 hashTable[h] = current; /* Update Hash Table */
Yann Collet96b9f0b2015-11-04 03:52:54 +01001948
Yann Colletfb810d62016-01-28 00:18:06 +01001949 while (nbCompares-- && (matchIndex > windowLow)) {
Yann Collet25f46dc2016-11-29 16:59:27 -08001950 U32* const nextPtr = bt + 2*(matchIndex & btMask);
Yann Collet96b9f0b2015-11-04 03:52:54 +01001951 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
Yann Collet25f46dc2016-11-29 16:59:27 -08001952
Yann Colletc0932082016-06-30 14:07:30 +02001953#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
Yann Collet70e8c382016-02-10 13:37:52 +01001954 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
Yann Colletfb810d62016-01-28 00:18:06 +01001955 if (matchIndex == predictedSmall) {
1956 /* no need to check length, result known */
Yann Colleta87278a2016-01-17 00:12:55 +01001957 *smallerPtr = matchIndex;
1958 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1959 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
1960 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Collet7beaa052016-01-21 11:57:45 +01001961 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
Yann Colleta87278a2016-01-17 00:12:55 +01001962 continue;
1963 }
Yann Colletfb810d62016-01-28 00:18:06 +01001964 if (matchIndex == predictedLarge) {
Yann Colleta87278a2016-01-17 00:12:55 +01001965 *largerPtr = matchIndex;
1966 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1967 largerPtr = nextPtr;
1968 matchIndex = nextPtr[0];
Yann Collet7beaa052016-01-21 11:57:45 +01001969 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
Yann Colleta87278a2016-01-17 00:12:55 +01001970 continue;
1971 }
Yann Collet04b12d82016-02-11 06:23:24 +01001972#endif
Yann Colletfb810d62016-01-28 00:18:06 +01001973 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
Yann Collet1358f912016-01-01 07:29:39 +01001974 match = base + matchIndex;
1975 if (match[matchLength] == ip[matchLength])
1976 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
Yann Colletfb810d62016-01-28 00:18:06 +01001977 } else {
Yann Collet1358f912016-01-01 07:29:39 +01001978 match = dictBase + matchIndex;
1979 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
1980 if (matchIndex+matchLength >= dictLimit)
Nick Terrellf35ef5c2017-03-09 12:51:33 -08001981 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
Yann Collet1358f912016-01-01 07:29:39 +01001982 }
Yann Collet96b9f0b2015-11-04 03:52:54 +01001983
Yann Colletb8a6f682016-02-15 17:06:29 +01001984 if (matchLength > bestLength) {
1985 bestLength = matchLength;
1986 if (matchLength > matchEndIdx - matchIndex)
1987 matchEndIdx = matchIndex + (U32)matchLength;
1988 }
Yann Colletee3f4512015-12-29 22:26:09 +01001989
Yann Collet59d70632015-11-04 12:05:27 +01001990 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
Yann Collet1358f912016-01-01 07:29:39 +01001991 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
Yann Collet96b9f0b2015-11-04 03:52:54 +01001992
Yann Colletfb810d62016-01-28 00:18:06 +01001993 if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
Yann Collet96b9f0b2015-11-04 03:52:54 +01001994 /* match is smaller than current */
1995 *smallerPtr = matchIndex; /* update smaller idx */
1996 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
Yann Colletf48e35c2015-11-07 01:13:31 +01001997 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
Yann Collet96b9f0b2015-11-04 03:52:54 +01001998 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
Yann Colletf48e35c2015-11-07 01:13:31 +01001999 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Colletfb810d62016-01-28 00:18:06 +01002000 } else {
Yann Collet96b9f0b2015-11-04 03:52:54 +01002001 /* match is larger than current */
2002 *largerPtr = matchIndex;
2003 commonLengthLarger = matchLength;
Yann Colletf48e35c2015-11-07 01:13:31 +01002004 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
Yann Collet96b9f0b2015-11-04 03:52:54 +01002005 largerPtr = nextPtr;
Yann Colletf48e35c2015-11-07 01:13:31 +01002006 matchIndex = nextPtr[0];
Yann Colletfb810d62016-01-28 00:18:06 +01002007 } }
Yann Collet96b9f0b2015-11-04 03:52:54 +01002008
Yann Collet59d70632015-11-04 12:05:27 +01002009 *smallerPtr = *largerPtr = 0;
Yann Colleta436a522016-06-20 23:34:04 +02002010 if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
Yann Colletb8a6f682016-02-15 17:06:29 +01002011 if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
2012 return 1;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002013}
2014
2015
Yann Collet82260dd2016-02-11 07:14:25 +01002016static size_t ZSTD_insertBtAndFindBestMatch (
Yann Collet03526e12015-11-23 15:29:15 +01002017 ZSTD_CCtx* zc,
2018 const BYTE* const ip, const BYTE* const iend,
2019 size_t* offsetPtr,
Yann Collet2cc12cb2016-01-01 07:47:58 +01002020 U32 nbCompares, const U32 mls,
2021 U32 extDict)
Yann Collet03526e12015-11-23 15:29:15 +01002022{
Yann Collet731ef162016-07-27 21:05:12 +02002023 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002024 U32 const hashLog = zc->appliedParams.cParams.hashLog;
Yann Collet731ef162016-07-27 21:05:12 +02002025 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
2026 U32* const bt = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002027 U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
Yann Collet731ef162016-07-27 21:05:12 +02002028 U32 const btMask = (1 << btLog) - 1;
Yann Collet03526e12015-11-23 15:29:15 +01002029 U32 matchIndex = hashTable[h];
2030 size_t commonLengthSmaller=0, commonLengthLarger=0;
2031 const BYTE* const base = zc->base;
2032 const BYTE* const dictBase = zc->dictBase;
2033 const U32 dictLimit = zc->dictLimit;
2034 const BYTE* const dictEnd = dictBase + dictLimit;
2035 const BYTE* const prefixStart = base + dictLimit;
2036 const U32 current = (U32)(ip-base);
2037 const U32 btLow = btMask >= current ? 0 : current - btMask;
2038 const U32 windowLow = zc->lowLimit;
2039 U32* smallerPtr = bt + 2*(current&btMask);
2040 U32* largerPtr = bt + 2*(current&btMask) + 1;
Yann Collet72e84cf2015-12-31 19:08:44 +01002041 U32 matchEndIdx = current+8;
Yann Collet03526e12015-11-23 15:29:15 +01002042 U32 dummy32; /* to be nullified at the end */
inikep64d7bcb2016-04-07 19:14:09 +02002043 size_t bestLength = 0;
Yann Collet03526e12015-11-23 15:29:15 +01002044
Yann Collet6c3e2e72015-12-11 10:44:07 +01002045 hashTable[h] = current; /* Update Hash Table */
Yann Collet03526e12015-11-23 15:29:15 +01002046
Yann Colletfb810d62016-01-28 00:18:06 +01002047 while (nbCompares-- && (matchIndex > windowLow)) {
Yann Collet25f46dc2016-11-29 16:59:27 -08002048 U32* const nextPtr = bt + 2*(matchIndex & btMask);
Yann Collet03526e12015-11-23 15:29:15 +01002049 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
2050 const BYTE* match;
2051
Yann Colletfb810d62016-01-28 00:18:06 +01002052 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
Yann Collet03526e12015-11-23 15:29:15 +01002053 match = base + matchIndex;
2054 if (match[matchLength] == ip[matchLength])
2055 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
Yann Colletfb810d62016-01-28 00:18:06 +01002056 } else {
Yann Collet03526e12015-11-23 15:29:15 +01002057 match = dictBase + matchIndex;
2058 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
Yann Collet225179d2015-11-23 16:52:22 +01002059 if (matchIndex+matchLength >= dictLimit)
Nick Terrellf35ef5c2017-03-09 12:51:33 -08002060 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
Yann Collet03526e12015-11-23 15:29:15 +01002061 }
2062
Yann Colletfb810d62016-01-28 00:18:06 +01002063 if (matchLength > bestLength) {
Yann Colletee3f4512015-12-29 22:26:09 +01002064 if (matchLength > matchEndIdx - matchIndex)
Yann Collet48da1642015-12-29 23:40:02 +01002065 matchEndIdx = matchIndex + (U32)matchLength;
Yann Collet49bb0042016-06-04 20:17:38 +02002066 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
inikep75716852016-04-06 12:34:42 +02002067 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
Yann Collet03526e12015-11-23 15:29:15 +01002068 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
2069 break; /* drop, to guarantee consistency (miss a little bit of compression) */
2070 }
2071
Yann Colletfb810d62016-01-28 00:18:06 +01002072 if (match[matchLength] < ip[matchLength]) {
Yann Collet03526e12015-11-23 15:29:15 +01002073 /* match is smaller than current */
2074 *smallerPtr = matchIndex; /* update smaller idx */
2075 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
2076 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2077 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
2078 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
Yann Colletfb810d62016-01-28 00:18:06 +01002079 } else {
Yann Collet03526e12015-11-23 15:29:15 +01002080 /* match is larger than current */
2081 *largerPtr = matchIndex;
2082 commonLengthLarger = matchLength;
2083 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
2084 largerPtr = nextPtr;
2085 matchIndex = nextPtr[0];
Yann Collet768c6bc2016-02-10 14:01:49 +01002086 } }
Yann Collet03526e12015-11-23 15:29:15 +01002087
2088 *smallerPtr = *largerPtr = 0;
2089
Yann Collet72e84cf2015-12-31 19:08:44 +01002090 zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
inikep64d7bcb2016-04-07 19:14:09 +02002091 return bestLength;
Yann Collet03526e12015-11-23 15:29:15 +01002092}
2093
Yann Collet2cc12cb2016-01-01 07:47:58 +01002094
Yann Colletb8a6f682016-02-15 17:06:29 +01002095static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
Yann Collet82260dd2016-02-11 07:14:25 +01002096{
2097 const BYTE* const base = zc->base;
2098 const U32 target = (U32)(ip - base);
2099 U32 idx = zc->nextToUpdate;
Yann Colletb8a6f682016-02-15 17:06:29 +01002100
2101 while(idx < target)
2102 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
Yann Collet82260dd2016-02-11 07:14:25 +01002103}
2104
Yann Collet52447382016-03-20 16:00:00 +01002105/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
Yann Collet82260dd2016-02-11 07:14:25 +01002106static size_t ZSTD_BtFindBestMatch (
Yann Collet2cc12cb2016-01-01 07:47:58 +01002107 ZSTD_CCtx* zc,
2108 const BYTE* const ip, const BYTE* const iLimit,
2109 size_t* offsetPtr,
2110 const U32 maxNbAttempts, const U32 mls)
2111{
2112 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
Yann Colletb8a6f682016-02-15 17:06:29 +01002113 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
Yann Collet2cc12cb2016-01-01 07:47:58 +01002114 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
2115}
2116
2117
Yann Collet768c6bc2016-02-10 14:01:49 +01002118static size_t ZSTD_BtFindBestMatch_selectMLS (
Yann Collet2cc12cb2016-01-01 07:47:58 +01002119 ZSTD_CCtx* zc, /* Index table will be updated */
2120 const BYTE* ip, const BYTE* const iLimit,
2121 size_t* offsetPtr,
2122 const U32 maxNbAttempts, const U32 matchLengthSearch)
2123{
2124 switch(matchLengthSearch)
2125 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002126 default : /* includes case 3 */
Yann Collet2cc12cb2016-01-01 07:47:58 +01002127 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
2128 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
Yann Collet933ce4a2017-03-29 14:32:15 -07002129 case 7 :
Yann Collet2cc12cb2016-01-01 07:47:58 +01002130 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
2131 }
2132}
2133
2134
Yann Colletb8a6f682016-02-15 17:06:29 +01002135static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
2136{
2137 const BYTE* const base = zc->base;
2138 const U32 target = (U32)(ip - base);
2139 U32 idx = zc->nextToUpdate;
2140
2141 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
2142}
2143
inikep64d7bcb2016-04-07 19:14:09 +02002144
Yann Collet03526e12015-11-23 15:29:15 +01002145/** Tree updater, providing best match */
Yann Collet82260dd2016-02-11 07:14:25 +01002146static size_t ZSTD_BtFindBestMatch_extDict (
Yann Collet03526e12015-11-23 15:29:15 +01002147 ZSTD_CCtx* zc,
2148 const BYTE* const ip, const BYTE* const iLimit,
2149 size_t* offsetPtr,
2150 const U32 maxNbAttempts, const U32 mls)
2151{
Yann Colletee3f4512015-12-29 22:26:09 +01002152 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
Yann Colletb8a6f682016-02-15 17:06:29 +01002153 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
Yann Collet2cc12cb2016-01-01 07:47:58 +01002154 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
Yann Collet03526e12015-11-23 15:29:15 +01002155}
2156
2157
Yann Collet82260dd2016-02-11 07:14:25 +01002158static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
Yann Collet03526e12015-11-23 15:29:15 +01002159 ZSTD_CCtx* zc, /* Index table will be updated */
2160 const BYTE* ip, const BYTE* const iLimit,
2161 size_t* offsetPtr,
2162 const U32 maxNbAttempts, const U32 matchLengthSearch)
2163{
2164 switch(matchLengthSearch)
2165 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002166 default : /* includes case 3 */
Yann Collet03526e12015-11-23 15:29:15 +01002167 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
2168 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
Yann Collet933ce4a2017-03-29 14:32:15 -07002169 case 7 :
Yann Collet03526e12015-11-23 15:29:15 +01002170 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
2171 }
2172}
2173
2174
Yann Collet5106a762015-11-05 15:00:24 +01002175
Yann Collet731ef162016-07-27 21:05:12 +02002176/* *********************************
inikep64d7bcb2016-04-07 19:14:09 +02002177* Hash Chain
Yann Collet731ef162016-07-27 21:05:12 +02002178***********************************/
inikep64d7bcb2016-04-07 19:14:09 +02002179#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
2180
2181/* Update chains up to ip (excluded)
Anders Oleson517577b2017-02-20 12:08:59 -08002182 Assumption : always within prefix (i.e. not within extDict) */
inikep64d7bcb2016-04-07 19:14:09 +02002183FORCE_INLINE
2184U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
2185{
2186 U32* const hashTable = zc->hashTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002187 const U32 hashLog = zc->appliedParams.cParams.hashLog;
inikep64d7bcb2016-04-07 19:14:09 +02002188 U32* const chainTable = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002189 const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
inikep64d7bcb2016-04-07 19:14:09 +02002190 const BYTE* const base = zc->base;
2191 const U32 target = (U32)(ip - base);
2192 U32 idx = zc->nextToUpdate;
2193
Yann Collet22d76322016-06-21 08:01:51 +02002194 while(idx < target) { /* catch up */
inikep64d7bcb2016-04-07 19:14:09 +02002195 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
2196 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
2197 hashTable[h] = idx;
2198 idx++;
2199 }
2200
2201 zc->nextToUpdate = target;
2202 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
2203}
2204
2205
Nick Terrell55fc1f92017-05-24 13:50:10 -07002206/* inlining is important to hardwire a hot branch (template emulation) */
2207FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002208size_t ZSTD_HcFindBestMatch_generic (
2209 ZSTD_CCtx* zc, /* Index table will be updated */
2210 const BYTE* const ip, const BYTE* const iLimit,
2211 size_t* offsetPtr,
2212 const U32 maxNbAttempts, const U32 mls, const U32 extDict)
2213{
2214 U32* const chainTable = zc->chainTable;
Yann Collet1ad7c822017-05-22 17:06:04 -07002215 const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
inikep64d7bcb2016-04-07 19:14:09 +02002216 const U32 chainMask = chainSize-1;
2217 const BYTE* const base = zc->base;
2218 const BYTE* const dictBase = zc->dictBase;
2219 const U32 dictLimit = zc->dictLimit;
2220 const BYTE* const prefixStart = base + dictLimit;
2221 const BYTE* const dictEnd = dictBase + dictLimit;
2222 const U32 lowLimit = zc->lowLimit;
2223 const U32 current = (U32)(ip-base);
2224 const U32 minChain = current > chainSize ? current - chainSize : 0;
2225 int nbAttempts=maxNbAttempts;
Yann Collete42afbc2017-04-26 11:39:35 -07002226 size_t ml=4-1;
inikep64d7bcb2016-04-07 19:14:09 +02002227
2228 /* HC4 match finder */
2229 U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
2230
Yann Collet22d76322016-06-21 08:01:51 +02002231 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
inikep64d7bcb2016-04-07 19:14:09 +02002232 const BYTE* match;
2233 size_t currentMl=0;
2234 if ((!extDict) || matchIndex >= dictLimit) {
2235 match = base + matchIndex;
2236 if (match[ml] == ip[ml]) /* potentially better */
2237 currentMl = ZSTD_count(ip, match, iLimit);
2238 } else {
2239 match = dictBase + matchIndex;
2240 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
Yann Collete42afbc2017-04-26 11:39:35 -07002241 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002242 }
2243
2244 /* save best solution */
Yann Colletc17e0202017-04-20 12:50:02 -07002245 if (currentMl > ml) {
2246 ml = currentMl;
2247 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
2248 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
2249 }
inikep64d7bcb2016-04-07 19:14:09 +02002250
2251 if (matchIndex <= minChain) break;
2252 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
2253 }
2254
2255 return ml;
2256}
2257
2258
2259FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
2260 ZSTD_CCtx* zc,
2261 const BYTE* ip, const BYTE* const iLimit,
2262 size_t* offsetPtr,
2263 const U32 maxNbAttempts, const U32 matchLengthSearch)
2264{
2265 switch(matchLengthSearch)
2266 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002267 default : /* includes case 3 */
inikep64d7bcb2016-04-07 19:14:09 +02002268 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
2269 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
Yann Collet933ce4a2017-03-29 14:32:15 -07002270 case 7 :
inikep64d7bcb2016-04-07 19:14:09 +02002271 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
2272 }
2273}
2274
2275
2276FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
2277 ZSTD_CCtx* zc,
2278 const BYTE* ip, const BYTE* const iLimit,
2279 size_t* offsetPtr,
2280 const U32 maxNbAttempts, const U32 matchLengthSearch)
2281{
2282 switch(matchLengthSearch)
2283 {
Yann Collet933ce4a2017-03-29 14:32:15 -07002284 default : /* includes case 3 */
inikep64d7bcb2016-04-07 19:14:09 +02002285 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
2286 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
Yann Collet933ce4a2017-03-29 14:32:15 -07002287 case 7 :
inikep64d7bcb2016-04-07 19:14:09 +02002288 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
2289 }
2290}
2291
inikep64d7bcb2016-04-07 19:14:09 +02002292
Yann Collet287b7d92015-11-22 13:24:05 +01002293/* *******************************
inikep64d7bcb2016-04-07 19:14:09 +02002294* Common parser - lazy strategy
inikepfaa8d8a2016-04-05 19:01:10 +02002295*********************************/
Yann Collet96b9f0b2015-11-04 03:52:54 +01002296FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002297void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
2298 const void* src, size_t srcSize,
2299 const U32 searchMethod, const U32 depth)
Yann Collet96b9f0b2015-11-04 03:52:54 +01002300{
inikepfaa8d8a2016-04-05 19:01:10 +02002301 seqStore_t* seqStorePtr = &(ctx->seqStore);
2302 const BYTE* const istart = (const BYTE*)src;
2303 const BYTE* ip = istart;
2304 const BYTE* anchor = istart;
2305 const BYTE* const iend = istart + srcSize;
2306 const BYTE* const ilimit = iend - 8;
2307 const BYTE* const base = ctx->base + ctx->dictLimit;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002308
Yann Collet1ad7c822017-05-22 17:06:04 -07002309 U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
2310 U32 const mls = ctx->appliedParams.cParams.searchLength;
Yann Collet96b9f0b2015-11-04 03:52:54 +01002311
inikep64d7bcb2016-04-07 19:14:09 +02002312 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2313 size_t* offsetPtr,
2314 U32 maxNbAttempts, U32 matchLengthSearch);
Yann Collet43dfe012016-06-13 21:43:06 +02002315 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
Yann Collet9634f672016-07-03 01:23:58 +02002316 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
inikep64d7bcb2016-04-07 19:14:09 +02002317
inikepfaa8d8a2016-04-05 19:01:10 +02002318 /* init */
Yann Collet4266c0a2016-06-14 01:49:25 +02002319 ip += (ip==base);
inikep64d7bcb2016-04-07 19:14:09 +02002320 ctx->nextToUpdate3 = ctx->nextToUpdate;
Yann Collet9634f672016-07-03 01:23:58 +02002321 { U32 const maxRep = (U32)(ip-base);
2322 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
2323 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
2324 }
Yann Collet96b9f0b2015-11-04 03:52:54 +01002325
inikepfaa8d8a2016-04-05 19:01:10 +02002326 /* Match Loop */
2327 while (ip < ilimit) {
2328 size_t matchLength=0;
2329 size_t offset=0;
2330 const BYTE* start=ip+1;
Yann Collet5106a762015-11-05 15:00:24 +01002331
inikepfaa8d8a2016-04-05 19:01:10 +02002332 /* check repCode */
Yann Collet9634f672016-07-03 01:23:58 +02002333 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
inikepfaa8d8a2016-04-05 19:01:10 +02002334 /* repcode : we take it */
Yann Collete42afbc2017-04-26 11:39:35 -07002335 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002336 if (depth==0) goto _storeSequence;
Yann Collet5106a762015-11-05 15:00:24 +01002337 }
Yann Collet5be2dd22015-11-11 13:43:58 +01002338
inikepfaa8d8a2016-04-05 19:01:10 +02002339 /* first search (depth 0) */
2340 { size_t offsetFound = 99999999;
inikep64d7bcb2016-04-07 19:14:09 +02002341 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
inikepfaa8d8a2016-04-05 19:01:10 +02002342 if (ml2 > matchLength)
inikep75716852016-04-06 12:34:42 +02002343 matchLength = ml2, start = ip, offset=offsetFound;
inikepfaa8d8a2016-04-05 19:01:10 +02002344 }
Yann Collet5106a762015-11-05 15:00:24 +01002345
Yann Collete42afbc2017-04-26 11:39:35 -07002346 if (matchLength < 4) {
inikepfaa8d8a2016-04-05 19:01:10 +02002347 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2348 continue;
2349 }
2350
inikep64d7bcb2016-04-07 19:14:09 +02002351 /* let's try to find a better solution */
2352 if (depth>=1)
2353 while (ip<ilimit) {
2354 ip ++;
Yann Collet9634f672016-07-03 01:23:58 +02002355 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
Yann Collete42afbc2017-04-26 11:39:35 -07002356 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002357 int const gain2 = (int)(mlRep * 3);
Yann Collet49bb0042016-06-04 20:17:38 +02002358 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002359 if ((mlRep >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002360 matchLength = mlRep, offset = 0, start = ip;
2361 }
2362 { size_t offset2=99999999;
2363 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002364 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2365 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
Yann Collete42afbc2017-04-26 11:39:35 -07002366 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002367 matchLength = ml2, offset = offset2, start = ip;
2368 continue; /* search a better one */
2369 } }
inikepfaa8d8a2016-04-05 19:01:10 +02002370
inikep64d7bcb2016-04-07 19:14:09 +02002371 /* let's find an even better one */
2372 if ((depth==2) && (ip<ilimit)) {
2373 ip ++;
Yann Collet9634f672016-07-03 01:23:58 +02002374 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
Yann Collete42afbc2017-04-26 11:39:35 -07002375 size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002376 int const gain2 = (int)(ml2 * 4);
Yann Collet49bb0042016-06-04 20:17:38 +02002377 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002378 if ((ml2 >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002379 matchLength = ml2, offset = 0, start = ip;
2380 }
2381 { size_t offset2=99999999;
2382 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002383 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2384 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
Yann Collete42afbc2017-04-26 11:39:35 -07002385 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002386 matchLength = ml2, offset = offset2, start = ip;
2387 continue;
2388 } } }
2389 break; /* nothing found : store previous solution */
2390 }
2391
2392 /* catch up */
2393 if (offset) {
Yann Colletc17e0202017-04-20 12:50:02 -07002394 while ( (start > anchor)
2395 && (start > base+offset-ZSTD_REP_MOVE)
2396 && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]) ) /* only search for offset within prefix */
inikep64d7bcb2016-04-07 19:14:09 +02002397 { start--; matchLength++; }
Yann Collet9634f672016-07-03 01:23:58 +02002398 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
inikep64d7bcb2016-04-07 19:14:09 +02002399 }
2400
inikepfaa8d8a2016-04-05 19:01:10 +02002401 /* store sequence */
inikep64d7bcb2016-04-07 19:14:09 +02002402_storeSequence:
inikepfaa8d8a2016-04-05 19:01:10 +02002403 { size_t const litLength = start - anchor;
Yann Colletd57dffb2016-07-03 01:48:26 +02002404 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
inikepfaa8d8a2016-04-05 19:01:10 +02002405 anchor = ip = start + matchLength;
2406 }
Yann Collet48537162016-04-07 15:24:29 +02002407
inikepfaa8d8a2016-04-05 19:01:10 +02002408 /* check immediate repcode */
2409 while ( (ip <= ilimit)
Yann Collet9634f672016-07-03 01:23:58 +02002410 && ((offset_2>0)
2411 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
inikepfaa8d8a2016-04-05 19:01:10 +02002412 /* store sequence */
Yann Collete42afbc2017-04-26 11:39:35 -07002413 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
Yann Collet9634f672016-07-03 01:23:58 +02002414 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
inikep7bc19b62016-04-06 09:46:01 +02002415 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2416 ip += matchLength;
inikepfaa8d8a2016-04-05 19:01:10 +02002417 anchor = ip;
2418 continue; /* faster when present ... (?) */
inikep64d7bcb2016-04-07 19:14:09 +02002419 } }
inikepfaa8d8a2016-04-05 19:01:10 +02002420
Yann Collet4266c0a2016-06-14 01:49:25 +02002421 /* Save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08002422 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
2423 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
Yann Collet4266c0a2016-06-14 01:49:25 +02002424
inikepfaa8d8a2016-04-05 19:01:10 +02002425 /* Last Literals */
2426 { size_t const lastLLSize = iend - anchor;
2427 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2428 seqStorePtr->lit += lastLLSize;
Yann Collet5106a762015-11-05 15:00:24 +01002429 }
Yann Collet5106a762015-11-05 15:00:24 +01002430}
2431
Yann Collet5be2dd22015-11-11 13:43:58 +01002432
inikep64d7bcb2016-04-07 19:14:09 +02002433static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2434{
2435 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
2436}
2437
2438static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2439{
2440 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
2441}
2442
2443static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2444{
2445 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
2446}
2447
2448static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2449{
2450 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
2451}
2452
2453
inikepfaa8d8a2016-04-05 19:01:10 +02002454FORCE_INLINE
inikep64d7bcb2016-04-07 19:14:09 +02002455void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
2456 const void* src, size_t srcSize,
2457 const U32 searchMethod, const U32 depth)
Yann Collet5be2dd22015-11-11 13:43:58 +01002458{
inikepfaa8d8a2016-04-05 19:01:10 +02002459 seqStore_t* seqStorePtr = &(ctx->seqStore);
2460 const BYTE* const istart = (const BYTE*)src;
2461 const BYTE* ip = istart;
2462 const BYTE* anchor = istart;
2463 const BYTE* const iend = istart + srcSize;
2464 const BYTE* const ilimit = iend - 8;
2465 const BYTE* const base = ctx->base;
2466 const U32 dictLimit = ctx->dictLimit;
Yann Collet43dfe012016-06-13 21:43:06 +02002467 const U32 lowestIndex = ctx->lowLimit;
inikepfaa8d8a2016-04-05 19:01:10 +02002468 const BYTE* const prefixStart = base + dictLimit;
2469 const BYTE* const dictBase = ctx->dictBase;
2470 const BYTE* const dictEnd = dictBase + dictLimit;
2471 const BYTE* const dictStart = dictBase + ctx->lowLimit;
2472
Yann Collet1ad7c822017-05-22 17:06:04 -07002473 const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
2474 const U32 mls = ctx->appliedParams.cParams.searchLength;
inikepfaa8d8a2016-04-05 19:01:10 +02002475
inikep64d7bcb2016-04-07 19:14:09 +02002476 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2477 size_t* offsetPtr,
2478 U32 maxNbAttempts, U32 matchLengthSearch);
2479 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2480
Yann Collet302ff032016-07-03 01:28:16 +02002481 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
inikepfaa8d8a2016-04-05 19:01:10 +02002482
Yann Collet302ff032016-07-03 01:28:16 +02002483 /* init */
inikep64d7bcb2016-04-07 19:14:09 +02002484 ctx->nextToUpdate3 = ctx->nextToUpdate;
Yann Collet4266c0a2016-06-14 01:49:25 +02002485 ip += (ip == prefixStart);
inikepfaa8d8a2016-04-05 19:01:10 +02002486
2487 /* Match Loop */
2488 while (ip < ilimit) {
2489 size_t matchLength=0;
2490 size_t offset=0;
2491 const BYTE* start=ip+1;
inikep64d7bcb2016-04-07 19:14:09 +02002492 U32 current = (U32)(ip-base);
inikepfaa8d8a2016-04-05 19:01:10 +02002493
2494 /* check repCode */
Yann Collet302ff032016-07-03 01:28:16 +02002495 { const U32 repIndex = (U32)(current+1 - offset_1);
inikepfaa8d8a2016-04-05 19:01:10 +02002496 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2497 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002498 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002499 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
inikepfaa8d8a2016-04-05 19:01:10 +02002500 /* repcode detected we should take it */
2501 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002502 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002503 if (depth==0) goto _storeSequence;
inikepfaa8d8a2016-04-05 19:01:10 +02002504 } }
2505
2506 /* first search (depth 0) */
2507 { size_t offsetFound = 99999999;
inikep64d7bcb2016-04-07 19:14:09 +02002508 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
inikepfaa8d8a2016-04-05 19:01:10 +02002509 if (ml2 > matchLength)
inikep75716852016-04-06 12:34:42 +02002510 matchLength = ml2, start = ip, offset=offsetFound;
inikepfaa8d8a2016-04-05 19:01:10 +02002511 }
2512
Yann Collete42afbc2017-04-26 11:39:35 -07002513 if (matchLength < 4) {
inikepfaa8d8a2016-04-05 19:01:10 +02002514 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2515 continue;
2516 }
2517
inikep64d7bcb2016-04-07 19:14:09 +02002518 /* let's try to find a better solution */
2519 if (depth>=1)
2520 while (ip<ilimit) {
2521 ip ++;
2522 current++;
2523 /* check repCode */
2524 if (offset) {
Yann Collet302ff032016-07-03 01:28:16 +02002525 const U32 repIndex = (U32)(current - offset_1);
inikep64d7bcb2016-04-07 19:14:09 +02002526 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2527 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002528 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002529 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2530 /* repcode detected */
2531 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002532 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
inikep64d7bcb2016-04-07 19:14:09 +02002533 int const gain2 = (int)(repLength * 3);
Yann Collet49bb0042016-06-04 20:17:38 +02002534 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002535 if ((repLength >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002536 matchLength = repLength, offset = 0, start = ip;
2537 } }
2538
2539 /* search match, depth 1 */
2540 { size_t offset2=99999999;
2541 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002542 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2543 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
Yann Collete42afbc2017-04-26 11:39:35 -07002544 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002545 matchLength = ml2, offset = offset2, start = ip;
2546 continue; /* search a better one */
2547 } }
2548
2549 /* let's find an even better one */
2550 if ((depth==2) && (ip<ilimit)) {
2551 ip ++;
2552 current++;
2553 /* check repCode */
2554 if (offset) {
Yann Collet302ff032016-07-03 01:28:16 +02002555 const U32 repIndex = (U32)(current - offset_1);
inikep64d7bcb2016-04-07 19:14:09 +02002556 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2557 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002558 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikep64d7bcb2016-04-07 19:14:09 +02002559 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2560 /* repcode detected */
2561 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002562 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
Yann Colletc17e0202017-04-20 12:50:02 -07002563 int const gain2 = (int)(repLength * 4);
2564 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
Yann Collete42afbc2017-04-26 11:39:35 -07002565 if ((repLength >= 4) && (gain2 > gain1))
inikep64d7bcb2016-04-07 19:14:09 +02002566 matchLength = repLength, offset = 0, start = ip;
2567 } }
2568
2569 /* search match, depth 2 */
2570 { size_t offset2=99999999;
2571 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet49bb0042016-06-04 20:17:38 +02002572 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2573 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
Yann Collete42afbc2017-04-26 11:39:35 -07002574 if ((ml2 >= 4) && (gain2 > gain1)) {
inikep64d7bcb2016-04-07 19:14:09 +02002575 matchLength = ml2, offset = offset2, start = ip;
2576 continue;
2577 } } }
2578 break; /* nothing found : store previous solution */
2579 }
2580
inikepfaa8d8a2016-04-05 19:01:10 +02002581 /* catch up */
inikep64d7bcb2016-04-07 19:14:09 +02002582 if (offset) {
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002583 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
inikepfaa8d8a2016-04-05 19:01:10 +02002584 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2585 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
inikep64d7bcb2016-04-07 19:14:09 +02002586 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
Yann Collet302ff032016-07-03 01:28:16 +02002587 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
Yann Collet48537162016-04-07 15:24:29 +02002588 }
inikepfaa8d8a2016-04-05 19:01:10 +02002589
inikepfaa8d8a2016-04-05 19:01:10 +02002590 /* store sequence */
inikep64d7bcb2016-04-07 19:14:09 +02002591_storeSequence:
inikepfaa8d8a2016-04-05 19:01:10 +02002592 { size_t const litLength = start - anchor;
Yann Colletd57dffb2016-07-03 01:48:26 +02002593 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
inikepfaa8d8a2016-04-05 19:01:10 +02002594 anchor = ip = start + matchLength;
2595 }
2596
2597 /* check immediate repcode */
2598 while (ip <= ilimit) {
Yann Collet302ff032016-07-03 01:28:16 +02002599 const U32 repIndex = (U32)((ip-base) - offset_2);
inikepfaa8d8a2016-04-05 19:01:10 +02002600 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2601 const BYTE* const repMatch = repBase + repIndex;
Yann Collet43dfe012016-06-13 21:43:06 +02002602 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
inikepfaa8d8a2016-04-05 19:01:10 +02002603 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2604 /* repcode detected we should take it */
2605 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
Yann Collete42afbc2017-04-26 11:39:35 -07002606 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
Yann Collet302ff032016-07-03 01:28:16 +02002607 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
inikepfaa8d8a2016-04-05 19:01:10 +02002608 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2609 ip += matchLength;
2610 anchor = ip;
2611 continue; /* faster when present ... (?) */
2612 }
2613 break;
2614 } }
2615
Yann Collet4266c0a2016-06-14 01:49:25 +02002616 /* Save reps for next block */
Yann Colletb459aad2017-01-19 17:33:37 -08002617 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
Yann Collet4266c0a2016-06-14 01:49:25 +02002618
inikepfaa8d8a2016-04-05 19:01:10 +02002619 /* Last Literals */
2620 { size_t const lastLLSize = iend - anchor;
2621 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2622 seqStorePtr->lit += lastLLSize;
Yann Collet5106a762015-11-05 15:00:24 +01002623 }
2624}
2625
2626
Yann Collet59d1f792016-01-23 19:28:41 +01002627void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Collet9a24e592015-11-22 02:53:43 +01002628{
inikep64d7bcb2016-04-07 19:14:09 +02002629 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
Yann Collet9a24e592015-11-22 02:53:43 +01002630}
2631
Yann Collet59d1f792016-01-23 19:28:41 +01002632static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Colletb7fc88e2015-11-22 03:12:28 +01002633{
Yann Colleta1249dc2016-01-25 04:22:03 +01002634 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
Yann Colletb7fc88e2015-11-22 03:12:28 +01002635}
Yann Collet9a24e592015-11-22 02:53:43 +01002636
Yann Collet59d1f792016-01-23 19:28:41 +01002637static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Colleta85c77b2015-11-22 12:22:04 +01002638{
Yann Colleta1249dc2016-01-25 04:22:03 +01002639 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
Yann Colleta85c77b2015-11-22 12:22:04 +01002640}
2641
Yann Collet59d1f792016-01-23 19:28:41 +01002642static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Yann Collet5054ee02015-11-23 13:34:21 +01002643{
Yann Colleta1249dc2016-01-25 04:22:03 +01002644 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
Yann Collet5054ee02015-11-23 13:34:21 +01002645}
2646
inikepef519412016-04-21 11:08:43 +02002647
inikepef519412016-04-21 11:08:43 +02002648/* The optimal parser */
2649#include "zstd_opt.h"
2650
2651static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2652{
Yann Colletd4f4e582016-06-27 01:31:35 +02002653#ifdef ZSTD_OPT_H_91842398743
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002654 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
2655#else
2656 (void)ctx; (void)src; (void)srcSize;
2657 return;
2658#endif
2659}
2660
Nick Terrelleeb31ee2017-03-09 11:44:25 -08002661static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002662{
2663#ifdef ZSTD_OPT_H_91842398743
2664 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
Yann Colletd4f4e582016-06-27 01:31:35 +02002665#else
2666 (void)ctx; (void)src; (void)srcSize;
2667 return;
2668#endif
inikepef519412016-04-21 11:08:43 +02002669}
2670
inikepd3b8d7a2016-02-22 10:06:17 +01002671static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
inikepe2bfe242016-01-31 11:25:48 +01002672{
Yann Colletd4f4e582016-06-27 01:31:35 +02002673#ifdef ZSTD_OPT_H_91842398743
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002674 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
2675#else
2676 (void)ctx; (void)src; (void)srcSize;
2677 return;
2678#endif
2679}
2680
Nick Terrelleeb31ee2017-03-09 11:44:25 -08002681static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
Przemyslaw Skibinski5c5f01f2016-10-25 12:25:07 +02002682{
2683#ifdef ZSTD_OPT_H_91842398743
2684 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
Yann Colletd4f4e582016-06-27 01:31:35 +02002685#else
2686 (void)ctx; (void)src; (void)srcSize;
2687 return;
2688#endif
inikepe2bfe242016-01-31 11:25:48 +01002689}
2690
Yann Collet7a231792015-11-21 15:27:35 +01002691
Yann Collet59d1f792016-01-23 19:28:41 +01002692typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
Yann Collet59d70632015-11-04 12:05:27 +01002693
Yann Colletb923f652016-01-26 03:14:20 +01002694static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
Yann Collet59d70632015-11-04 12:05:27 +01002695{
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002696 static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
2697 { NULL,
2698 ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
Yann Colletc17e0202017-04-20 12:50:02 -07002699 ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
Nick Terrell5f2c7212017-05-10 16:49:58 -07002700 ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002701 { NULL,
2702 ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
Yann Colletc17e0202017-04-20 12:50:02 -07002703 ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
Nick Terrell5f2c7212017-05-10 16:49:58 -07002704 ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
Yann Collet7fe531e2015-11-29 02:38:09 +01002705 };
Yann Colleta5ffe3d2017-05-12 16:29:19 -07002706 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
Yann Collet7fe531e2015-11-29 02:38:09 +01002707
2708 return blockCompressor[extDict][(U32)strat];
Yann Collet59d70632015-11-04 12:05:27 +01002709}
2710
2711
Yann Colletd1b26842016-03-15 01:24:33 +01002712static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Yann Colletbe2010e2015-10-31 12:57:14 +01002713{
Yann Collet1ad7c822017-05-22 17:06:04 -07002714 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit);
inikep98e08cb2016-08-10 15:00:30 +02002715 const BYTE* const base = zc->base;
2716 const BYTE* const istart = (const BYTE*)src;
2717 const U32 current = (U32)(istart-base);
Yann Collet2ce49232016-02-02 14:36:49 +01002718 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
Yann Collet19cab462016-06-17 12:54:52 +02002719 ZSTD_resetSeqStore(&(zc->seqStore));
inikep98e08cb2016-08-10 15:00:30 +02002720 if (current > zc->nextToUpdate + 384)
Yann Colletc17e0202017-04-20 12:50:02 -07002721 zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */
Yann Collet59d1f792016-01-23 19:28:41 +01002722 blockCompressor(zc, src, srcSize);
Yann Colletd1b26842016-03-15 01:24:33 +01002723 return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
Yann Colletbe2010e2015-10-31 12:57:14 +01002724}
2725
2726
Yann Colletdb8e21d2017-05-12 13:46:49 -07002727/*! ZSTD_compress_frameChunk() :
Yann Colletc991cc12016-07-28 00:55:43 +02002728* Compress a chunk of data into one or multiple blocks.
2729* All blocks will be terminated, all input will be consumed.
2730* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
2731* Frame is supposed already started (header already produced)
2732* @return : compressed size, or an error code
2733*/
Yann Colletdb8e21d2017-05-12 13:46:49 -07002734static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002735 void* dst, size_t dstCapacity,
Yann Colletc991cc12016-07-28 00:55:43 +02002736 const void* src, size_t srcSize,
2737 U32 lastFrameChunk)
Yann Colletf3eca252015-10-22 15:31:46 +01002738{
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002739 size_t blockSize = cctx->blockSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002740 size_t remaining = srcSize;
2741 const BYTE* ip = (const BYTE*)src;
2742 BYTE* const ostart = (BYTE*)dst;
2743 BYTE* op = ostart;
Yann Collet1ad7c822017-05-22 17:06:04 -07002744 U32 const maxDist = 1 << cctx->appliedParams.cParams.windowLog;
Yann Collet9b11b462015-11-01 12:40:22 +01002745
Yann Collet1ad7c822017-05-22 17:06:04 -07002746 if (cctx->appliedParams.fParams.checksumFlag && srcSize)
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002747 XXH64_update(&cctx->xxhState, src, srcSize);
2748
Yann Collet2ce49232016-02-02 14:36:49 +01002749 while (remaining) {
Yann Colletc991cc12016-07-28 00:55:43 +02002750 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
Yann Collet3e358272015-11-04 18:19:39 +01002751 size_t cSize;
2752
Yann Colletc17e0202017-04-20 12:50:02 -07002753 if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
2754 return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
Yann Collet3e358272015-11-04 18:19:39 +01002755 if (remaining < blockSize) blockSize = remaining;
Yann Collet89db5e02015-11-13 11:27:46 +01002756
Yann Collet346efcc2016-08-02 14:26:00 +02002757 /* preemptive overflow correction */
Sean Purcell881abe42017-03-07 16:52:23 -08002758 if (cctx->lowLimit > (3U<<29)) {
Yann Collet1ad7c822017-05-22 17:06:04 -07002759 U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->appliedParams.cParams.hashLog, cctx->appliedParams.cParams.strategy)) - 1;
Yann Colletc261f712016-12-12 00:25:07 +01002760 U32 const current = (U32)(ip - cctx->base);
Yann Collet1ad7c822017-05-22 17:06:04 -07002761 U32 const newCurrent = (current & cycleMask) + (1 << cctx->appliedParams.cParams.windowLog);
Yann Colletc261f712016-12-12 00:25:07 +01002762 U32 const correction = current - newCurrent;
2763 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
Yann Collet346efcc2016-08-02 14:26:00 +02002764 ZSTD_reduceIndex(cctx, correction);
2765 cctx->base += correction;
2766 cctx->dictBase += correction;
Yann Colletc261f712016-12-12 00:25:07 +01002767 cctx->lowLimit -= correction;
Yann Collet346efcc2016-08-02 14:26:00 +02002768 cctx->dictLimit -= correction;
2769 if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
2770 else cctx->nextToUpdate -= correction;
2771 }
2772
Yann Collet06e76972017-01-25 16:39:03 -08002773 if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
Yann Collet70e45772016-03-19 18:08:32 +01002774 /* enforce maxDist */
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002775 U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
2776 if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
2777 if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
Yann Colletc3652152015-11-24 14:06:07 +01002778 }
Yann Collet89db5e02015-11-13 11:27:46 +01002779
Yann Colletf2a3b6e2016-05-31 18:13:56 +02002780 cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
inikepfb5df612016-05-24 15:36:37 +02002781 if (ZSTD_isError(cSize)) return cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002782
Yann Collet2ce49232016-02-02 14:36:49 +01002783 if (cSize == 0) { /* block is not compressible */
Yann Colletc991cc12016-07-28 00:55:43 +02002784 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
2785 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
2786 MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
2787 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
2788 cSize = ZSTD_blockHeaderSize+blockSize;
Yann Collet2ce49232016-02-02 14:36:49 +01002789 } else {
Yann Colletc991cc12016-07-28 00:55:43 +02002790 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
Yann Collet6fa05a22016-07-20 14:58:49 +02002791 MEM_writeLE24(op, cBlockHeader24);
Yann Colletc991cc12016-07-28 00:55:43 +02002792 cSize += ZSTD_blockHeaderSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002793 }
2794
2795 remaining -= blockSize;
Yann Colletd1b26842016-03-15 01:24:33 +01002796 dstCapacity -= cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002797 ip += blockSize;
2798 op += cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002799 }
2800
Yann Collet62470b42016-07-28 15:29:08 +02002801 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
Yann Colletf3eca252015-10-22 15:31:46 +01002802 return op-ostart;
2803}
2804
2805
Yann Collet6236eba2016-04-12 15:52:33 +02002806static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
Yann Colletc46fb922016-05-29 05:01:04 +02002807 ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
Yann Collet6236eba2016-04-12 15:52:33 +02002808{ BYTE* const op = (BYTE*)dst;
Yann Collet31533ba2017-04-27 00:29:04 -07002809 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2810 U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
Yann Collet731ef162016-07-27 21:05:12 +02002811 U32 const checksumFlag = params.fParams.checksumFlag>0;
2812 U32 const windowSize = 1U << params.cParams.windowLog;
Sean Purcell2db72492017-02-09 10:50:43 -08002813 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
Yann Collet731ef162016-07-27 21:05:12 +02002814 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2815 U32 const fcsCode = params.fParams.contentSizeFlag ?
Nick Terrell55fc1f92017-05-24 13:50:10 -07002816 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
Yann Collet731ef162016-07-27 21:05:12 +02002817 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
Yann Colletc46fb922016-05-29 05:01:04 +02002818 size_t pos;
2819
2820 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
Yann Collet009d6042017-05-19 10:17:59 -07002821 DEBUGLOG(5, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
Yann Collet0be6fd32017-05-08 16:08:01 -07002822 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
Yann Collet6236eba2016-04-12 15:52:33 +02002823
2824 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
Yann Collet673f0d72016-06-06 00:26:38 +02002825 op[4] = frameHeaderDecriptionByte; pos=5;
Eric Biggerse4d02652016-07-26 10:42:19 -07002826 if (!singleSegment) op[pos++] = windowLogByte;
Yann Colletc46fb922016-05-29 05:01:04 +02002827 switch(dictIDSizeCode)
2828 {
Yann Colletcd2892f2017-06-01 09:44:54 -07002829 default: assert(0); /* impossible */
Yann Colletc46fb922016-05-29 05:01:04 +02002830 case 0 : break;
2831 case 1 : op[pos] = (BYTE)(dictID); pos++; break;
Yann Colletd4180ca2016-07-27 21:21:36 +02002832 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
Yann Colletc46fb922016-05-29 05:01:04 +02002833 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
2834 }
Yann Collet673f0d72016-06-06 00:26:38 +02002835 switch(fcsCode)
Yann Collet6236eba2016-04-12 15:52:33 +02002836 {
Yann Colletcd2892f2017-06-01 09:44:54 -07002837 default: assert(0); /* impossible */
Eric Biggerse4d02652016-07-26 10:42:19 -07002838 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
Yann Collet673f0d72016-06-06 00:26:38 +02002839 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
2840 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
Yann Colletc46fb922016-05-29 05:01:04 +02002841 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
Yann Collet6236eba2016-04-12 15:52:33 +02002842 }
Yann Colletc46fb922016-05-29 05:01:04 +02002843 return pos;
Yann Collet6236eba2016-04-12 15:52:33 +02002844}
2845
2846
Yann Collet346efcc2016-08-02 14:26:00 +02002847static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
Yann Collet7cbe79a2016-03-23 22:31:57 +01002848 void* dst, size_t dstCapacity,
Yann Colletbf42c8e2016-01-09 01:08:23 +01002849 const void* src, size_t srcSize,
Yann Colletc991cc12016-07-28 00:55:43 +02002850 U32 frame, U32 lastFrameChunk)
Yann Colletf3eca252015-10-22 15:31:46 +01002851{
Yann Collet2acb5d32015-10-29 16:49:43 +01002852 const BYTE* const ip = (const BYTE*) src;
Yann Collet6236eba2016-04-12 15:52:33 +02002853 size_t fhSize = 0;
Yann Colletecd651b2016-01-07 15:35:18 +01002854
Yann Collet346efcc2016-08-02 14:26:00 +02002855 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
Yann Colletd4180ca2016-07-27 21:21:36 +02002856
Yann Collet346efcc2016-08-02 14:26:00 +02002857 if (frame && (cctx->stage==ZSTDcs_init)) {
Yann Collet1ad7c822017-05-22 17:06:04 -07002858 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, cctx->frameContentSize, cctx->dictID);
Yann Collet6236eba2016-04-12 15:52:33 +02002859 if (ZSTD_isError(fhSize)) return fhSize;
2860 dstCapacity -= fhSize;
2861 dst = (char*)dst + fhSize;
Yann Collet346efcc2016-08-02 14:26:00 +02002862 cctx->stage = ZSTDcs_ongoing;
Yann Colletecd651b2016-01-07 15:35:18 +01002863 }
Yann Colletf3eca252015-10-22 15:31:46 +01002864
Yann Collet417890c2015-12-04 17:16:37 +01002865 /* Check if blocks follow each other */
Yann Collet346efcc2016-08-02 14:26:00 +02002866 if (src != cctx->nextSrc) {
Yann Collet417890c2015-12-04 17:16:37 +01002867 /* not contiguous */
Yann Collet346efcc2016-08-02 14:26:00 +02002868 ptrdiff_t const delta = cctx->nextSrc - ip;
2869 cctx->lowLimit = cctx->dictLimit;
2870 cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
2871 cctx->dictBase = cctx->base;
2872 cctx->base -= delta;
2873 cctx->nextToUpdate = cctx->dictLimit;
2874 if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
Yann Collet417890c2015-12-04 17:16:37 +01002875 }
2876
Yann Collet346efcc2016-08-02 14:26:00 +02002877 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
2878 if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
2879 ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
2880 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
2881 cctx->lowLimit = lowLimitMax;
Yann Colletf3eca252015-10-22 15:31:46 +01002882 }
2883
Yann Collet346efcc2016-08-02 14:26:00 +02002884 cctx->nextSrc = ip + srcSize;
Yann Collet89db5e02015-11-13 11:27:46 +01002885
Yann Collet5eb749e2017-01-11 18:21:25 +01002886 if (srcSize) {
2887 size_t const cSize = frame ?
Yann Colletdb8e21d2017-05-12 13:46:49 -07002888 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
Yann Collet346efcc2016-08-02 14:26:00 +02002889 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
Yann Colletecd651b2016-01-07 15:35:18 +01002890 if (ZSTD_isError(cSize)) return cSize;
Yann Collet20d5e032017-04-11 18:34:02 -07002891 cctx->consumedSrcSize += srcSize;
Yann Collet6236eba2016-04-12 15:52:33 +02002892 return cSize + fhSize;
Yann Collet5eb749e2017-01-11 18:21:25 +01002893 } else
2894 return fhSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002895}
2896
Yann Colletbf42c8e2016-01-09 01:08:23 +01002897
Yann Collet5b567392016-07-28 01:17:22 +02002898size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
Yann Collet7cbe79a2016-03-23 22:31:57 +01002899 void* dst, size_t dstCapacity,
Yann Colletbf42c8e2016-01-09 01:08:23 +01002900 const void* src, size_t srcSize)
2901{
Yann Collet20d5e032017-04-11 18:34:02 -07002902 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
Yann Collet5b567392016-07-28 01:17:22 +02002903}
2904
2905
Yann Colletfa3671e2017-05-19 10:51:30 -07002906size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
Yann Colletbf42c8e2016-01-09 01:08:23 +01002907{
Yann Colletfa3671e2017-05-19 10:51:30 -07002908 U32 const cLevel = cctx->compressionLevel;
2909 ZSTD_compressionParameters cParams = (cLevel == ZSTD_CLEVEL_CUSTOM) ?
Yann Collet1ad7c822017-05-22 17:06:04 -07002910 cctx->appliedParams.cParams :
Yann Colletfa3671e2017-05-19 10:51:30 -07002911 ZSTD_getCParams(cLevel, 0, 0);
2912 return MIN (ZSTD_BLOCKSIZE_MAX, 1 << cParams.windowLog);
Yann Colletcf05b9d2016-07-18 16:52:10 +02002913}
2914
2915size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2916{
Yann Colletfa3671e2017-05-19 10:51:30 -07002917 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
Yann Collet961b6a02016-07-15 11:56:53 +02002918 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
Yann Collet20d5e032017-04-11 18:34:02 -07002919 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
Yann Colletbf42c8e2016-01-09 01:08:23 +01002920}
2921
Yann Collet16a0b102017-03-24 12:46:46 -07002922/*! ZSTD_loadDictionaryContent() :
2923 * @return : 0, or an error code
2924 */
Yann Colletb923f652016-01-26 03:14:20 +01002925static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
Yann Collet417890c2015-12-04 17:16:37 +01002926{
2927 const BYTE* const ip = (const BYTE*) src;
2928 const BYTE* const iend = ip + srcSize;
Yann Colletf3eca252015-10-22 15:31:46 +01002929
Yann Collet417890c2015-12-04 17:16:37 +01002930 /* input becomes current prefix */
2931 zc->lowLimit = zc->dictLimit;
2932 zc->dictLimit = (U32)(zc->nextSrc - zc->base);
2933 zc->dictBase = zc->base;
2934 zc->base += ip - zc->nextSrc;
2935 zc->nextToUpdate = zc->dictLimit;
Yann Collet06e76972017-01-25 16:39:03 -08002936 zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
Yann Collet417890c2015-12-04 17:16:37 +01002937
2938 zc->nextSrc = iend;
Yann Collet731ef162016-07-27 21:05:12 +02002939 if (srcSize <= HASH_READ_SIZE) return 0;
Yann Collet417890c2015-12-04 17:16:37 +01002940
Yann Collet1ad7c822017-05-22 17:06:04 -07002941 switch(zc->appliedParams.cParams.strategy)
Yann Collet417890c2015-12-04 17:16:37 +01002942 {
2943 case ZSTD_fast:
Yann Collet1ad7c822017-05-22 17:06:04 -07002944 ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01002945 break;
2946
Yann Collet45dc3562016-07-12 09:47:31 +02002947 case ZSTD_dfast:
Yann Collet1ad7c822017-05-22 17:06:04 -07002948 ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
Yann Collet45dc3562016-07-12 09:47:31 +02002949 break;
2950
Yann Collet417890c2015-12-04 17:16:37 +01002951 case ZSTD_greedy:
2952 case ZSTD_lazy:
2953 case ZSTD_lazy2:
Yann Collet16a0b102017-03-24 12:46:46 -07002954 if (srcSize >= HASH_READ_SIZE)
Yann Collet1ad7c822017-05-22 17:06:04 -07002955 ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01002956 break;
2957
2958 case ZSTD_btlazy2:
Yann Colletcefef8c2016-02-15 07:21:54 +01002959 case ZSTD_btopt:
Nick Terrelleeb31ee2017-03-09 11:44:25 -08002960 case ZSTD_btultra:
Yann Collet16a0b102017-03-24 12:46:46 -07002961 if (srcSize >= HASH_READ_SIZE)
Yann Collet1ad7c822017-05-22 17:06:04 -07002962 ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
Yann Collet417890c2015-12-04 17:16:37 +01002963 break;
2964
2965 default:
Yann Colletcd2892f2017-06-01 09:44:54 -07002966 assert(0); /* not possible : not a valid strategy id */
Yann Collet417890c2015-12-04 17:16:37 +01002967 }
2968
Nick Terrellecf90ca2017-02-13 18:27:34 -08002969 zc->nextToUpdate = (U32)(iend - zc->base);
Yann Collet417890c2015-12-04 17:16:37 +01002970 return 0;
2971}
2972
2973
Nick Terrellf9c9af32016-10-19 17:22:08 -07002974/* Dictionaries that assign zero probability to symbols that show up causes problems
2975 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
2976 that we may encounter during compression.
2977 NOTE: This behavior is not standard and could be improved in the future. */
2978static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
2979 U32 s;
2980 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
2981 for (s = 0; s <= maxSymbolValue; ++s) {
2982 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
2983 }
2984 return 0;
2985}
2986
2987
Yann Colletb923f652016-01-26 03:14:20 +01002988/* Dictionary format :
Yann Colletbea78e82017-03-22 18:09:11 -07002989 * See :
2990 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
2991 */
Yann Collet16a0b102017-03-24 12:46:46 -07002992/*! ZSTD_loadZstdDictionary() :
2993 * @return : 0, or an error code
2994 * assumptions : magic number supposed already checked
2995 * dictSize supposed > 8
Yann Colletbea78e82017-03-22 18:09:11 -07002996 */
Yann Collet16a0b102017-03-24 12:46:46 -07002997static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
Yann Colletb923f652016-01-26 03:14:20 +01002998{
Yann Collet52a06222016-06-15 13:53:34 +02002999 const BYTE* dictPtr = (const BYTE*)dict;
3000 const BYTE* const dictEnd = dictPtr + dictSize;
Nick Terrellf9c9af32016-10-19 17:22:08 -07003001 short offcodeNCount[MaxOff+1];
3002 unsigned offcodeMaxValue = MaxOff;
Yann Collet643d9a22016-12-01 16:24:04 -08003003 BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
Yann Colletfb810d62016-01-28 00:18:06 +01003004
Yann Colletbea78e82017-03-22 18:09:11 -07003005 dictPtr += 4; /* skip magic number */
Yann Collet1ad7c822017-05-22 17:06:04 -07003006 cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
Yann Colletbea78e82017-03-22 18:09:11 -07003007 dictPtr += 4;
3008
Yann Collet71ddeb62017-04-20 22:54:54 -07003009 { size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003010 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003011 dictPtr += hufHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003012 }
Yann Colletfb810d62016-01-28 00:18:06 +01003013
Nick Terrellf9c9af32016-10-19 17:22:08 -07003014 { unsigned offcodeLog;
Yann Collet52a06222016-06-15 13:53:34 +02003015 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003016 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003017 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003018 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
Yann Colletc17e0202017-04-20 12:50:02 -07003019 CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
3020 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003021 dictPtr += offcodeHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003022 }
Yann Colletfb810d62016-01-28 00:18:06 +01003023
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003024 { short matchlengthNCount[MaxML+1];
Nick Terrellbfd943a2016-10-17 16:55:52 -07003025 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
Yann Collet52a06222016-06-15 13:53:34 +02003026 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003027 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003028 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003029 /* Every match length code must have non-zero probability */
Yann Colletc17e0202017-04-20 12:50:02 -07003030 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
3031 CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
3032 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003033 dictPtr += matchlengthHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003034 }
Yann Colletfb810d62016-01-28 00:18:06 +01003035
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003036 { short litlengthNCount[MaxLL+1];
Nick Terrellbfd943a2016-10-17 16:55:52 -07003037 unsigned litlengthMaxValue = MaxLL, litlengthLog;
Yann Collet52a06222016-06-15 13:53:34 +02003038 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003039 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
Nick Terrellbfd943a2016-10-17 16:55:52 -07003040 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
Nick Terrellf9c9af32016-10-19 17:22:08 -07003041 /* Every literal length code must have non-zero probability */
Yann Colletc17e0202017-04-20 12:50:02 -07003042 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
3043 CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
3044 dictionary_corrupted);
Yann Collet52a06222016-06-15 13:53:34 +02003045 dictPtr += litlengthHeaderSize;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003046 }
Yann Colletfb810d62016-01-28 00:18:06 +01003047
Yann Collet52a06222016-06-15 13:53:34 +02003048 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
Yann Collet16a0b102017-03-24 12:46:46 -07003049 cctx->rep[0] = MEM_readLE32(dictPtr+0);
3050 cctx->rep[1] = MEM_readLE32(dictPtr+4);
3051 cctx->rep[2] = MEM_readLE32(dictPtr+8);
Yann Collet52a06222016-06-15 13:53:34 +02003052 dictPtr += 12;
3053
Yann Colletbea78e82017-03-22 18:09:11 -07003054 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
3055 U32 offcodeMax = MaxOff;
3056 if (dictContentSize <= ((U32)-1) - 128 KB) {
3057 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
3058 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
Nick Terrellb2c39a22016-10-24 14:11:27 -07003059 }
Yann Colletbea78e82017-03-22 18:09:11 -07003060 /* All offset values <= dictContentSize + 128 KB must be representable */
Nick Terrellf9c9af32016-10-19 17:22:08 -07003061 CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
Yann Colletbea78e82017-03-22 18:09:11 -07003062 /* All repCodes must be <= dictContentSize and != 0*/
3063 { U32 u;
3064 for (u=0; u<3; u++) {
3065 if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
3066 if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
Yann Collet16a0b102017-03-24 12:46:46 -07003067 } }
Nick Terrellf9c9af32016-10-19 17:22:08 -07003068
Yann Collet71ddeb62017-04-20 22:54:54 -07003069 cctx->fseCTables_ready = 1;
3070 cctx->hufCTable_repeatMode = HUF_repeat_valid;
Yann Collet16a0b102017-03-24 12:46:46 -07003071 return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
3072 }
Yann Colletb923f652016-01-26 03:14:20 +01003073}
3074
Yann Colletd1b26842016-03-15 01:24:33 +01003075/** ZSTD_compress_insertDictionary() :
3076* @return : 0, or an error code */
Yann Collet16a0b102017-03-24 12:46:46 -07003077static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
Yann Colletb923f652016-01-26 03:14:20 +01003078{
Yann Colletc46fb922016-05-29 05:01:04 +02003079 if ((dict==NULL) || (dictSize<=8)) return 0;
Yann Colletb923f652016-01-26 03:14:20 +01003080
Yann Collet14312d82017-02-23 23:42:12 -08003081 /* dict as pure content */
Yann Collet16a0b102017-03-24 12:46:46 -07003082 if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
3083 return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
Yann Colletd1b26842016-03-15 01:24:33 +01003084
Yann Colletbea78e82017-03-22 18:09:11 -07003085 /* dict as zstd dictionary */
Yann Collet16a0b102017-03-24 12:46:46 -07003086 return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
Yann Colletecd651b2016-01-07 15:35:18 +01003087}
3088
Yann Collet27caf2a2016-04-01 15:48:48 +02003089/*! ZSTD_compressBegin_internal() :
Yann Colletecd651b2016-01-07 15:35:18 +01003090* @return : 0, or an error code */
Yann Collet8c910d22017-06-03 01:15:02 -07003091size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
Yann Collet1c8e1942016-01-26 16:31:22 +01003092 const void* dict, size_t dictSize,
Yann Collet18803372017-05-22 18:21:51 -07003093 const ZSTD_CDict* cdict,
Yann Collet5ac72b42017-05-23 11:18:24 -07003094 ZSTD_parameters params, U64 pledgedSrcSize,
3095 ZSTD_buffered_policy_e zbuff)
Yann Colletf3eca252015-10-22 15:31:46 +01003096{
Yann Collet5ac72b42017-05-23 11:18:24 -07003097 /* params are supposed to be fully validated at this point */
Yann Colletab9162e2017-04-11 10:46:20 -07003098 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet18803372017-05-22 18:21:51 -07003099 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3100
3101 if (cdict && cdict->dictContentSize>0)
Yann Collet5ac72b42017-05-23 11:18:24 -07003102 return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
3103 params.fParams, pledgedSrcSize);
Yann Collet18803372017-05-22 18:21:51 -07003104
Yann Collet5ac72b42017-05-23 11:18:24 -07003105 CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
3106 ZSTDcrp_continue, zbuff));
Yann Colleta7737f62016-09-06 09:44:59 +02003107 return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
Yann Collet88fcd292015-11-25 14:42:45 +01003108}
3109
3110
Yann Collet27caf2a2016-04-01 15:48:48 +02003111/*! ZSTD_compressBegin_advanced() :
3112* @return : 0, or an error code */
Yann Collet81e13ef2016-06-07 00:51:51 +02003113size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
Yann Collet27caf2a2016-04-01 15:48:48 +02003114 const void* dict, size_t dictSize,
Yann Collet52c04fe2016-07-07 11:53:18 +02003115 ZSTD_parameters params, unsigned long long pledgedSrcSize)
Yann Collet27caf2a2016-04-01 15:48:48 +02003116{
3117 /* compression parameters verification and optimization */
Yann Colletcf409a72016-09-26 16:41:05 +02003118 CHECK_F(ZSTD_checkCParams(params.cParams));
Yann Collet5ac72b42017-05-23 11:18:24 -07003119 return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
3120 params, pledgedSrcSize, ZSTDb_not_buffered);
Yann Collet27caf2a2016-04-01 15:48:48 +02003121}
3122
3123
Yann Collet81e13ef2016-06-07 00:51:51 +02003124size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
Yann Colletb923f652016-01-26 03:14:20 +01003125{
Yann Collet6c6e1752016-06-27 15:28:45 +02003126 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
Yann Collet5ac72b42017-05-23 11:18:24 -07003127 return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
3128 params, 0, ZSTDb_not_buffered);
Yann Collet1c8e1942016-01-26 16:31:22 +01003129}
Yann Collet083fcc82015-10-25 14:06:35 +01003130
inikep19bd48f2016-04-04 12:10:00 +02003131
Yann Colletb05c4822017-01-12 02:01:28 +01003132size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
Yann Collet083fcc82015-10-25 14:06:35 +01003133{
Yann Colletb05c4822017-01-12 02:01:28 +01003134 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
Yann Collet083fcc82015-10-25 14:06:35 +01003135}
3136
3137
Yann Collet62470b42016-07-28 15:29:08 +02003138/*! ZSTD_writeEpilogue() :
3139* Ends a frame.
Yann Collet88fcd292015-11-25 14:42:45 +01003140* @return : nb of bytes written into dst (or an error code) */
Yann Collet62470b42016-07-28 15:29:08 +02003141static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
Yann Collet2acb5d32015-10-29 16:49:43 +01003142{
Yann Colletc991cc12016-07-28 00:55:43 +02003143 BYTE* const ostart = (BYTE*)dst;
3144 BYTE* op = ostart;
Yann Collet6236eba2016-04-12 15:52:33 +02003145 size_t fhSize = 0;
Yann Collet2acb5d32015-10-29 16:49:43 +01003146
Yann Collet009d6042017-05-19 10:17:59 -07003147 DEBUGLOG(5, "ZSTD_writeEpilogue");
Yann Collet87c18b22016-08-26 01:43:47 +02003148 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
Yann Collet887e7da2016-04-11 20:12:27 +02003149
3150 /* special case : empty frame */
Yann Colletc991cc12016-07-28 00:55:43 +02003151 if (cctx->stage == ZSTDcs_init) {
Yann Collet1ad7c822017-05-22 17:06:04 -07003152 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
Yann Collet6236eba2016-04-12 15:52:33 +02003153 if (ZSTD_isError(fhSize)) return fhSize;
3154 dstCapacity -= fhSize;
3155 op += fhSize;
Yann Collet731ef162016-07-27 21:05:12 +02003156 cctx->stage = ZSTDcs_ongoing;
Yann Colletecd651b2016-01-07 15:35:18 +01003157 }
3158
Yann Colletc991cc12016-07-28 00:55:43 +02003159 if (cctx->stage != ZSTDcs_ending) {
3160 /* write one last empty block, make it the "last" block */
3161 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
3162 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3163 MEM_writeLE32(op, cBlockHeader24);
3164 op += ZSTD_blockHeaderSize;
3165 dstCapacity -= ZSTD_blockHeaderSize;
3166 }
3167
Yann Collet1ad7c822017-05-22 17:06:04 -07003168 if (cctx->appliedParams.fParams.checksumFlag) {
Yann Colletc991cc12016-07-28 00:55:43 +02003169 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
3170 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
3171 MEM_writeLE32(op, checksum);
3172 op += 4;
Yann Colletf2a3b6e2016-05-31 18:13:56 +02003173 }
Yann Collet2acb5d32015-10-29 16:49:43 +01003174
Yann Collet731ef162016-07-27 21:05:12 +02003175 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
Yann Colletc991cc12016-07-28 00:55:43 +02003176 return op-ostart;
Yann Collet2acb5d32015-10-29 16:49:43 +01003177}
3178
Yann Colletfd416f12016-01-30 03:14:15 +01003179
Yann Collet62470b42016-07-28 15:29:08 +02003180size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
3181 void* dst, size_t dstCapacity,
3182 const void* src, size_t srcSize)
3183{
3184 size_t endResult;
Yann Collet009d6042017-05-19 10:17:59 -07003185 size_t const cSize = ZSTD_compressContinue_internal(cctx,
3186 dst, dstCapacity, src, srcSize,
3187 1 /* frame mode */, 1 /* last chunk */);
Yann Collet62470b42016-07-28 15:29:08 +02003188 if (ZSTD_isError(cSize)) return cSize;
3189 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
3190 if (ZSTD_isError(endResult)) return endResult;
Yann Collet1ad7c822017-05-22 17:06:04 -07003191 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
Yann Collet0be6fd32017-05-08 16:08:01 -07003192 if (cctx->frameContentSize != cctx->consumedSrcSize)
3193 return ERROR(srcSize_wrong);
Yann Collet20d5e032017-04-11 18:34:02 -07003194 }
Yann Collet62470b42016-07-28 15:29:08 +02003195 return cSize + endResult;
3196}
3197
3198
Yann Collet19c10022016-07-28 01:25:46 +02003199static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
Yann Colletd1b26842016-03-15 01:24:33 +01003200 void* dst, size_t dstCapacity,
Yann Collet88fcd292015-11-25 14:42:45 +01003201 const void* src, size_t srcSize,
Yann Collet31683c02015-12-18 01:26:48 +01003202 const void* dict,size_t dictSize,
Yann Collet88fcd292015-11-25 14:42:45 +01003203 ZSTD_parameters params)
Yann Colletf3eca252015-10-22 15:31:46 +01003204{
Yann Collet5ac72b42017-05-23 11:18:24 -07003205 CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
3206 params, srcSize, ZSTDb_not_buffered));
Yann Collet62470b42016-07-28 15:29:08 +02003207 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
Yann Colletf3eca252015-10-22 15:31:46 +01003208}
3209
Yann Collet21588e32016-03-30 16:50:44 +02003210size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
3211 void* dst, size_t dstCapacity,
3212 const void* src, size_t srcSize,
3213 const void* dict,size_t dictSize,
3214 ZSTD_parameters params)
3215{
Yann Colletcf409a72016-09-26 16:41:05 +02003216 CHECK_F(ZSTD_checkCParams(params.cParams));
Yann Collet21588e32016-03-30 16:50:44 +02003217 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
3218}
3219
Yann Colletc17e0202017-04-20 12:50:02 -07003220size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
3221 const void* dict, size_t dictSize, int compressionLevel)
Yann Collet31683c02015-12-18 01:26:48 +01003222{
Yann Collet407a11f2016-11-03 15:52:01 -07003223 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
Yann Collet3b719252016-03-30 19:48:05 +02003224 params.fParams.contentSizeFlag = 1;
Yann Collet21588e32016-03-30 16:50:44 +02003225 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
Yann Collet31683c02015-12-18 01:26:48 +01003226}
3227
Yann Colletd1b26842016-03-15 01:24:33 +01003228size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
Yann Collet083fcc82015-10-25 14:06:35 +01003229{
Yann Collet21588e32016-03-30 16:50:44 +02003230 return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
Yann Collet083fcc82015-10-25 14:06:35 +01003231}
3232
Yann Colletd1b26842016-03-15 01:24:33 +01003233size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
Yann Colletf3eca252015-10-22 15:31:46 +01003234{
Yann Collet44fe9912015-10-29 22:02:40 +01003235 size_t result;
Yann Collet5be2dd22015-11-11 13:43:58 +01003236 ZSTD_CCtx ctxBody;
Yann Collet712def92015-10-29 18:41:45 +01003237 memset(&ctxBody, 0, sizeof(ctxBody));
Yann Colletae728a42017-05-30 17:11:39 -07003238 ctxBody.customMem = ZSTD_defaultCMem;
Yann Colletd1b26842016-03-15 01:24:33 +01003239 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
Yann Colletae728a42017-05-30 17:11:39 -07003240 ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
Yann Collet44fe9912015-10-29 22:02:40 +01003241 return result;
Yann Colletf3eca252015-10-22 15:31:46 +01003242}
Yann Colletfdcad6d2015-12-17 23:50:15 +01003243
Yann Colletfd416f12016-01-30 03:14:15 +01003244
Yann Collet81e13ef2016-06-07 00:51:51 +02003245/* ===== Dictionary API ===== */
3246
Yann Colleta1d67042017-05-08 17:51:49 -07003247/*! ZSTD_estimateCDictSize() :
3248 * Estimate amount of memory that will be needed to create a dictionary with following arguments */
Yann Collet25989e32017-05-25 15:07:37 -07003249size_t ZSTD_estimateCDictSize(ZSTD_compressionParameters cParams, size_t dictSize, unsigned byReference)
Yann Colleta1d67042017-05-08 17:51:49 -07003250{
Yann Collet25989e32017-05-25 15:07:37 -07003251 return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize(cParams)
3252 + (byReference ? 0 : dictSize);
Yann Colleta1d67042017-05-08 17:51:49 -07003253}
3254
Yann Colletd7c65892016-09-15 02:50:27 +02003255size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3256{
3257 if (cdict==NULL) return 0; /* support sizeof on NULL */
Yann Colletaca113f2016-12-23 22:25:03 +01003258 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
Yann Colletd7c65892016-09-15 02:50:27 +02003259}
3260
Yann Collet1c3ab0c2017-04-27 12:57:11 -07003261static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_frameParameters fParams)
3262{
3263 ZSTD_parameters params;
3264 params.cParams = cParams;
3265 params.fParams = fParams;
3266 return params;
3267}
3268
Yann Colletcdf7e822017-05-25 18:05:49 -07003269static size_t ZSTD_initCDict_internal(
3270 ZSTD_CDict* cdict,
3271 const void* dictBuffer, size_t dictSize, unsigned byReference,
3272 ZSTD_compressionParameters cParams)
3273{
3274 if ((byReference) || (!dictBuffer) || (!dictSize)) {
3275 cdict->dictBuffer = NULL;
3276 cdict->dictContent = dictBuffer;
3277 } else {
3278 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
3279 if (!internalBuffer) return ERROR(memory_allocation);
3280 memcpy(internalBuffer, dictBuffer, dictSize);
3281 cdict->dictBuffer = internalBuffer;
3282 cdict->dictContent = internalBuffer;
3283 }
3284 cdict->dictContentSize = dictSize;
3285
3286 { ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */,
3287 0 /* checksumFlag */, 0 /* noDictIDFlag */ }; /* dummy */
3288 ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
3289 CHECK_F( ZSTD_compressBegin_advanced(cdict->refContext,
3290 cdict->dictContent, dictSize,
3291 params, 0 /* srcSize */) );
3292 }
3293
3294 return 0;
3295}
3296
Yann Collet1f57c2e2016-12-21 16:20:11 +01003297ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
Yann Collet31533ba2017-04-27 00:29:04 -07003298 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
Yann Collet81e13ef2016-06-07 00:51:51 +02003299{
Yann Collet8b21ec42017-05-19 19:46:15 -07003300 DEBUGLOG(5, "ZSTD_createCDict_advanced");
Yann Colletae728a42017-05-30 17:11:39 -07003301 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
Yann Collet81e13ef2016-06-07 00:51:51 +02003302
Yann Collet23b6e052016-08-28 21:05:43 -07003303 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
Yann Collet81e13ef2016-06-07 00:51:51 +02003304 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
3305
Yann Collet1f57c2e2016-12-21 16:20:11 +01003306 if (!cdict || !cctx) {
Yann Collet23b6e052016-08-28 21:05:43 -07003307 ZSTD_free(cdict, customMem);
Przemyslaw Skibinskid8114e52017-02-21 18:59:56 +01003308 ZSTD_freeCCtx(cctx);
Yann Collet81e13ef2016-06-07 00:51:51 +02003309 return NULL;
3310 }
Yann Colletcdf7e822017-05-25 18:05:49 -07003311 cdict->refContext = cctx;
Yann Collet81e13ef2016-06-07 00:51:51 +02003312
Yann Colletcdf7e822017-05-25 18:05:49 -07003313 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3314 dictBuffer, dictSize, byReference,
3315 cParams) )) {
3316 ZSTD_freeCDict(cdict);
3317 return NULL;
Nick Terrell3b9cdf92016-10-12 20:54:42 -07003318 }
Yann Collet1f57c2e2016-12-21 16:20:11 +01003319
Yann Collet81e13ef2016-06-07 00:51:51 +02003320 return cdict;
3321 }
3322}
3323
3324ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3325{
3326 ZSTD_customMem const allocator = { NULL, NULL, NULL };
Yann Collet31533ba2017-04-27 00:29:04 -07003327 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3328 return ZSTD_createCDict_advanced(dict, dictSize, 0, cParams, allocator);
Yann Collet1f57c2e2016-12-21 16:20:11 +01003329}
3330
3331ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
3332{
3333 ZSTD_customMem const allocator = { NULL, NULL, NULL };
Yann Collet31533ba2017-04-27 00:29:04 -07003334 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3335 return ZSTD_createCDict_advanced(dict, dictSize, 1, cParams, allocator);
Yann Collet81e13ef2016-06-07 00:51:51 +02003336}
3337
3338size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3339{
Yann Collet23b6e052016-08-28 21:05:43 -07003340 if (cdict==NULL) return 0; /* support free on NULL */
Yann Collet993060e2016-09-21 16:46:08 +02003341 { ZSTD_customMem const cMem = cdict->refContext->customMem;
Yann Collet23b6e052016-08-28 21:05:43 -07003342 ZSTD_freeCCtx(cdict->refContext);
Yann Collet4e5eea62016-12-21 16:44:35 +01003343 ZSTD_free(cdict->dictBuffer, cMem);
Yann Collet23b6e052016-08-28 21:05:43 -07003344 ZSTD_free(cdict, cMem);
3345 return 0;
3346 }
Yann Collet81e13ef2016-06-07 00:51:51 +02003347}
3348
Yann Colletcdf7e822017-05-25 18:05:49 -07003349/*! ZSTD_initStaticCDict_advanced() :
3350 * Generate a digested dictionary in provided memory area.
3351 * workspace: The memory area to emplace the dictionary into.
3352 * Provided pointer must 8-bytes aligned.
3353 * It must outlive dictionary usage.
3354 * workspaceSize: Use ZSTD_estimateCDictSize()
3355 * to determine how large workspace must be.
3356 * cParams : use ZSTD_getCParams() to transform a compression level
3357 * into its relevants cParams.
3358 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
3359 * Note : there is no corresponding "free" function.
3360 * Since workspace was allocated externally, it must be freed externally.
3361 */
3362ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
3363 const void* dict, size_t dictSize, unsigned byReference,
3364 ZSTD_compressionParameters cParams)
3365{
3366 size_t const cctxSize = ZSTD_estimateCCtxSize(cParams);
3367 size_t const neededSize = sizeof(ZSTD_CDict) + (byReference ? 0 : dictSize)
3368 + cctxSize;
3369 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
3370 void* ptr;
3371 DEBUGLOG(2, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
3372 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
3373 DEBUGLOG(2, "(workspaceSize < neededSize) : (%u < %u) => %u",
3374 (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
3375 if (workspaceSize < neededSize) return NULL;
3376
3377 if (!byReference) {
3378 memcpy(cdict+1, dict, dictSize);
3379 dict = cdict+1;
3380 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
3381 } else {
3382 ptr = cdict+1;
3383 }
3384 cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
3385
3386 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3387 dict, dictSize, 1 /* by Reference */,
3388 cParams) ))
3389 return NULL;
3390
3391 return cdict;
3392}
3393
Yann Collet8c910d22017-06-03 01:15:02 -07003394ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
Yann Collet95162342016-10-25 16:19:52 -07003395 return ZSTD_getParamsFromCCtx(cdict->refContext);
3396}
3397
Yann Collet715b9aa2017-04-18 13:55:53 -07003398/* ZSTD_compressBegin_usingCDict_advanced() :
Yann Collet4f818182017-04-17 17:57:35 -07003399 * cdict must be != NULL */
Yann Collet715b9aa2017-04-18 13:55:53 -07003400size_t ZSTD_compressBegin_usingCDict_advanced(
Yann Collet4f818182017-04-17 17:57:35 -07003401 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
3402 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
Yann Collet4cb21292016-09-15 14:54:07 +02003403{
Yann Collet5ac72b42017-05-23 11:18:24 -07003404 if (cdict==NULL) return ERROR(dictionary_wrong);
Yann Collet18803372017-05-22 18:21:51 -07003405 { ZSTD_parameters params = cdict->refContext->appliedParams;
Yann Collet4f818182017-04-17 17:57:35 -07003406 params.fParams = fParams;
Yann Collet18803372017-05-22 18:21:51 -07003407 DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
Yann Collet5ac72b42017-05-23 11:18:24 -07003408 return ZSTD_compressBegin_internal(cctx, NULL, 0, cdict,
3409 params, pledgedSrcSize, ZSTDb_not_buffered);
Sean Purcell2db72492017-02-09 10:50:43 -08003410 }
Yann Collet4cb21292016-09-15 14:54:07 +02003411}
3412
Yann Collet4f818182017-04-17 17:57:35 -07003413/* ZSTD_compressBegin_usingCDict() :
3414 * pledgedSrcSize=0 means "unknown"
3415 * if pledgedSrcSize>0, it will enable contentSizeFlag */
Yann Collet768df122017-04-26 15:42:10 -07003416size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
Yann Collet4f818182017-04-17 17:57:35 -07003417{
Yann Collet768df122017-04-26 15:42:10 -07003418 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
Yann Collet009d6042017-05-19 10:17:59 -07003419 DEBUGLOG(5, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
Yann Collet768df122017-04-26 15:42:10 -07003420 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
Yann Collet4f818182017-04-17 17:57:35 -07003421}
3422
Yann Colletf4bd8572017-04-27 11:31:55 -07003423size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
3424 void* dst, size_t dstCapacity,
3425 const void* src, size_t srcSize,
3426 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
3427{
3428 CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
3429 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
Yann Collet81e13ef2016-06-07 00:51:51 +02003430}
3431
Yann Collet07639052016-08-03 01:57:57 +02003432/*! ZSTD_compress_usingCDict() :
Yann Collet4f818182017-04-17 17:57:35 -07003433 * Compression using a digested Dictionary.
3434 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
3435 * Note that compression parameters are decided at CDict creation time
3436 * while frame parameters are hardcoded */
Yann Collet4cb21292016-09-15 14:54:07 +02003437size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
3438 void* dst, size_t dstCapacity,
3439 const void* src, size_t srcSize,
3440 const ZSTD_CDict* cdict)
Yann Collet81e13ef2016-06-07 00:51:51 +02003441{
Yann Collet4f818182017-04-17 17:57:35 -07003442 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
Yann Colletf4bd8572017-04-27 11:31:55 -07003443 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
Yann Collet81e13ef2016-06-07 00:51:51 +02003444}
3445
3446
3447
Yann Collet104e5b02016-08-12 13:04:27 +02003448/* ******************************************************************
3449* Streaming
3450********************************************************************/
Yann Collet5a0c8e22016-08-12 01:20:36 +02003451
Yann Collet5a0c8e22016-08-12 01:20:36 +02003452ZSTD_CStream* ZSTD_createCStream(void)
3453{
Yann Colletae728a42017-05-30 17:11:39 -07003454 return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003455}
3456
3457ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
Yann Colletae728a42017-05-30 17:11:39 -07003458{ /* CStream and CCtx are now same object */
Yann Collet6fb2f242017-05-10 11:06:06 -07003459 return ZSTD_createCCtx_advanced(customMem);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003460}
3461
3462size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
3463{
Yann Collet78553662017-05-08 17:15:00 -07003464 return ZSTD_freeCCtx(zcs); /* same object */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003465}
3466
Yann Collet5a0c8e22016-08-12 01:20:36 +02003467
3468
Yann Collet104e5b02016-08-12 13:04:27 +02003469/*====== Initialization ======*/
3470
Yann Colletfa3671e2017-05-19 10:51:30 -07003471size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003472
Yann Colletc17e0202017-04-20 12:50:02 -07003473size_t ZSTD_CStreamOutSize(void)
3474{
Yann Colletfa3671e2017-05-19 10:51:30 -07003475 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
Yann Colletc17e0202017-04-20 12:50:02 -07003476}
Yann Collet5a0c8e22016-08-12 01:20:36 +02003477
Yann Collet1ad7c822017-05-22 17:06:04 -07003478static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
3479 ZSTD_parameters params,
3480 unsigned long long pledgedSrcSize)
Yann Collet4cb21292016-09-15 14:54:07 +02003481{
Yann Collet1ad7c822017-05-22 17:06:04 -07003482 DEBUGLOG(5, "ZSTD_resetCStream_internal");
Yann Collet31533ba2017-04-27 00:29:04 -07003483
Yann Collet5ac72b42017-05-23 11:18:24 -07003484 CHECK_F(ZSTD_compressBegin_internal(zcs, NULL, 0, zcs->cdict,
3485 params, pledgedSrcSize, ZSTDb_buffered));
Yann Collet4cb21292016-09-15 14:54:07 +02003486
3487 zcs->inToCompress = 0;
3488 zcs->inBuffPos = 0;
3489 zcs->inBuffTarget = zcs->blockSize;
3490 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
Yann Collet0be6fd32017-05-08 16:08:01 -07003491 zcs->streamStage = zcss_load;
Yann Collet4cb21292016-09-15 14:54:07 +02003492 zcs->frameEnded = 0;
3493 return 0; /* ready to go */
3494}
3495
Yann Collet009d6042017-05-19 10:17:59 -07003496size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3497{
Yann Collet1ad7c822017-05-22 17:06:04 -07003498 ZSTD_parameters params = zcs->requestedParams;
Yann Collet009d6042017-05-19 10:17:59 -07003499 params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
Yann Colletb0739bc2017-05-22 17:45:15 -07003500 DEBUGLOG(5, "ZSTD_resetCStream");
Yann Collet009d6042017-05-19 10:17:59 -07003501 if (zcs->compressionLevel != ZSTD_CLEVEL_CUSTOM) {
3502 params.cParams = ZSTD_getCParams(zcs->compressionLevel, pledgedSrcSize, 0 /* dictSize */);
3503 }
Yann Collet5ac72b42017-05-23 11:18:24 -07003504 return ZSTD_resetCStream_internal(zcs, params, pledgedSrcSize);
Yann Collet009d6042017-05-19 10:17:59 -07003505}
3506
Yann Collet8c910d22017-06-03 01:15:02 -07003507size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
3508 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3509 ZSTD_parameters params, unsigned long long pledgedSrcSize)
Yann Collete88034f2017-04-10 22:24:02 -07003510{
3511 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Yann Collet8c910d22017-06-03 01:15:02 -07003512 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
Yann Collete88034f2017-04-10 22:24:02 -07003513
3514 if (dict && dictSize >= 8) {
Yann Colletc7fe2622017-05-23 13:16:00 -07003515 if (zcs->staticSize) { /* static CCtx : never uses malloc */
3516 /* incompatible with internal cdict creation */
3517 return ERROR(memory_allocation);
3518 }
Yann Collete88034f2017-04-10 22:24:02 -07003519 ZSTD_freeCDict(zcs->cdictLocal);
Yann Collet8c910d22017-06-03 01:15:02 -07003520 zcs->cdict = NULL;
Yann Collet31533ba2017-04-27 00:29:04 -07003521 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params.cParams, zcs->customMem);
Yann Collete88034f2017-04-10 22:24:02 -07003522 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
3523 zcs->cdict = zcs->cdictLocal;
Yann Collet8c910d22017-06-03 01:15:02 -07003524 } else {
3525 if (cdict) {
3526 ZSTD_parameters const cdictParams = ZSTD_getParamsFromCDict(cdict);
3527 params.cParams = cdictParams.cParams; /* cParams are enforced from cdict */
3528 }
3529 zcs->cdict = cdict;
Yann Collete88034f2017-04-10 22:24:02 -07003530 }
3531
Yann Collet8c910d22017-06-03 01:15:02 -07003532 zcs->requestedParams = params;
3533 zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Collet5ac72b42017-05-23 11:18:24 -07003534 return ZSTD_resetCStream_internal(zcs, params, pledgedSrcSize);
Sean Purcell2db72492017-02-09 10:50:43 -08003535}
3536
Yann Collet8c910d22017-06-03 01:15:02 -07003537/* ZSTD_initCStream_usingCDict_advanced() :
3538 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
3539size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
3540 const ZSTD_CDict* cdict,
3541 ZSTD_frameParameters fParams,
3542 unsigned long long pledgedSrcSize)
3543{ /* cannot handle NULL cdict (does not know what to do) */
3544 if (!cdict) return ERROR(dictionary_wrong);
3545 { ZSTD_parameters params = ZSTD_getParamsFromCDict(cdict);
3546 params.fParams = fParams;
3547 return ZSTD_initCStream_internal(zcs,
3548 NULL, 0, cdict,
3549 params, pledgedSrcSize);
3550 }
3551}
3552
3553/* note : cdict must outlive compression session */
3554size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
3555{
3556 ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ };
3557 return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0); /* note : will check that cdict != NULL */
3558}
3559
Yann Collet5a0c8e22016-08-12 01:20:36 +02003560size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
3561 const void* dict, size_t dictSize,
3562 ZSTD_parameters params, unsigned long long pledgedSrcSize)
3563{
Yann Collet4b987ad2017-04-10 17:50:44 -07003564 CHECK_F( ZSTD_checkCParams(params.cParams) );
Yann Collet1ad7c822017-05-22 17:06:04 -07003565 zcs->requestedParams = params;
3566 zcs->compressionLevel = ZSTD_CLEVEL_CUSTOM;
Yann Collet8c910d22017-06-03 01:15:02 -07003567 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, pledgedSrcSize);
Yann Collet95162342016-10-25 16:19:52 -07003568}
3569
Yann Collet5a0c8e22016-08-12 01:20:36 +02003570size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3571{
3572 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
Yann Collet1ad7c822017-05-22 17:06:04 -07003573 zcs->compressionLevel = compressionLevel;
Yann Collet8c910d22017-06-03 01:15:02 -07003574 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, params, 0);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003575}
3576
Yann Collete795c8a2016-12-13 16:39:36 +01003577size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
3578{
Yann Colletd564faa2016-12-18 21:39:15 +01003579 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
Yann Collete88034f2017-04-10 22:24:02 -07003580 params.fParams.contentSizeFlag = (pledgedSrcSize>0);
Yann Collet8c910d22017-06-03 01:15:02 -07003581 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, pledgedSrcSize);
Yann Collete795c8a2016-12-13 16:39:36 +01003582}
3583
Yann Collet5a0c8e22016-08-12 01:20:36 +02003584size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3585{
Yann Collete88034f2017-04-10 22:24:02 -07003586 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
Yann Collet8c910d22017-06-03 01:15:02 -07003587 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, params, 0);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003588}
3589
Yann Collet104e5b02016-08-12 13:04:27 +02003590/*====== Compression ======*/
Yann Collet5a0c8e22016-08-12 01:20:36 +02003591
Yann Collet01b15492017-05-30 18:10:26 -07003592MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
3593 const void* src, size_t srcSize)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003594{
3595 size_t const length = MIN(dstCapacity, srcSize);
Yann Collet18ab5af2017-05-31 09:59:22 -07003596 if (length) memcpy(dst, src, length);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003597 return length;
3598}
3599
3600static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
Yann Collet01b15492017-05-30 18:10:26 -07003601 ZSTD_outBuffer* output,
3602 ZSTD_inBuffer* input,
3603 ZSTD_EndDirective const flushMode)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003604{
Yann Collet01b15492017-05-30 18:10:26 -07003605 const char* const istart = (const char*)input->src;
3606 const char* const iend = istart + input->size;
3607 const char* ip = istart + input->pos;
3608 char* const ostart = (char*)output->dst;
3609 char* const oend = ostart + output->size;
3610 char* op = ostart + output->pos;
Yann Collet58e8d792017-06-02 18:20:48 -07003611 U32 someMoreWork = 1;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003612
Yann Collet58e8d792017-06-02 18:20:48 -07003613 /* check expectations */
Yann Collet8c910d22017-06-03 01:15:02 -07003614 DEBUGLOG(5, "ZSTD_compressStream_generic");
Yann Collet6d4fef32017-05-17 18:36:15 -07003615 assert(zcs->inBuff != NULL);
3616 assert(zcs->outBuff!= NULL);
Yann Collet58e8d792017-06-02 18:20:48 -07003617 assert(output->pos <= output->size);
3618 assert(input->pos <= input->size);
Yann Collet009d6042017-05-19 10:17:59 -07003619
Yann Collet5a0c8e22016-08-12 01:20:36 +02003620 while (someMoreWork) {
Yann Collet0be6fd32017-05-08 16:08:01 -07003621 switch(zcs->streamStage)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003622 {
Yann Collet1ad7c822017-05-22 17:06:04 -07003623 case zcss_init:
3624 /* call ZSTD_initCStream() first ! */
3625 return ERROR(init_missing);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003626
3627 case zcss_load:
3628 /* complete inBuffer */
3629 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
Yann Collet06589fe2017-05-31 10:03:20 -07003630 size_t const loaded = ZSTD_limitCopy(
3631 zcs->inBuff + zcs->inBuffPos, toLoad,
3632 ip, iend-ip);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003633 zcs->inBuffPos += loaded;
3634 ip += loaded;
Yann Collet009d6042017-05-19 10:17:59 -07003635 if ( (flushMode == ZSTD_e_continue)
3636 && (zcs->inBuffPos < zcs->inBuffTarget) ) {
3637 /* not enough input to fill full block : stop here */
3638 someMoreWork = 0; break;
3639 }
3640 if ( (flushMode == ZSTD_e_flush)
3641 && (zcs->inBuffPos == zcs->inToCompress) ) {
3642 /* empty */
3643 someMoreWork = 0; break;
3644 }
3645 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003646 /* compress current block (note : this stage cannot be stopped in the middle) */
Yann Collet009d6042017-05-19 10:17:59 -07003647 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003648 { void* cDst;
3649 size_t cSize;
3650 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
3651 size_t oSize = oend-op;
Yann Collet009d6042017-05-19 10:17:59 -07003652 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003653 if (oSize >= ZSTD_compressBound(iSize))
Yann Collet009d6042017-05-19 10:17:59 -07003654 cDst = op; /* compress directly into output buffer (skip flush stage) */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003655 else
3656 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
Yann Collet009d6042017-05-19 10:17:59 -07003657 cSize = lastBlock ?
3658 ZSTD_compressEnd(zcs, cDst, oSize,
3659 zcs->inBuff + zcs->inToCompress, iSize) :
3660 ZSTD_compressContinue(zcs, cDst, oSize,
3661 zcs->inBuff + zcs->inToCompress, iSize);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003662 if (ZSTD_isError(cSize)) return cSize;
Yann Collet009d6042017-05-19 10:17:59 -07003663 DEBUGLOG(5, "cSize = %u (lastBlock:%u)", (U32)cSize, lastBlock);
3664 zcs->frameEnded = lastBlock;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003665 /* prepare next block */
3666 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
3667 if (zcs->inBuffTarget > zcs->inBuffSize)
Yann Collet009d6042017-05-19 10:17:59 -07003668 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
Yann Collet8b21ec42017-05-19 19:46:15 -07003669 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
3670 (U32)zcs->inBuffTarget, (U32)zcs->inBuffSize);
3671 if (!lastBlock)
3672 assert(zcs->inBuffTarget <= zcs->inBuffSize);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003673 zcs->inToCompress = zcs->inBuffPos;
Yann Collet009d6042017-05-19 10:17:59 -07003674 if (cDst == op) { /* no need to flush */
3675 op += cSize;
3676 if (zcs->frameEnded) {
3677 DEBUGLOG(5, "Frame directly completed");
3678 someMoreWork = 0;
3679 zcs->streamStage = zcss_init;
3680 }
3681 break;
3682 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003683 zcs->outBuffContentSize = cSize;
3684 zcs->outBuffFlushedSize = 0;
Yann Collet009d6042017-05-19 10:17:59 -07003685 zcs->streamStage = zcss_flush; /* pass-through to flush stage */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003686 }
Jos Collin7cd7a752017-05-11 13:17:20 +05303687 /* fall-through */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003688 case zcss_flush:
Yann Collet009d6042017-05-19 10:17:59 -07003689 DEBUGLOG(5, "flush stage");
Yann Collet5a0c8e22016-08-12 01:20:36 +02003690 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
Yann Collet01b15492017-05-30 18:10:26 -07003691 size_t const flushed = ZSTD_limitCopy(op, oend-op,
3692 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
Yann Collet009d6042017-05-19 10:17:59 -07003693 DEBUGLOG(5, "toFlush: %u ; flushed: %u", (U32)toFlush, (U32)flushed);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003694 op += flushed;
3695 zcs->outBuffFlushedSize += flushed;
Yann Collet01b15492017-05-30 18:10:26 -07003696 if (toFlush!=flushed) {
3697 /* dst too small to store flushed data : stop there */
3698 someMoreWork = 0;
3699 break;
3700 }
Yann Collet5a0c8e22016-08-12 01:20:36 +02003701 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
Yann Collet009d6042017-05-19 10:17:59 -07003702 if (zcs->frameEnded) {
3703 DEBUGLOG(5, "Frame completed");
3704 someMoreWork = 0;
3705 zcs->streamStage = zcss_init;
3706 break;
3707 }
Yann Collet0be6fd32017-05-08 16:08:01 -07003708 zcs->streamStage = zcss_load;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003709 break;
3710 }
3711
3712 case zcss_final:
Yann Collet009d6042017-05-19 10:17:59 -07003713 someMoreWork = 0; break; /* useless */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003714
Yann Colletcd2892f2017-06-01 09:44:54 -07003715 default: /* impossible */
3716 assert(0);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003717 }
3718 }
3719
Yann Collet01b15492017-05-30 18:10:26 -07003720 input->pos = ip - istart;
3721 output->pos = op - ostart;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003722 if (zcs->frameEnded) return 0;
3723 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3724 if (hintInSize==0) hintInSize = zcs->blockSize;
3725 return hintInSize;
3726 }
3727}
3728
Yann Collet53e17fb2016-08-17 01:39:22 +02003729size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003730{
Yann Collet01b15492017-05-30 18:10:26 -07003731 /* check conditions */
3732 if (output->pos > output->size) return ERROR(GENERIC);
3733 if (input->pos > input->size) return ERROR(GENERIC);
3734
3735 return ZSTD_compressStream_generic(zcs, output, input, ZSTD_e_continue);
Yann Collet5a0c8e22016-08-12 01:20:36 +02003736}
3737
Yann Colletf35e2de2017-06-05 18:32:48 -07003738/*! ZSTDMT_initCStream_internal() :
3739 * Private use only. Init streaming operation.
3740 * expects params to be valid.
3741 * must receive dict, or cdict, or none, but not both.
3742 * @return : 0, or an error code */
3743size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
3744 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3745 ZSTD_parameters params, unsigned long long pledgedSrcSize);
3746
3747
Yann Colletdeee6e52017-05-30 17:42:00 -07003748size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
3749 ZSTD_outBuffer* output,
3750 ZSTD_inBuffer* input,
3751 ZSTD_EndDirective endOp)
Yann Collet6d4fef32017-05-17 18:36:15 -07003752{
3753 /* check conditions */
Yann Colletdeee6e52017-05-30 17:42:00 -07003754 if (output->pos > output->size) return ERROR(GENERIC);
3755 if (input->pos > input->size) return ERROR(GENERIC);
Yann Collet6d4fef32017-05-17 18:36:15 -07003756 assert(cctx!=NULL);
Yann Collet01b15492017-05-30 18:10:26 -07003757
Yann Collet6d4fef32017-05-17 18:36:15 -07003758 if (cctx->streamStage == zcss_init) {
3759 /* transparent reset */
Yann Collet1ad7c822017-05-22 17:06:04 -07003760 ZSTD_parameters params = cctx->requestedParams;
Yann Collet6d4fef32017-05-17 18:36:15 -07003761 if (cctx->compressionLevel != ZSTD_CLEVEL_CUSTOM)
3762 params.cParams = ZSTD_getCParams(cctx->compressionLevel,
3763 cctx->frameContentSize, 0 /* dictSize */);
Yann Colletf129fd32017-06-11 18:46:09 -07003764
3765#ifdef ZSTD_MULTITHREAD
Yann Colletf35e2de2017-06-05 18:32:48 -07003766 if (cctx->nbThreads > 1) {
3767 CHECK_F( ZSTDMT_initCStream_internal(cctx->mtctx, NULL, 0, cctx->cdict, params, cctx->frameContentSize) );
Yann Collet23aace92017-06-11 18:32:36 -07003768 cctx->streamStage = zcss_load;
Yann Colletf129fd32017-06-11 18:46:09 -07003769 } else
3770#endif
3771 {
Yann Colletf35e2de2017-06-05 18:32:48 -07003772 CHECK_F( ZSTD_resetCStream_internal(cctx, params, cctx->frameContentSize) );
3773 } }
3774
Yann Colletf129fd32017-06-11 18:46:09 -07003775#ifdef ZSTD_MULTITHREAD
Yann Colletf35e2de2017-06-05 18:32:48 -07003776 if (cctx->nbThreads > 1) {
Yann Collet23aace92017-06-11 18:32:36 -07003777 size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
3778 if (ZSTD_isError(flushMin)) cctx->streamStage = zcss_init;
3779 return flushMin;
Yann Collet6d4fef32017-05-17 18:36:15 -07003780 }
Yann Colletf129fd32017-06-11 18:46:09 -07003781#endif
Yann Collet6d4fef32017-05-17 18:36:15 -07003782
Yann Collet01b15492017-05-30 18:10:26 -07003783 DEBUGLOG(5, "starting ZSTD_compressStream_generic");
3784 CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
Yann Collet58e8d792017-06-02 18:20:48 -07003785 DEBUGLOG(5, "completing ZSTD_compress_generic");
Yann Colletdeee6e52017-05-30 17:42:00 -07003786 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
Yann Collet6d4fef32017-05-17 18:36:15 -07003787}
3788
Yann Colletdeee6e52017-05-30 17:42:00 -07003789size_t ZSTD_compress_generic_simpleArgs (
3790 ZSTD_CCtx* cctx,
3791 void* dst, size_t dstCapacity, size_t* dstPos,
3792 const void* src, size_t srcSize, size_t* srcPos,
3793 ZSTD_EndDirective endOp)
Yann Collet6d4fef32017-05-17 18:36:15 -07003794{
Yann Colletdeee6e52017-05-30 17:42:00 -07003795 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
3796 ZSTD_inBuffer input = { src, srcSize, *srcPos };
Yann Collet01b15492017-05-30 18:10:26 -07003797 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
Yann Collet58e8d792017-06-02 18:20:48 -07003798 size_t const cErr = ZSTD_compress_generic(cctx, &output, &input, endOp);
Yann Colletdeee6e52017-05-30 17:42:00 -07003799 *dstPos = output.pos;
3800 *srcPos = input.pos;
Yann Collet58e8d792017-06-02 18:20:48 -07003801 return cErr;
Yann Collet6d4fef32017-05-17 18:36:15 -07003802}
3803
Yann Collet5a0c8e22016-08-12 01:20:36 +02003804
/*======   Finalize   ======*/
Yann Collet5a0c8e22016-08-12 01:20:36 +02003806
3807/*! ZSTD_flushStream() :
3808* @return : amount of data remaining to flush */
Yann Collet53e17fb2016-08-17 01:39:22 +02003809size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003810{
Yann Collet18ab5af2017-05-31 09:59:22 -07003811 ZSTD_inBuffer input = { NULL, 0, 0 };
Yann Collet01b15492017-05-30 18:10:26 -07003812 if (output->pos > output->size) return ERROR(GENERIC);
3813 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_flush) );
3814 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
Yann Collet5a0c8e22016-08-12 01:20:36 +02003815}
3816
3817
Yann Collet53e17fb2016-08-17 01:39:22 +02003818size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
Yann Collet5a0c8e22016-08-12 01:20:36 +02003819{
Yann Collet18ab5af2017-05-31 09:59:22 -07003820 ZSTD_inBuffer input = { NULL, 0, 0 };
Yann Collet01b15492017-05-30 18:10:26 -07003821 if (output->pos > output->size) return ERROR(GENERIC);
3822 CHECK_F( ZSTD_compressStream_generic(zcs, output, &input, ZSTD_e_end) );
Yann Collet009d6042017-05-19 10:17:59 -07003823
Yann Collet48855fa2017-05-19 10:56:11 -07003824 DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u",
Yann Collet009d6042017-05-19 10:17:59 -07003825 (unsigned)(zcs->outBuffContentSize - zcs->outBuffFlushedSize));
Yann Collet009d6042017-05-19 10:17:59 -07003826 return zcs->outBuffContentSize - zcs->outBuffFlushedSize;
Yann Collet5a0c8e22016-08-12 01:20:36 +02003827}
3828
3829
/*-=====  Pre-defined compression levels  =====-*/
Yann Colletfd416f12016-01-30 03:14:15 +01003831
/* Highest compression level; also sizes the second dimension of ZSTD_defaultCParameters */
#define ZSTD_MAX_CLEVEL     22
/*! ZSTD_maxCLevel() :
 * @return : ZSTD_MAX_CLEVEL */
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
Yann Collet7d968c72016-02-03 02:11:32 +01003834
Yann Collet3b719252016-03-30 19:48:05 +02003835static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
Yann Colletfd416f12016-01-30 03:14:15 +01003836{ /* "default" */
Yann Collet793c6492016-04-09 20:32:00 +02003837 /* W, C, H, S, L, TL, strat */
Yann Collete19a9ef2016-08-26 20:02:49 +02003838 { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
Yann Collet3c242e72016-07-13 14:56:24 +02003839 { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
3840 { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
Yann Collete19a9ef2016-08-26 20:02:49 +02003841 { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/
3842 { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/
Yann Collet3c242e72016-07-13 14:56:24 +02003843 { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */
3844 { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3845 { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */
Yann Collete19a9ef2016-08-26 20:02:49 +02003846 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
Yann Collet3c242e72016-07-13 14:56:24 +02003847 { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3848 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3849 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3850 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3851 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
3852 { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
3853 { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
3854 { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
Yann Collete19a9ef2016-08-26 20:02:49 +02003855 { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
Yann Collet29297c62017-04-27 17:44:01 -07003856 { 23, 22, 22, 5, 4, 32, ZSTD_btopt }, /* level 18 */
Yann Collete19a9ef2016-08-26 20:02:49 +02003857 { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
Nick Terrell374f8682017-05-10 17:48:42 -07003858 { 25, 25, 23, 7, 3, 64, ZSTD_btultra }, /* level 20 */
3859 { 26, 26, 23, 7, 3,256, ZSTD_btultra }, /* level 21 */
3860 { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */
Yann Colletfd416f12016-01-30 03:14:15 +01003861},
3862{ /* for srcSize <= 256 KB */
Yann Collet3b719252016-03-30 19:48:05 +02003863 /* W, C, H, S, L, T, strat */
Yann Collete19a9ef2016-08-26 20:02:49 +02003864 { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
Yann Colleta2cdffe2016-08-24 19:42:15 +02003865 { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
Yann Collet24b68a52016-08-24 14:22:26 +02003866 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3867 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3868 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3869 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3870 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3871 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3872 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3873 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3874 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3875 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3876 { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
3877 { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
Yann Collet78267d12016-04-08 12:36:19 +02003878 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
Yann Collet24b68a52016-08-24 14:22:26 +02003879 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3880 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3881 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
Yann Collet78267d12016-04-08 12:36:19 +02003882 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3883 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07003884 { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/
3885 { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/
3886 { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01003887},
3888{ /* for srcSize <= 128 KB */
Yann Collet3b719252016-03-30 19:48:05 +02003889 /* W, C, H, S, L, T, strat */
Yann Collet5894ea82016-07-22 14:36:46 +02003890 { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
3891 { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
3892 { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
3893 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3894 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3895 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3896 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3897 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3898 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3899 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3900 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3901 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3902 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3903 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
Yann Collet3b719252016-03-30 19:48:05 +02003904 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3905 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3906 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3907 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3908 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3909 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07003910 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
3911 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
3912 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01003913},
3914{ /* for srcSize <= 16 KB */
Yann Collet3b719252016-03-30 19:48:05 +02003915 /* W, C, H, S, L, T, strat */
Yann Collet2b1a3632016-07-13 15:16:00 +02003916 { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
Yann Collete557fd52016-07-17 16:21:37 +02003917 { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
Yann Collet2b1a3632016-07-13 15:16:00 +02003918 { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
3919 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3920 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3921 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3922 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3923 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3924 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3925 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
Yann Collet3b719252016-03-30 19:48:05 +02003926 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3927 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3928 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3929 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3930 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3931 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3932 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3933 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3934 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
3935 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
Nick Terrell374f8682017-05-10 17:48:42 -07003936 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
3937 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
3938 { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
Yann Colletfd416f12016-01-30 03:14:15 +01003939},
3940};
3941
Yann Collet236d94f2016-05-18 12:06:33 +02003942/*! ZSTD_getCParams() :
3943* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
3944* Size values are optional, provide 0 if not known or unused */
Yann Collet009d6042017-05-19 10:17:59 -07003945ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
Yann Colletfd416f12016-01-30 03:14:15 +01003946{
Yann Collet15354142016-04-04 04:22:53 +02003947 ZSTD_compressionParameters cp;
Yann Collet009d6042017-05-19 10:17:59 -07003948 size_t const addedSize = srcSizeHint ? 0 : 500;
3949 U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
Yann Collet3b719252016-03-30 19:48:05 +02003950 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
Yann Collet6d4fef32017-05-17 18:36:15 -07003951 if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
Yann Colletfd416f12016-01-30 03:14:15 +01003952 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
Yann Collet15354142016-04-04 04:22:53 +02003953 cp = ZSTD_defaultCParameters[tableID][compressionLevel];
Yann Collet1005fc12016-04-04 13:28:28 +02003954 if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
3955 if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
Yann Collet8a57b922016-04-04 13:49:18 +02003956 if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
Yann Collet1005fc12016-04-04 13:28:28 +02003957 if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
3958 }
Yann Collet009d6042017-05-19 10:17:59 -07003959 cp = ZSTD_adjustCParams(cp, srcSizeHint, dictSize);
Yann Collet15354142016-04-04 04:22:53 +02003960 return cp;
Yann Colletfd416f12016-01-30 03:14:15 +01003961}
Yann Collet3d2cd7f2016-06-27 15:12:26 +02003962
3963/*! ZSTD_getParams() :
Yann Colleta43a8542016-07-12 13:42:10 +02003964* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
Yann Collet3d2cd7f2016-06-27 15:12:26 +02003965* All fields of `ZSTD_frameParameters` are set to default (0) */
Yann Collet009d6042017-05-19 10:17:59 -07003966ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
Yann Collet3d2cd7f2016-06-27 15:12:26 +02003967 ZSTD_parameters params;
Yann Collet009d6042017-05-19 10:17:59 -07003968 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
Yann Collet3d2cd7f2016-06-27 15:12:26 +02003969 memset(&params, 0, sizeof(params));
3970 params.cParams = cParams;
3971 return params;
3972}