/*
    ZSTD HC - High Compression Mode of Zstandard
    Copyright (C) 2015, Yann Collet.

    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:

    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following disclaimer
    in the documentation and/or other materials provided with the
    distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    You can contact the author at :
    - Zstd source repository : https://www.zstd.net
*/


/* *******************************************************
*  Compiler specifics
*********************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#else
#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif


/* *************************************
*  Includes
***************************************/
#include <stdlib.h>   /* malloc */
#include <string.h>   /* memset */
#include "mem.h"
#include "fse_static.h"
#include "huff0.h"
#include "zstd_static.h"
#include "zstd_internal.h"


/* *************************************
*  Constants
***************************************/
static const U32 g_searchStrength = 8;


/* *************************************
*  Sequence storage
***************************************/
typedef struct {
    void* buffer;
    U32*  offsetStart;
    U32*  offset;
    BYTE* offCodeStart;
    BYTE* offCode;
    BYTE* litStart;
    BYTE* lit;
    BYTE* litLengthStart;
    BYTE* litLength;
    BYTE* matchLengthStart;
    BYTE* matchLength;
    BYTE* dumpsStart;
    BYTE* dumps;
} seqStore_t;

static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->offset = ssPtr->offsetStart;
    ssPtr->lit = ssPtr->litStart;
    ssPtr->litLength = ssPtr->litLengthStart;
    ssPtr->matchLength = ssPtr->matchLengthStart;
    ssPtr->dumps = ssPtr->dumpsStart;
}


/* *************************************
*  Context memory management
***************************************/
#define WORKPLACESIZE (BLOCKSIZE*3)

struct ZSTD_CCtx_s
{
    const BYTE* nextSrc;     /* next block here to continue on current prefix */
    const BYTE* base;        /* All regular indexes relative to this position */
    const BYTE* dictBase;    /* extDict indexes relative to this position */
    U32   dictLimit;         /* below that point, need extDict */
    U32   lowLimit;          /* below that point, no more data */
    U32   nextToUpdate;      /* index from which to continue dictionary update */
    ZSTD_parameters params;
    void* workSpace;
    size_t workSpaceSize;

    seqStore_t seqStore;     /* sequences storage ptrs */
    U32* hashTable;
    U32* contentTable;
};


ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    free(cctx->workSpace);
    free(cctx);
    return 0;
}


static unsigned ZSTD_highbit(U32 val);

/** ZSTD_validateParams
    corrects params values to remain within authorized range,
    and optimizes them for the provided source size when srcSize > 0 */
void ZSTD_validateParams(ZSTD_parameters* params, U64 srcSizeHint)
{
    const U32 btPlus = (params->strategy == ZSTD_btlazy2);

    /* validate params */
    if (params->windowLog > ZSTD_WINDOWLOG_MAX) params->windowLog = ZSTD_WINDOWLOG_MAX;
    if (params->windowLog < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN;

    /* correct params, to use less memory */
    if ((srcSizeHint > 0) && (srcSizeHint < (1<<ZSTD_WINDOWLOG_MAX)))
    {
        U32 srcLog = ZSTD_highbit((U32)srcSizeHint-1) + 1;
        if (params->windowLog > srcLog) params->windowLog = srcLog;
    }

    if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus;   /* <= ZSTD_CONTENTLOG_MAX */
    if (params->contentLog < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN;
    if (params->hashLog > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX;
    if (params->hashLog < ZSTD_HASHLOG_MIN) params->hashLog = ZSTD_HASHLOG_MIN;
    if (params->searchLog > ZSTD_SEARCHLOG_MAX) params->searchLog = ZSTD_SEARCHLOG_MAX;
    if (params->searchLog < ZSTD_SEARCHLOG_MIN) params->searchLog = ZSTD_SEARCHLOG_MIN;
    if (params->searchLength > ZSTD_SEARCHLENGTH_MAX) params->searchLength = ZSTD_SEARCHLENGTH_MAX;
    if (params->searchLength < ZSTD_SEARCHLENGTH_MIN) params->searchLength = ZSTD_SEARCHLENGTH_MIN;
    if ((U32)params->strategy > (U32)ZSTD_btlazy2) params->strategy = ZSTD_btlazy2;
}


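/* Illustrative sketch (not part of the library build) : a caller may fill a
   ZSTD_parameters structure by hand and rely on ZSTD_validateParams() to clamp
   every field into its supported range. Values below are hypothetical examples. */
#if 0
static void ZSTD_example_validateParams(void)
{
    ZSTD_parameters params;
    memset(&params, 0, sizeof(params));   /* out-of-range zeros get raised to the *_MIN limits */
    params.windowLog = 99;                /* too large : clamped to ZSTD_WINDOWLOG_MAX */
    params.strategy  = ZSTD_fast;
    ZSTD_validateParams(&params, 4096);   /* 4 KB source hint : windowLog further reduced to fit the source */
}
#endif
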
static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
                                       ZSTD_parameters params,
                                       U64 srcSizeHint)
{
    ZSTD_validateParams(&params, srcSizeHint);

    /* reserve table memory */
    {
        const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
        const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
        const size_t neededSpace = tableSpace + WORKPLACESIZE;
        if (zc->workSpaceSize < neededSpace)
        {
            free(zc->workSpace);
            zc->workSpaceSize = neededSpace;
            zc->workSpace = malloc(neededSpace);
            if (zc->workSpace == NULL) return ERROR(memory_allocation);
        }
        memset(zc->workSpace, 0, tableSpace);
        zc->hashTable = (U32*)(zc->workSpace);
        zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog);
        zc->seqStore.buffer = (void*) (zc->contentTable + ((size_t)1 << contentLog));
    }

    zc->nextToUpdate = 1;
    zc->nextSrc = NULL;
    zc->base = NULL;
    zc->dictBase = NULL;
    zc->dictLimit = 0;
    zc->lowLimit = 0;
    zc->params = params;
    zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
    zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (BLOCKSIZE>>2));
    zc->seqStore.litStart = zc->seqStore.offCodeStart + (BLOCKSIZE>>2);
    zc->seqStore.litLengthStart = zc->seqStore.litStart + BLOCKSIZE;
    zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (BLOCKSIZE>>2);
    zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (BLOCKSIZE>>2);

    return 0;
}

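/* Workspace layout resulting from ZSTD_resetCCtx_advanced() (sketch, derived from the
   assignments above) :
   [ hashTable : (1<<hashLog) U32 ][ contentTable : (1<<contentLog) U32 ][ seqStore buffers : WORKPLACESIZE bytes ]
   The seqStore area is itself split into offset, offCode, literal, litLength,
   matchLength and dumps buffers, all sized relative to BLOCKSIZE. */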

static void ZSTD_reduceIndex (ZSTD_CCtx* zc,
                              const U32 reducerValue)
{
    const U32 contentLog = zc->params.strategy == ZSTD_fast ? 1 : zc->params.contentLog;
    const U32 tableSpaceU32 = (1 << contentLog) + (1 << zc->params.hashLog);
    U32* table32 = zc->hashTable;
    U32 index;

    for (index=0 ; index < tableSpaceU32 ; index++)
    {
        if (table32[index] < reducerValue) table32[index] = 0;
        else table32[index] -= reducerValue;
    }
}


/* *******************************************************
*  Block entropic compression
*********************************************************/
size_t ZSTD_compressBound(size_t srcSize)   /* maximum compressed size */
{
    return FSE_compressBound(srcSize) + 12;
}


size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;

    if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return ERROR(dstSize_tooSmall);
    memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);

    /* Build header */
    ostart[0] = (BYTE)(srcSize>>16);
    ostart[1] = (BYTE)(srcSize>>8);
    ostart[2] = (BYTE) srcSize;
    ostart[0] += (BYTE)(bt_raw<<6);   /* is a raw (uncompressed) block */

    return ZSTD_blockHeaderSize+srcSize;
}

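/* Block header written above (as read from the code) : 3 bytes, block size stored
   big-endian in the low 22 bits, block type (here bt_raw) in the 2 upper bits of byte 0. */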

static size_t ZSTD_noCompressLiterals (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;

    if (srcSize + 3 > maxDstSize) return ERROR(dstSize_tooSmall);

    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RAW);
    memcpy(ostart + 3, src, srcSize);
    return srcSize + 3;
}

static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;

    (void)maxDstSize;
    MEM_writeLE32(dst, ((U32)srcSize << 2) | IS_RLE);   /* note : maxDstSize > litHeaderSize > 4 */
    ostart[3] = *(const BYTE*)src;
    return 4;
}

size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 1; }

static size_t ZSTD_compressLiterals (void* dst, size_t maxDstSize,
                                     const void* src, size_t srcSize)
{
    const size_t minGain = ZSTD_minGain(srcSize);
    BYTE* const ostart = (BYTE*)dst;
    size_t hsize;
    static const size_t litHeaderSize = 5;

    if (maxDstSize < litHeaderSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */

    hsize = HUF_compress(ostart+litHeaderSize, maxDstSize-litHeaderSize, src, srcSize);

    if ((hsize==0) || (hsize >= srcSize - minGain)) return ZSTD_noCompressLiterals(dst, maxDstSize, src, srcSize);
    if (hsize==1) return ZSTD_compressRleLiteralsBlock(dst, maxDstSize, src, srcSize);

    /* Build header */
    {
        ostart[0]  = (BYTE)(srcSize << 2);   /* is a block, is compressed */
        ostart[1]  = (BYTE)(srcSize >> 6);
        ostart[2]  = (BYTE)(srcSize >>14);
        ostart[2] += (BYTE)(hsize << 5);
        ostart[3]  = (BYTE)(hsize >> 3);
        ostart[4]  = (BYTE)(hsize >>11);
    }

    return hsize+litHeaderSize;
}


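/* Compressed-literals header written above (sketch, as read from the code) : 5 bytes ;
   the 2 low bits of byte 0 give the literal block type (00 = Huffman-compressed),
   the regenerated size occupies the next ~19 bits, and the compressed size (hsize)
   occupies the remaining ~19 bits up to byte 4. */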
#define LITERAL_NOENTROPY 63   /* cheap heuristic */

size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
                              const seqStore_t* seqStorePtr,
                              size_t srcSize)
{
    U32 count[MaxSeq+1];
    S16 norm[MaxSeq+1];
    size_t mostFrequent;
    U32 max = 255;
    U32 tableLog = 11;
    U32 CTable_LitLength  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const BYTE* const op_lit_start = seqStorePtr->litStart;
    const BYTE* const llTable = seqStorePtr->litLengthStart;
    const BYTE* const llPtr = seqStorePtr->litLength;
    const BYTE* const mlTable = seqStorePtr->matchLengthStart;
    const U32*  const offsetTable = seqStorePtr->offsetStart;
    BYTE* const offCodeTable = seqStorePtr->offCodeStart;
    BYTE* op = dst;
    BYTE* const oend = dst + maxDstSize;
    const size_t nbSeq = llPtr - llTable;
    const size_t minGain = ZSTD_minGain(srcSize);
    const size_t maxCSize = srcSize - minGain;
    BYTE* seqHead;


    /* Compress literals */
    {
        size_t cSize;
        size_t litSize = seqStorePtr->lit - op_lit_start;

        if (litSize <= LITERAL_NOENTROPY)
            cSize = ZSTD_noCompressLiterals(op, maxDstSize, op_lit_start, litSize);
        else
            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
        if (ZSTD_isError(cSize)) return cSize;
        op += cSize;
    }

    /* Sequences Header */
    if ((oend-op) < MIN_SEQUENCES_SIZE)
        return ERROR(dstSize_tooSmall);
    MEM_writeLE16(op, (U16)nbSeq); op+=2;
    seqHead = op;

    /* dumps : contains too large lengths */
    {
        size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
        if (dumpsLength < 512)
        {
            op[0] = (BYTE)(dumpsLength >> 8);
            op[1] = (BYTE)(dumpsLength);
            op += 2;
        }
        else
        {
            op[0] = 2;
            op[1] = (BYTE)(dumpsLength>>8);
            op[2] = (BYTE)(dumpsLength);
            op += 3;
        }
        if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall);
        memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
        op += dumpsLength;
    }

    /* CTable for Literal Lengths */
    max = MaxLL;
    mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
    if ((mostFrequent == nbSeq) && (nbSeq > 2))
    {
        *op++ = *(seqStorePtr->litLengthStart);
        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
        LLtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
    {
        FSE_buildCTable_raw(CTable_LitLength, LLbits);
        LLtype = bt_raw;
    }
    else
    {
        size_t NCountSize;
        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
        op += NCountSize;
        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
        LLtype = bt_compressed;
    }

    /* CTable for Offsets codes */
    {
        /* create Offset codes */
        size_t i;
        max = MaxOff;
        for (i=0; i<nbSeq; i++)
        {
            offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
            if (offsetTable[i]==0) offCodeTable[i]=0;
        }
        mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
    }
    if ((mostFrequent == nbSeq) && (nbSeq > 2))
    {
        *op++ = *offCodeTable;
        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
        Offtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
    {
        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
        Offtype = bt_raw;
    }
    else
    {
        size_t NCountSize;
        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
        op += NCountSize;
        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
        Offtype = bt_compressed;
    }

    /* CTable for MatchLengths */
    max = MaxML;
    mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq);
    if ((mostFrequent == nbSeq) && (nbSeq > 2))
    {
        *op++ = *seqStorePtr->matchLengthStart;
        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
        MLtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
    {
        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
        MLtype = bt_raw;
    }
    else
    {
        size_t NCountSize;
        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
        if (FSE_isError(NCountSize)) return ERROR(GENERIC);
        op += NCountSize;
        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
        MLtype = bt_compressed;
    }

    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    /* Encoding Sequences */
    {
        size_t streamSize, errorCode;
        BIT_CStream_t blockStream;
        FSE_CState_t stateMatchLength;
        FSE_CState_t stateOffsetBits;
        FSE_CState_t stateLitLength;
        int i;

        errorCode = BIT_initCStream(&blockStream, op, oend-op);
        if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall);   /* not enough space remaining */
        FSE_initCState(&stateMatchLength, CTable_MatchLength);
        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
        FSE_initCState(&stateLitLength, CTable_LitLength);

        for (i=(int)nbSeq-1; i>=0; i--)
        {
            BYTE matchLength = mlTable[i];
            U32  offset = offsetTable[i];
            BYTE offCode = offCodeTable[i];                                    /* 32b*/  /* 64b*/
            U32  nbBits = (offCode-1) * (!!offCode);
            BYTE litLength = llTable[i];                                       /* (7)*/  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength);    /* 17 */  /* 17 */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                     /*  7 */
            BIT_addBits(&blockStream, offset, nbBits);                         /* 32 */  /* 42 */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                     /*  7 */
            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);         /* 16 */  /* 51 */
            FSE_encodeSymbol(&blockStream, &stateLitLength, litLength);        /* 26 */  /* 61 */
            BIT_flushBits(&blockStream);                                       /*  7 */  /*  7 */
        }

        FSE_flushCState(&blockStream, &stateMatchLength);
        FSE_flushCState(&blockStream, &stateOffsetBits);
        FSE_flushCState(&blockStream, &stateLitLength);

        streamSize = BIT_closeCStream(&blockStream);
        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
        op += streamSize;
    }

    /* check compressibility */
    if ((size_t)(op-dst) >= maxCSize) return 0;

    return op - dst;
}


/** ZSTD_storeSeq
    Store a sequence (literal length, literals, offset code and match length) into seqStore_t
    @offsetCode : distance to match, or 0 == repCode
    @matchCode : matchLength - MINMATCH
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode)
{
#if 0
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = literals;
    if (literals - g_start == 8695)
        printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
               (U32)(literals - g_start), (U32)litLength, (U32)matchCode+4, (U32)offsetCode);
#endif

    /* copy Literals */
    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength >= MaxLL)
    {
        *(seqStorePtr->litLength++) = MaxLL;
        if (litLength < 255 + MaxLL)
            *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
        else
        {
            *(seqStorePtr->dumps++) = 255;
            MEM_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3;
        }
    }
    else *(seqStorePtr->litLength++) = (BYTE)litLength;

    /* match offset */
    *(seqStorePtr->offset++) = (U32)offsetCode;

    /* match Length */
    if (matchCode >= MaxML)
    {
        *(seqStorePtr->matchLength++) = MaxML;
        if (matchCode < 255+MaxML)
            *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML);
        else
        {
            *(seqStorePtr->dumps++) = 255;
            MEM_writeLE32(seqStorePtr->dumps, (U32)matchCode); seqStorePtr->dumps += 3;
        }
    }
    else *(seqStorePtr->matchLength++) = (BYTE)matchCode;
}


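/* Typical call, as issued by the block compressors further below (illustrative) :
       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset, matchLength);
   where matchLength already excludes MINMATCH, and offset == 0 selects the repeat code. */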
/* *************************************
*  Match length counter
***************************************/
static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }

static unsigned ZSTD_highbit(U32 val)
{
#   if defined(_MSC_VER)   /* Visual */
    unsigned long r=0;
    _BitScanReverse(&r, val);
    return (unsigned)r;
#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
    return 31 - __builtin_clz(val);
#   else   /* Software version */
    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    int r;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
    return r;
#   endif
}

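/* ZSTD_highbit() returns the position of the highest set bit,
   e.g. ZSTD_highbit(1) == 0, ZSTD_highbit(32) == 5, ZSTD_highbit(0xFFFF) == 15. */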
MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val)
{
    if (MEM_isLittleEndian())
    {
        if (MEM_64bits())
        {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (int)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        }
        else /* 32 bits */
        {
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (int)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    }
    else   /* Big Endian CPU */
    {
        if (MEM_64bits())
        {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        }
        else /* 32 bits */
        {
#       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
        }
    }
}


657MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
658{
659 const BYTE* const pStart = pIn;
660
661 while ((pIn<pInLimit-(sizeof(size_t)-1)))
662 {
663 size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
664 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
665 pIn += ZSTD_NbCommonBytes(diff);
666 return (size_t)(pIn - pStart);
667 }
668
669 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
670 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
671 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
672 return (size_t)(pIn - pStart);
673}
674
675
676
/* *************************************
*  Hashes
***************************************/

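/* The hash functions below use multiplicative hashing : the input is multiplied by a
   large odd constant and the top hBits bits of the (truncated) product form the table
   index. For ZSTD_hash5/6/7 the `<< (64-40/48/56)` step keeps only the low 40/48/56
   bits of the product before extracting the top hBits bits, so on little-endian targets
   only the first 5/6/7 bytes read by MEM_read64() actually influence the result.
   Illustrative example : ZSTD_hashPtr(p, 17, 5) hashes 5 leading bytes into a 17-bit index. */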
static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); }

static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    }
}

/* *************************************
*  Fast Scan
***************************************/

FORCE_INLINE
size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* ctx,
                                       void* dst, size_t maxDstSize,
                                       const void* src, size_t srcSize,
                                       const U32 mls)
{
    U32* hashTable = ctx->hashTable;
    const U32 hBits = ctx->params.hashLog;
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const lowest = base + ctx->lowLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;

    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;


    /* init */
    ZSTD_resetSeqStore(seqStorePtr);
    if (ip == base)
    {
        hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1;
        hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2;
        hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3;
        ip = base+4;
    }

    /* Main Search Loop */
    while (ip < ilimit)   /* < instead of <=, because repcode check at (ip+1) */
    {
        size_t matchLength;
        size_t offset;
        const size_t h = ZSTD_hashPtr(ip, hBits, mls);
        const BYTE* match = base + hashTable[h];
        hashTable[h] = (U32)(ip-base);

        if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))
        {
            matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
            ip++;
            offset = 0;
        }
        else
        {
            if ( (match < lowest) ||
                 (MEM_read32(match) != MEM_read32(ip)) )
            {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }
            matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
            while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; }   /* catch up */
            offset = ip-match;
            offset_2 = offset_1;
            offset_1 = offset;
        }

        /* match found */
        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength);
        ip += matchLength + MINMATCH;
        anchor = ip;

        if (ip <= ilimit)
        {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base);
            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) )
            {
                /* store sequence */
                size_t ml = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
                size_t tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml);
                ip += ml+MINMATCH;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }

    /* Last Literals */
    {
        size_t lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }

    /* Final compression stage */
    return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
                                  seqStorePtr, srcSize);
}


size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                               void* dst, size_t maxDstSize,
                               const void* src, size_t srcSize)
{
    const U32 mls = ctx->params.searchLength;
    switch(mls)
    {
    default:
    case 4 :
        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7);
    }
}

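/* Note : ZSTD_compressBlock_fast_generic() is FORCE_INLINE'd and only ever called with a
   compile-time constant mls, so each case above instantiates a specialized copy in which
   the ZSTD_hashPtr() switch is resolved statically (template emulation in C). */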

//FORCE_INLINE
size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                               void* dst, size_t maxDstSize,
                                               const void* src, size_t srcSize,
                                               const U32 mls)
{
    U32* hashTable = ctx->hashTable;
    const U32 hBits = ctx->params.hashLog;
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 lowLimit = ctx->lowLimit;
    const U32 dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;

    U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;


    /* init */
    ZSTD_resetSeqStore(seqStorePtr);
    {
        /* skip first 4 positions to avoid read overflow during repcode match check */
        hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0);
        hashTable[ZSTD_hashPtr(ip+1, hBits, mls)] = (U32)(ip-base+1);
        hashTable[ZSTD_hashPtr(ip+2, hBits, mls)] = (U32)(ip-base+2);
        hashTable[ZSTD_hashPtr(ip+3, hBits, mls)] = (U32)(ip-base+3);
        ip += 4;
    }

    /* Main Search Loop */
    while (ip < ilimit)   /* < instead of <=, because (ip+1) */
    {
        const size_t h = ZSTD_hashPtr(ip, hBits, mls);
        const U32 matchIndex = hashTable[h];
        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;
        const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictBase + lowLimit : lowPrefixPtr;
        const U32 current = (U32)(ip-base);
        const U32 repIndex = current + 1 - offset_1;
        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
        const BYTE* repMatch = repBase + repIndex;
        size_t matchLength;
        U32 offset;
        hashTable[h] = current;   /* update hash table */

        if ( ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) )
        {
            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
            const BYTE* vEnd = ip+1 + (repMatchEnd-repMatch);
            if (vEnd > iend) vEnd = iend;
            matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd);
            if (repMatch + matchLength + MINMATCH == dictEnd)
                matchLength += ZSTD_count(ip+1 + matchLength + MINMATCH, lowPrefixPtr, iend);
            ip++;
            offset = 0;
        }
        else
        {
            if ( (matchIndex < lowLimit) ||
                 (MEM_read32(match) != MEM_read32(ip)) )
            { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
            {
                const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                const BYTE* iEndCount = (matchEnd - match < iend - ip) ? ip + (matchEnd - match) : iend;
                matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iEndCount);
                if (match + matchLength + MINMATCH == dictEnd)
                    matchLength += ZSTD_count(ip + matchLength + MINMATCH, lowPrefixPtr, iend);
                while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; matchLength++; }   /* catch up */
                offset = current - matchIndex;
                offset_2 = offset_1;
                offset_1 = offset;
            }
        }

        /* found a match */
        ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset, matchLength);
        ip += matchLength + MINMATCH;
        anchor = ip;

        if (ip <= ilimit)
        {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(ip-2-matchLength, hBits, mls)] = (U32)(ip-2-matchLength-base);
            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit)
            {
                U32 current2 = (U32)(ip-base);
                const U32 repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
                if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) )
                {
                    size_t maxIlength = iend - ip;
                    size_t maxMlength = repIndex2 < dictLimit ? (size_t)(dictLimit - repIndex2) : (size_t)(iend - repMatch2);
                    size_t maxML = MIN(maxMlength, maxIlength);
                    size_t ml = ZSTD_count(ip+MINMATCH, repMatch2+MINMATCH, ip + maxML);
                    U32 tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                    if (ml+MINMATCH == maxMlength)   /* reached end of extDict */
                        ml += ZSTD_count(ip+MINMATCH+ml, lowPrefixPtr, iend);
                    hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, ml);
                    ip += ml+MINMATCH;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }

    /* Last Literals */
    {
        size_t lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }

    /* Final compression stage */
    return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
                                  seqStorePtr, srcSize);
}


size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                                       void* dst, size_t maxDstSize,
                                       const void* src, size_t srcSize)
{
    const U32 mls = ctx->params.searchLength;
    switch(mls)
    {
    default:
    case 4 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 7);
    }
}


/* *************************************
*  Binary Tree search
***************************************/
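/* Layout sketch (as read from the code below) : the content table doubles as a binary
   tree ; position idx owns the pair bt[2*(idx&btMask)] (link to its smaller child) and
   bt[2*(idx&btMask)+1] (link to its larger child), ordered by the suffix starting at that
   position. commonLengthSmaller / commonLengthLarger carry the prefix length already known
   to be shared with the current position, so each comparison can resume past it. */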
/** ZSTD_insertBt1 : add one ptr to tree
    @ip : assumed <= iend-8 */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares)
{
    U32* const hashTable = zc->hashTable;
    const U32 hashLog = zc->params.hashLog;
    const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const bt = zc->contentTable;
    const U32 btLog = zc->params.contentLog - 1;
    const U32 btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* match = base + matchIndex;
    U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr = bt + 2*(current&btMask) + 1;
    U32 dummy32;   /* to be nullified at the end */
    const U32 windowSize = 1 << zc->params.windowLog;
    const U32 windowLow = windowSize >= current ? 0 : current - windowSize;

    if ((current-matchIndex == 1)   /* RLE */
        && MEM_read64(match) == MEM_read64(ip))
    {
        size_t rleLength = ZSTD_count(ip+sizeof(size_t), match+sizeof(size_t), iend) + sizeof(size_t);
        return (U32)(rleLength - mls);
    }

    hashTable[h] = (U32)(ip - base);   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow))
    {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */

        match = base + matchIndex;
        matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);

        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
            break;   /* just drop, to guarantee consistency (miss a bit of compression; if someone knows better, please tell) */

        if (match[matchLength] < ip[matchLength])
        {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        }
        else
        {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
        }
    }

    *smallerPtr = *largerPtr = 0;
    return 1;
}


FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_insertBtAndFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 nbCompares, const U32 mls)
{
    U32* const hashTable = zc->hashTable;
    const U32 hashLog = zc->params.hashLog;
    const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const bt = zc->contentTable;
    const U32 btLog = zc->params.contentLog - 1;
    const U32 btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowSize = 1 << zc->params.windowLog;
    const U32 windowLow = windowSize >= current ? 0 : current - windowSize;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr = bt + 2*(current&btMask) + 1;
    size_t bestLength = 0;
    U32 dummy32;   /* to be nullified at the end */

    hashTable[h] = (U32)(ip-base);   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow))
    {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match = base + matchIndex;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */

        matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);

        if (matchLength > bestLength)
        {
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) )
                bestLength = matchLength, *offsetPtr = current - matchIndex;
            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
                break;   /* just drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength])
        {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            if (matchIndex <= btLow) smallerPtr=&dummy32;   /* beyond tree size, stop the search */
            matchIndex = (matchIndex <= btLow) ? windowLow : nextPtr[1];
        }
        else
        {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            largerPtr = nextPtr;
            if (matchIndex <= btLow) largerPtr=&dummy32;   /* beyond tree size, stop the search */
            matchIndex = (matchIndex <= btLow) ? windowLow : nextPtr[0];
        }
    }

    *smallerPtr = *largerPtr = 0;

    zc->nextToUpdate = current+1;   /* current has been inserted */
    return bestLength;
}


static const BYTE* ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;
    //size_t dummy;

    for( ; idx < target ; )
        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares);

    zc->nextToUpdate = idx;
    return base + idx;
}


/** Tree updater, providing best match */
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_BtFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
{
    const BYTE* nextToUpdate = ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    if (nextToUpdate > ip)
    {
        /* RLE data */
        *offsetPtr = 1;
        return ZSTD_count(ip, ip-1, iLimit);
    }
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls);
}


FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}


/* ***********************
*  Hash Chain
*************************/

#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]

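/* Chain layout sketch (as read from the code) : hashTable[h] holds the most recent
   position whose leading mls bytes hash to h ; chainTable[pos & chainMask] then links each
   inserted position to the previous one with the same hash, entries older than chainSize
   positions being progressively overwritten. */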
/* Update chains up to ip (excluded) */
static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
{
    U32* const hashTable = zc->hashTable;
    const U32 hashLog = zc->params.hashLog;
    U32* const chainTable = zc->contentTable;
    const U32 chainMask = (1 << zc->params.contentLog) - 1;
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target)
    {
        size_t h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    zc->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}


FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HcFindBestMatch (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    const BYTE* const base = zc->base;
    const U32 current = (U32)(ip-base);
    U32* const chainTable = zc->contentTable;
    const U32 chainSize = (1 << zc->params.contentLog);
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    const U32 chainMask = chainSize-1;
    const U32 lowLimit = zc->lowLimit;
    U32 matchIndex;
    const BYTE* match;
    int nbAttempts=maxNbAttempts;
    size_t ml=0;

    /* HC4 match finder */
    matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch);

    while ((matchIndex>lowLimit) && (nbAttempts))
    {
        nbAttempts--;
        match = base + matchIndex;
        if (match[ml] == ip[ml])   /* potentially better */
        {
            const size_t mlt = ZSTD_count(ip, match, iLimit);
            if (mlt > ml)
            //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
            {
                ml = mlt; *offsetPtr = ip-match;
                if (ip+mlt >= iLimit) break;
            }
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    return ml;
}

FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 6 : return ZSTD_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}


1263FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
1264size_t ZSTD_HcFindBestMatch_extDict (
1265 ZSTD_CCtx* zc, /* Index table will be updated */
1266 const BYTE* const ip, const BYTE* const iLimit,
1267 size_t* offsetPtr,
1268 const U32 maxNbAttempts, const U32 matchLengthSearch)
1269{
Yann Collet1f44b3f2015-11-05 17:32:18 +01001270 U32* const chainTable = zc->contentTable;
1271 const U32 chainSize = (1 << zc->params.contentLog);
Yann Collet5106a762015-11-05 15:00:24 +01001272 const U32 chainMask = chainSize-1;
1273 const BYTE* const base = zc->base;
1274 const BYTE* const dictBase = zc->dictBase;
1275 const U32 dictLimit = zc->dictLimit;
Yann Collet89db5e02015-11-13 11:27:46 +01001276 const U32 lowLimit = zc->lowLimit;
Yann Collet9a24e592015-11-22 02:53:43 +01001277 const U32 current = (U32)(ip-base);
1278 const U32 minChain = current > chainSize ? current - chainSize : 0;
Yann Collet5106a762015-11-05 15:00:24 +01001279 U32 matchIndex;
1280 const BYTE* match;
1281 int nbAttempts=maxNbAttempts;
1282 size_t ml=0;
1283
1284 /* HC4 match finder */
Yann Collet5be2dd22015-11-11 13:43:58 +01001285 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
Yann Collet5106a762015-11-05 15:00:24 +01001286
1287 while ((matchIndex>lowLimit) && (nbAttempts))
1288 {
1289 nbAttempts--;
1290 if (matchIndex >= dictLimit)
1291 {
1292 match = base + matchIndex;
Yann Collet4baee502015-11-09 03:19:33 +01001293 if (match[ml] == ip[ml]) /* potentially better */
Yann Collet5106a762015-11-05 15:00:24 +01001294 {
Yann Collet4baee502015-11-09 03:19:33 +01001295 const size_t mlt = ZSTD_count(ip, match, iLimit);
Yann Collet5106a762015-11-05 15:00:24 +01001296 if (mlt > ml)
1297 //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
1298 {
1299 ml = mlt; *offsetPtr = ip-match;
Yann Collet4baee502015-11-09 03:19:33 +01001300 if (ip+mlt >= iLimit) break;
Yann Collet5106a762015-11-05 15:00:24 +01001301 }
1302 }
1303 }
1304 else
1305 {
1306 match = dictBase + matchIndex;
Yann Collet9a24e592015-11-22 02:53:43 +01001307 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 */
Yann Collet5106a762015-11-05 15:00:24 +01001308 {
1309 size_t mlt;
1310 const BYTE* vLimit = ip + (dictLimit - matchIndex);
1311 if (vLimit > iLimit) vLimit = iLimit;
1312 mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
Yann Collet9a24e592015-11-22 02:53:43 +01001313 if (match+mlt == dictBase+dictLimit)
Yann Collet5106a762015-11-05 15:00:24 +01001314 mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
Yann Collet9a24e592015-11-22 02:53:43 +01001315 if (mlt > ml) { ml = mlt; *offsetPtr = current - matchIndex; }
Yann Collet5106a762015-11-05 15:00:24 +01001316 }
1317 }
1318
Yann Collet9a24e592015-11-22 02:53:43 +01001319 if (matchIndex <= minChain) break;
Yann Collet5106a762015-11-05 15:00:24 +01001320 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
1321 }
1322
1323 return ml;
1324}
1325
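/* Note added for clarity : in the extDict variant above, a candidate below dictLimit lives
   in the old segment (dictBase). Its length is first measured against that segment, up to
   vLimit ; if the match reaches the segment end exactly (dictBase+dictLimit), it may
   continue seamlessly into the current prefix at base+dictLimit, hence the second
   ZSTD_count() call. The reported offset is the index distance `current - matchIndex`. */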
1326
Yann Collet287b7d92015-11-22 13:24:05 +01001327FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
Yann Collet5be2dd22015-11-11 13:43:58 +01001328 ZSTD_CCtx* zc, /* Index table will be updated */
Yann Collet5106a762015-11-05 15:00:24 +01001329 const BYTE* ip, const BYTE* const iLimit,
1330 size_t* offsetPtr,
1331 const U32 maxNbAttempts, const U32 matchLengthSearch)
1332{
1333 switch(matchLengthSearch)
1334 {
1335 default :
Yann Collet287b7d92015-11-22 13:24:05 +01001336 case 4 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1337 case 5 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1338 case 6 : return ZSTD_HcFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
Yann Collet5106a762015-11-05 15:00:24 +01001339 }
1340}
1341
1342
Yann Collet287b7d92015-11-22 13:24:05 +01001343/* *******************************
Yann Collet9a24e592015-11-22 02:53:43 +01001344* Common parser - lazy strategy
Yann Collet287b7d92015-11-22 13:24:05 +01001345*********************************/
Yann Collet5106a762015-11-05 15:00:24 +01001346FORCE_INLINE
Yann Collet5be2dd22015-11-11 13:43:58 +01001347size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
Yann Colletc95f8992015-11-19 17:28:35 +01001348 void* dst, size_t maxDstSize, const void* src, size_t srcSize,
Yann Collet007c1c62015-11-22 02:42:28 +01001349 const U32 searchMethod, const U32 depth)
Yann Collet5106a762015-11-05 15:00:24 +01001350{
1351 seqStore_t* seqStorePtr = &(ctx->seqStore);
1352 const BYTE* const istart = (const BYTE*)src;
1353 const BYTE* ip = istart;
1354 const BYTE* anchor = istart;
1355 const BYTE* const iend = istart + srcSize;
1356 const BYTE* const ilimit = iend - 8;
1357
1358 size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
1359 const U32 maxSearches = 1 << ctx->params.searchLog;
1360 const U32 mls = ctx->params.searchLength;
1361
Yann Collet5be2dd22015-11-11 13:43:58 +01001362 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
Yann Collet5106a762015-11-05 15:00:24 +01001363 size_t* offsetPtr,
1364 U32 maxNbAttempts, U32 matchLengthSearch);
Yann Collet5be2dd22015-11-11 13:43:58 +01001365 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
Yann Collet5106a762015-11-05 15:00:24 +01001366
1367 /* init */
1368 ZSTD_resetSeqStore(seqStorePtr);
1369 if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
1370
1371 /* Match Loop */
Yann Collet7a231792015-11-21 15:27:35 +01001372 while (ip < ilimit)
Yann Collet5106a762015-11-05 15:00:24 +01001373 {
Yann Collet7a231792015-11-21 15:27:35 +01001374 size_t matchLength=0;
1375 size_t offset=0;
1376 const BYTE* start=ip+1;
Yann Collet5106a762015-11-05 15:00:24 +01001377
Yann Collet743402c2015-11-20 12:03:53 +01001378 /* check repCode */
Yann Collet7a231792015-11-21 15:27:35 +01001379 if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))
Yann Collet5106a762015-11-05 15:00:24 +01001380 {
Yann Collet743402c2015-11-20 12:03:53 +01001381 /* repcode : we take it */
Yann Collet7a231792015-11-21 15:27:35 +01001382 matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend) + MINMATCH;
Yann Collet007c1c62015-11-22 02:42:28 +01001383 if (depth==0) goto _storeSequence;
Yann Collet5106a762015-11-05 15:00:24 +01001384 }
1385
Yann Colletfc2afcf2015-11-06 15:40:14 +01001386 {
Yann Collet9a24e592015-11-22 02:53:43 +01001387 /* first search (depth 0) */
Yann Collet7a231792015-11-21 15:27:35 +01001388 size_t offsetFound = 99999999;
1389 size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
1390 if (ml2 > matchLength)
Yann Collet9a24e592015-11-22 02:53:43 +01001391 matchLength = ml2, start = ip, offset=offsetFound;
1392 }
1393
1394 if (matchLength < MINMATCH)
1395 {
1396 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
1397 continue;
Yann Collet7a231792015-11-21 15:27:35 +01001398 }
Yann Collet5106a762015-11-05 15:00:24 +01001399
1400 /* let's try to find a better solution */
Yann Collet007c1c62015-11-22 02:42:28 +01001401 if (depth>=1)
1402 while (ip<ilimit)
Yann Collet5106a762015-11-05 15:00:24 +01001403 {
1404 ip ++;
Yann Collet7a231792015-11-21 15:27:35 +01001405 if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1)))
Yann Collet5106a762015-11-05 15:00:24 +01001406 {
Yann Collet007c1c62015-11-22 02:42:28 +01001407 size_t mlRep = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
1408 int gain2 = (int)(mlRep * 3);
Yann Collet5106a762015-11-05 15:00:24 +01001409 int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
Yann Collet007c1c62015-11-22 02:42:28 +01001410 if ((mlRep >= MINMATCH) && (gain2 > gain1))
1411 matchLength = mlRep, offset = 0, start = ip;
Yann Collet5106a762015-11-05 15:00:24 +01001412 }
1413 {
1414 size_t offset2=999999;
1415 size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet007c1c62015-11-22 02:42:28 +01001416 int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
1417 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
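                /* Note added for clarity : the score is roughly 4 points per matched byte
                   minus the bit-cost of the offset (ZSTD_highbit ~ log2), and the "+4"
                   handicap favors keeping the current match, since switching implies one
                   more literal. Worked example : a 6-byte match at offset 1024 scores
                   6*4 - 10 + 4 = 18, while a 7-byte candidate at offset 64 scores
                   7*4 - 6 = 22, so the parser switches. The depth-2 pass below uses "+7",
                   making the current match even harder to displace. */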
Yann Collet4baee502015-11-09 03:19:33 +01001418 if ((ml2 >= MINMATCH) && (gain2 > gain1))
Yann Collet5106a762015-11-05 15:00:24 +01001419 {
Yann Collet628065c2015-11-06 18:44:54 +01001420 matchLength = ml2, offset = offset2, start = ip;
Yann Collet5106a762015-11-05 15:00:24 +01001421 continue; /* search a better one */
1422 }
1423 }
1424
1425 /* let's find an even better one */
Yann Collet007c1c62015-11-22 02:42:28 +01001426 if ((depth==2) && (ip<ilimit))
Yann Collet5106a762015-11-05 15:00:24 +01001427 {
1428 ip ++;
Yann Collet7a231792015-11-21 15:27:35 +01001429 if ((offset) && (MEM_read32(ip) == MEM_read32(ip - offset_1)))
Yann Collet5106a762015-11-05 15:00:24 +01001430 {
1431 size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
1432 int gain2 = (int)(ml2 * 4);
1433 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
Yann Collet4baee502015-11-09 03:19:33 +01001434 if ((ml2 >= MINMATCH) && (gain2 > gain1))
Yann Collet5106a762015-11-05 15:00:24 +01001435 matchLength = ml2, offset = 0, start = ip;
1436 }
1437 {
1438 size_t offset2=999999;
1439 size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
Yann Collet628065c2015-11-06 18:44:54 +01001440 int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
1441 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
Yann Collet4baee502015-11-09 03:19:33 +01001442 if ((ml2 >= MINMATCH) && (gain2 > gain1))
Yann Collet5106a762015-11-05 15:00:24 +01001443 {
Yann Collet628065c2015-11-06 18:44:54 +01001444 matchLength = ml2, offset = offset2, start = ip;
1445 continue;
Yann Collet5106a762015-11-05 15:00:24 +01001446 }
1447 }
1448 }
1449 break; /* nothing found : store previous solution */
1450 }
1451
Yann Collet4baee502015-11-09 03:19:33 +01001452 /* catch up */
Yann Collet628065c2015-11-06 18:44:54 +01001453 if (offset)
Yann Collet4baee502015-11-09 03:19:33 +01001454 {
Yann Collet9a24e592015-11-22 02:53:43 +01001455 while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */
Yann Collet4baee502015-11-09 03:19:33 +01001456 { start--; matchLength++; }
Yann Collet743402c2015-11-20 12:03:53 +01001457 offset_2 = offset_1; offset_1 = offset;
Yann Collet4baee502015-11-09 03:19:33 +01001458 }
1459
1460 /* store sequence */
Yann Collet7a231792015-11-21 15:27:35 +01001461_storeSequence:
Yann Collet5106a762015-11-05 15:00:24 +01001462 {
1463 size_t litLength = start - anchor;
Yann Collet5106a762015-11-05 15:00:24 +01001464 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
Yann Collet4baee502015-11-09 03:19:33 +01001465 anchor = ip = start + matchLength;
Yann Collet5106a762015-11-05 15:00:24 +01001466 }
1467
Yann Collet743402c2015-11-20 12:03:53 +01001468 /* check immediate repcode */
1469 while ( (ip <= ilimit)
1470 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) )
1471 {
1472 /* store sequence */
1473 matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
1474 offset = offset_2;
1475 offset_2 = offset_1;
1476 offset_1 = offset;
1477 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength);
1478 ip += matchLength+MINMATCH;
1479 anchor = ip;
1480 continue; /* loop again, in case another immediate repcode follows (seems faster when present) */
1481 }
Yann Collet5106a762015-11-05 15:00:24 +01001482 }
1483
1484 /* Last Literals */
1485 {
1486 size_t lastLLSize = iend - anchor;
1487 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1488 seqStorePtr->lit += lastLLSize;
1489 }
1490
1491 /* Final compression stage */
1492 return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
1493 seqStorePtr, srcSize);
1494}
1495
Yann Collet5be2dd22015-11-11 13:43:58 +01001496size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5106a762015-11-05 15:00:24 +01001497{
Yann Collet7a231792015-11-21 15:27:35 +01001498 return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 2);
Yann Collet5106a762015-11-05 15:00:24 +01001499}
1500
Yann Collet5be2dd22015-11-11 13:43:58 +01001501size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5106a762015-11-05 15:00:24 +01001502{
Yann Collet7a231792015-11-21 15:27:35 +01001503 return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2);
Yann Collet5106a762015-11-05 15:00:24 +01001504}
1505
Yann Collet5be2dd22015-11-11 13:43:58 +01001506size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5106a762015-11-05 15:00:24 +01001507{
Yann Collet7a231792015-11-21 15:27:35 +01001508 return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
Yann Collet5106a762015-11-05 15:00:24 +01001509}
1510
Yann Collet5be2dd22015-11-11 13:43:58 +01001511size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Colletf3eca252015-10-22 15:31:46 +01001512{
Yann Collet7a231792015-11-21 15:27:35 +01001513 return ZSTD_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
Yann Colletbe2010e2015-10-31 12:57:14 +01001514}
1515
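/* Summary added for clarity : the generic parser above is specialized through its two
   template-like parameters. searchMethod selects the match finder (1 = binary tree,
   0 = hash chain) and depth the number of lazy re-evaluation passes :
      greedy = (0,0), lazy = (0,1), lazy2 = (0,2), btlazy2 = (1,2).
   The _extDict variants below apply the same scheme to a window split across two segments. */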
1516
Yann Collet9a24e592015-11-22 02:53:43 +01001517FORCE_INLINE
1518size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
1519 void* dst, size_t maxDstSize, const void* src, size_t srcSize,
1520 const U32 searchMethod, const U32 depth)
1521{
1522 seqStore_t* seqStorePtr = &(ctx->seqStore);
1523 const BYTE* const istart = (const BYTE*)src;
1524 const BYTE* ip = istart;
1525 const BYTE* anchor = istart;
1526 const BYTE* const iend = istart + srcSize;
1527 const BYTE* const ilimit = iend - 8;
1528 const BYTE* const base = ctx->base;
1529 const U32 dictLimit = ctx->dictLimit;
1530 const BYTE* const prefixStart = base + dictLimit;
1531 const BYTE* const dictBase = ctx->dictBase;
1532 const BYTE* const dictEnd = dictBase + dictLimit;
1533
1534 size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
1535 const U32 maxSearches = 1 << ctx->params.searchLog;
1536 const U32 mls = ctx->params.searchLength;
1537
1538 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
1539 size_t* offsetPtr,
1540 U32 maxNbAttempts, U32 matchLengthSearch);
Yann Collet287b7d92015-11-22 13:24:05 +01001541 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
Yann Collet9a24e592015-11-22 02:53:43 +01001542
1543 /* init */
1544 ZSTD_resetSeqStore(seqStorePtr);
1545 if (((ip-base) - dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
1546
1547 /* Match Loop */
1548 while (ip < ilimit)
1549 {
1550 size_t matchLength=0;
1551 size_t offset=0;
1552 const BYTE* start=ip+1;
1553 U32 current = (U32)(ip-base);
1554
1555 /* check repCode */
1556 {
1557 const U32 repIndex = (U32)(current+1 - offset_1);
1558 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1559 const BYTE* const repMatch = repBase + repIndex;
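                /* Note added for clarity : the repcode candidate is considered only when the
                   4 bytes read below lie entirely within one segment : fully inside the old
                   segment (repIndex <= dictLimit-4) or inside the current prefix
                   (repIndex >= dictLimit). */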
Yann Collet734aa922015-11-22 03:01:33 +01001560 if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
Yann Colletd7233d62015-11-22 14:40:51 +01001561 if (MEM_read32(ip+1) == MEM_read32(repMatch))
Yann Collet9a24e592015-11-22 02:53:43 +01001562 {
1563 /* repcode detected : we should take it */
1564 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1565 const BYTE* vEnd = ip+1 + (repEnd - repMatch);
1566 if (vEnd > iend) vEnd = iend;
1567 matchLength = ZSTD_count(ip+1+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH;
1568 if (repMatch + matchLength == dictEnd)
1569 matchLength += ZSTD_count(ip+1+matchLength, prefixStart, iend);
1570 if (depth==0) goto _storeSequence;
1571 }
1572 }
1573
1574 {
1575 /* first search (depth 0) */
1576 size_t offsetFound = 99999999;
1577 size_t ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
1578 if (ml2 > matchLength)
1579 matchLength = ml2, start = ip, offset=offsetFound;
1580 }
1581
1582 if (matchLength < MINMATCH)
1583 {
1584 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
1585 continue;
1586 }
1587
1588 /* let's try to find a better solution */
1589 if (depth>=1)
1590 while (ip<ilimit)
1591 {
1592 ip ++;
1593 current++;
1594 /* check repCode */
1595 if (offset)
1596 {
1597 const U32 repIndex = (U32)(current - offset_1);
1598 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1599 const BYTE* const repMatch = repBase + repIndex;
Yann Colletb7fc88e2015-11-22 03:12:28 +01001600 if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
Yann Colletd7233d62015-11-22 14:40:51 +01001601 if (MEM_read32(ip) == MEM_read32(repMatch))
Yann Collet9a24e592015-11-22 02:53:43 +01001602 {
1603 /* repcode detected */
1604 size_t repLength;
1605 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1606 const BYTE* vEnd = ip + (repEnd - repMatch);
1607 if (vEnd > iend) vEnd = iend;
1608 repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH;
1609 if (repMatch + repLength == dictEnd)
1610 repLength += ZSTD_count(ip+repLength, prefixStart, iend);
1611 {
1612 int gain2 = (int)(repLength * 3);
1613 int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
1614 if ((repLength >= MINMATCH) && (gain2 > gain1))
1615 matchLength = repLength, offset = 0, start = ip;
1616 }
1617 }
1618 }
1619
1620 /* search match, depth 1 */
1621 {
1622 size_t offset2=999999;
1623 size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1624 int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
1625 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
1626 if ((ml2 >= MINMATCH) && (gain2 > gain1))
1627 {
1628 matchLength = ml2, offset = offset2, start = ip;
1629 continue; /* search a better one */
1630 }
1631 }
1632
1633 /* let's find an even better one */
1634 if ((depth==2) && (ip<ilimit))
1635 {
1636 ip ++;
1637 current++;
1638 /* check repCode */
1639 if (offset)
1640 {
1641 const U32 repIndex = (U32)(current - offset_1);
1642 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1643 const BYTE* const repMatch = repBase + repIndex;
Yann Colleta85c77b2015-11-22 12:22:04 +01001644 if ((repIndex <= dictLimit-4) || (repIndex >= dictLimit))
Yann Colletd7233d62015-11-22 14:40:51 +01001645 if (MEM_read32(ip) == MEM_read32(repMatch))
Yann Collet9a24e592015-11-22 02:53:43 +01001646 {
1647 /* repcode detected */
1648 size_t repLength;
1649 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1650 const BYTE* vEnd = ip + (repEnd - repMatch);
1651 if (vEnd > iend) vEnd = iend;
1652 repLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH;
1653 if (repMatch + repLength == dictEnd)
1654 repLength += ZSTD_count(ip+repLength, prefixStart, iend);
1655 {
1656 int gain2 = (int)(repLength * 4);
1657 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
1658 if ((repLength >= MINMATCH) && (gain2 > gain1))
1659 matchLength = repLength, offset = 0, start = ip;
1660 }
1661 }
1662 }
1663
1664 /* search match, depth 2 */
1665 {
1666 size_t offset2=999999;
1667 size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1668 int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
1669 int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
1670 if ((ml2 >= MINMATCH) && (gain2 > gain1))
1671 {
1672 matchLength = ml2, offset = offset2, start = ip;
1673 continue;
1674 }
1675 }
1676 }
1677 break; /* nothing found : store previous solution */
1678 }
1679
1680 /* catch up */
1681 if (offset)
1682 {
1683 while ((start>anchor) && (start>prefixStart+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */
1684 { start--; matchLength++; }
1685 offset_2 = offset_1; offset_1 = offset;
1686 }
1687
1688 /* store sequence */
1689_storeSequence:
1690 {
1691 size_t litLength = start - anchor;
1692 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
1693 anchor = ip = start + matchLength;
1694 }
1695
1696 /* check immediate repcode */
1697 while (ip <= ilimit)
1698 {
1699 const U32 repIndex = (U32)((ip-base) - offset_2);
1700 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1701 const BYTE* const repMatch = repBase + repIndex;
Yann Colletd7233d62015-11-22 14:40:51 +01001702 if (MEM_read32(ip) == MEM_read32(repMatch))
Yann Collet9a24e592015-11-22 02:53:43 +01001703 {
1704 /* repcode detected : we should take it */
1705 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1706 const BYTE* vEnd = ip + (repEnd - repMatch);
1707 if (vEnd > iend) vEnd = iend;
1708 matchLength = ZSTD_count(ip+MINMATCH, repMatch+MINMATCH, vEnd) + MINMATCH;
1709 if (repMatch + matchLength == dictEnd)
1710 matchLength += ZSTD_count(ip+matchLength, prefixStart, iend);
1711 offset = offset_2;
1712 offset_2 = offset_1;
1713 offset_1 = offset;
1714 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
1715 ip += matchLength;
1716 anchor = ip;
1717 continue; /* loop again, in case another immediate repcode follows (seems faster when present) */
1718 }
1719 break;
1720 }
1721 }
1722
1723 /* Last Literals */
1724 {
1725 size_t lastLLSize = iend - anchor;
1726 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1727 seqStorePtr->lit += lastLLSize;
1728 }
1729
1730 /* Final compression stage */
1731 return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
1732 seqStorePtr, srcSize);
1733}
1734
1735size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1736{
1737 return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
1738}
1739
Yann Colletb7fc88e2015-11-22 03:12:28 +01001740size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1741{
1742 return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
1743}
Yann Collet9a24e592015-11-22 02:53:43 +01001744
Yann Colleta85c77b2015-11-22 12:22:04 +01001745size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1746{
1747 return ZSTD_compressBlock_lazy_extDict_generic(ctx, dst, maxDstSize, src, srcSize, 0, 2);
1748}
1749
Yann Collet7a231792015-11-21 15:27:35 +01001750
Yann Collet5be2dd22015-11-11 13:43:58 +01001751typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
Yann Collet59d70632015-11-04 12:05:27 +01001752
Yann Collet89db5e02015-11-13 11:27:46 +01001753static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
Yann Collet59d70632015-11-04 12:05:27 +01001754{
Yann Collet89db5e02015-11-13 11:27:46 +01001755 if (extDict)
Yann Collet59d70632015-11-04 12:05:27 +01001756 {
Yann Collet89db5e02015-11-13 11:27:46 +01001757 switch(strat)
1758 {
1759 default :
1760 case ZSTD_fast:
1761 return ZSTD_compressBlock_fast_extDict;
1762 case ZSTD_greedy:
Yann Collet9a24e592015-11-22 02:53:43 +01001763 return ZSTD_compressBlock_greedy_extDict;
Yann Collet89db5e02015-11-13 11:27:46 +01001764 case ZSTD_lazy:
Yann Colletb7fc88e2015-11-22 03:12:28 +01001765 return ZSTD_compressBlock_lazy_extDict;
Yann Collet89db5e02015-11-13 11:27:46 +01001766 case ZSTD_lazy2:
Yann Colleta85c77b2015-11-22 12:22:04 +01001767 return ZSTD_compressBlock_lazy2_extDict;
Yann Collet89db5e02015-11-13 11:27:46 +01001768 case ZSTD_btlazy2:
1769 return ZSTD_compressBlock_btlazy2;
1770 }
1771 }
1772 else
1773 {
1774 switch(strat)
1775 {
1776 default :
1777 case ZSTD_fast:
1778 return ZSTD_compressBlock_fast;
1779 case ZSTD_greedy:
1780 return ZSTD_compressBlock_greedy;
1781 case ZSTD_lazy:
1782 return ZSTD_compressBlock_lazy;
1783 case ZSTD_lazy2:
1784 return ZSTD_compressBlock_lazy2;
1785 case ZSTD_btlazy2:
1786 return ZSTD_compressBlock_btlazy2;
1787 }
Yann Collet59d70632015-11-04 12:05:27 +01001788 }
1789}
1790
1791
Yann Collet5be2dd22015-11-11 13:43:58 +01001792size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Colletbe2010e2015-10-31 12:57:14 +01001793{
Yann Collet89db5e02015-11-13 11:27:46 +01001794 ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctx->params.strategy, ctx->lowLimit < ctx->dictLimit);
Yann Collet7dfd56b2015-11-19 17:46:29 +01001795 if (srcSize < MIN_CBLOCK_SIZE+3) return 0; /* don't even attempt compression below a certain srcSize */
Yann Collet59d70632015-11-04 12:05:27 +01001796 return blockCompressor(ctx, dst, maxDstSize, src, srcSize);
Yann Colletbe2010e2015-10-31 12:57:14 +01001797}
1798
1799
Yann Collet5be2dd22015-11-11 13:43:58 +01001800static size_t ZSTD_compress_generic (ZSTD_CCtx* ctxPtr,
Yann Colletf3eca252015-10-22 15:31:46 +01001801 void* dst, size_t maxDstSize,
1802 const void* src, size_t srcSize)
1803{
Yann Collet3e358272015-11-04 18:19:39 +01001804 size_t blockSize = BLOCKSIZE;
Yann Colletf3eca252015-10-22 15:31:46 +01001805 size_t remaining = srcSize;
1806 const BYTE* ip = (const BYTE*)src;
1807 BYTE* const ostart = (BYTE*)dst;
1808 BYTE* op = ostart;
Yann Collet89db5e02015-11-13 11:27:46 +01001809 const U32 maxDist = 1 << ctxPtr->params.windowLog;
1810 //const ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(ctxPtr->params.strategy, ctxPtr->lowLimit < ctxPtr->dictLimit);
Yann Collet9b11b462015-11-01 12:40:22 +01001811
Yann Collet3e358272015-11-04 18:19:39 +01001812 while (remaining)
Yann Colletf3eca252015-10-22 15:31:46 +01001813 {
Yann Collet3e358272015-11-04 18:19:39 +01001814 size_t cSize;
1815
Yann Collet3137d1a2015-11-04 23:36:36 +01001816 if (maxDstSize < 3 + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
Yann Collet3e358272015-11-04 18:19:39 +01001817 if (remaining < blockSize) blockSize = remaining;
Yann Collet89db5e02015-11-13 11:27:46 +01001818
1819 if ((U32)(ip+blockSize - (ctxPtr->base + ctxPtr->lowLimit)) > maxDist)
1820 /* respect windowLog contract */
1821 ctxPtr->lowLimit = (U32)(ip+blockSize - ctxPtr->base) - maxDist;
1822
1823 //cSize = blockCompressor(ctxPtr, op+3, maxDstSize-3, ip, blockSize);
1824 cSize = ZSTD_compressBlock(ctxPtr, op+3, maxDstSize-3, ip, blockSize);
Yann Collet3e358272015-11-04 18:19:39 +01001825 if (ZSTD_isError(cSize)) return cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01001826
1827 if (cSize == 0)
1828 {
1829 cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize); /* block is not compressible */
1830 }
1831 else
1832 {
1833 op[0] = (BYTE)(cSize>>16);
1834 op[1] = (BYTE)(cSize>>8);
1835 op[2] = (BYTE)cSize;
Yann Colletc95f8992015-11-19 17:28:35 +01001836 op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
Yann Colletf3eca252015-10-22 15:31:46 +01001837 cSize += 3;
1838 }
1839
1840 remaining -= blockSize;
Yann Collet3e358272015-11-04 18:19:39 +01001841 maxDstSize -= cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01001842 ip += blockSize;
1843 op += cSize;
Yann Colletf3eca252015-10-22 15:31:46 +01001844 }
1845
1846 return op-ostart;
1847}
1848
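/* Illustrative sketch, an assumption rather than part of the original source : reading back
   the 3-byte block header written by ZSTD_compress_generic(). The top 2 bits of the first
   byte carry the block type (e.g. bt_compressed, bt_end), the remaining 22 bits the block
   size, spread big-endian over the three bytes. A hypothetical (unused) helper : */
static size_t ZSTD_readBlockHeader_example(const BYTE* op, U32* blockTypePtr)
{
    *blockTypePtr = op[0] >> 6;
    return (((size_t)(op[0] & 0x3F)) << 16) + ((size_t)op[1] << 8) + op[2];
}
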
1849
Yann Collet5be2dd22015-11-11 13:43:58 +01001850size_t ZSTD_compressContinue (ZSTD_CCtx* ctxPtr,
Yann Collet2acb5d32015-10-29 16:49:43 +01001851 void* dst, size_t dstSize,
1852 const void* src, size_t srcSize)
Yann Colletf3eca252015-10-22 15:31:46 +01001853{
Yann Collet2acb5d32015-10-29 16:49:43 +01001854 const BYTE* const ip = (const BYTE*) src;
Yann Colletf3eca252015-10-22 15:31:46 +01001855
Yann Collet89db5e02015-11-13 11:27:46 +01001856 /* preemptive overflow correction */
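    /* Note added for clarity : every match index stored in the tables is a U32 distance
       from `base`. On very long streams these distances keep growing, so once lowLimit
       exceeds 1<<30 the whole context is rebased : ZSTD_reduceIndex() subtracts
       `correction` from every table entry while base/dictBase and the limits shift by
       the same amount, preserving relative distances and keeping indices away from
       U32 overflow. */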
1857 if (ctxPtr->lowLimit > (1<<30) )
Yann Colletf3eca252015-10-22 15:31:46 +01001858 {
Yann Collet89db5e02015-11-13 11:27:46 +01001859 U32 correction = ctxPtr->lowLimit;
1860 ZSTD_reduceIndex(ctxPtr, correction);
1861 ctxPtr->base += correction;
1862 ctxPtr->dictBase += correction;
1863 ctxPtr->lowLimit -= correction;
1864 ctxPtr->dictLimit -= correction;
1865 if (ctxPtr->nextToUpdate < correction) ctxPtr->nextToUpdate = 0;
1866 else ctxPtr->nextToUpdate -= correction;
Yann Colletf3eca252015-10-22 15:31:46 +01001867 }
1868
Yann Collet89db5e02015-11-13 11:27:46 +01001869 /* Check if blocks follow each other */
1870 if (src != ctxPtr->nextSrc)
1871 {
1872 /* not contiguous */
1873 ctxPtr->lowLimit = ctxPtr->dictLimit;
1874 ctxPtr->dictLimit = (U32)(ctxPtr->nextSrc - ctxPtr->base);
1875 ctxPtr->dictBase = ctxPtr->base;
1876 ctxPtr->base += ip - ctxPtr->nextSrc;
Yann Colletd7233d62015-11-22 14:40:51 +01001877 ctxPtr->nextToUpdate = ctxPtr->dictLimit;
Yann Collet89db5e02015-11-13 11:27:46 +01001878 }
1879
1880 /* input-dictionary overlap */
1881 if ((ip+srcSize > ctxPtr->dictBase + ctxPtr->lowLimit) && (ip < ctxPtr->dictBase + ctxPtr->dictLimit))
1882 {
1883 ctxPtr->lowLimit = (U32)(ip + srcSize - ctxPtr->dictBase);
1884 if (ctxPtr->lowLimit > ctxPtr->dictLimit) ctxPtr->lowLimit = ctxPtr->dictLimit;
1885 }
1886
1887 ctxPtr->nextSrc = ip + srcSize;
1888
Yann Collet5be2dd22015-11-11 13:43:58 +01001889 return ZSTD_compress_generic (ctxPtr, dst, dstSize, src, srcSize);
Yann Colletf3eca252015-10-22 15:31:46 +01001890}
1891
1892
Yann Collet5be2dd22015-11-11 13:43:58 +01001893size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
Yann Collet89db5e02015-11-13 11:27:46 +01001894 void* dst, size_t maxDstSize,
1895 const ZSTD_parameters params,
1896 const U64 srcSizeHint)
Yann Colletf3eca252015-10-22 15:31:46 +01001897{
Yann Collet4114f952015-10-30 06:40:22 +01001898 size_t errorCode;
Yann Colletf3eca252015-10-22 15:31:46 +01001899 if (maxDstSize < 4) return ERROR(dstSize_tooSmall);
Yann Collet5be2dd22015-11-11 13:43:58 +01001900 errorCode = ZSTD_resetCCtx_advanced(ctx, params, srcSizeHint);
Yann Collet4114f952015-10-30 06:40:22 +01001901 if (ZSTD_isError(errorCode)) return errorCode;
Yann Collet89db5e02015-11-13 11:27:46 +01001902
Yann Collet2acb5d32015-10-29 16:49:43 +01001903 MEM_writeLE32(dst, ZSTD_magicNumber); /* Write Header */
Yann Colletf3eca252015-10-22 15:31:46 +01001904 return 4;
1905}
1906
Yann Collet083fcc82015-10-25 14:06:35 +01001907
Yann Collet5be2dd22015-11-11 13:43:58 +01001908size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel, U64 srcSizeHint)
Yann Collet083fcc82015-10-25 14:06:35 +01001909{
Yann Collet89db5e02015-11-13 11:27:46 +01001910 int tableID = ((srcSizeHint-1) > 128 KB); /* intentional underflow for srcSizeHint == 0 */
Yann Collet2acb5d32015-10-29 16:49:43 +01001911 if (compressionLevel<=0) compressionLevel = 1;
Yann Collet5be2dd22015-11-11 13:43:58 +01001912 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
1913 return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_defaultParameters[tableID][compressionLevel], srcSizeHint);
Yann Collet083fcc82015-10-25 14:06:35 +01001914}
1915
1916
Yann Collet5be2dd22015-11-11 13:43:58 +01001917size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
Yann Collet2acb5d32015-10-29 16:49:43 +01001918{
1919 BYTE* op = (BYTE*)dst;
1920
1921 /* Sanity check */
1922 (void)ctx;
1923 if (maxDstSize < 3) return ERROR(dstSize_tooSmall);
1924
1925 /* End of frame */
1926 op[0] = (BYTE)(bt_end << 6);
1927 op[1] = 0;
1928 op[2] = 0;
1929
1930 return 3;
1931}
1932
Yann Collet5be2dd22015-11-11 13:43:58 +01001933size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
Yann Collet083fcc82015-10-25 14:06:35 +01001934 void* dst, size_t maxDstSize,
1935 const void* src, size_t srcSize,
Yann Collet5be2dd22015-11-11 13:43:58 +01001936 ZSTD_parameters params)
Yann Colletf3eca252015-10-22 15:31:46 +01001937{
1938 BYTE* const ostart = (BYTE*)dst;
1939 BYTE* op = ostart;
Yann Collet4114f952015-10-30 06:40:22 +01001940 size_t oSize;
Yann Colletf3eca252015-10-22 15:31:46 +01001941
1942 /* Header */
Yann Collet5be2dd22015-11-11 13:43:58 +01001943 oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params, srcSize);
Yann Colletf3eca252015-10-22 15:31:46 +01001944 if(ZSTD_isError(oSize)) return oSize;
1945 op += oSize;
1946 maxDstSize -= oSize;
1947
1948 /* body (compression) */
Yann Collet3d9cf7a2015-10-29 17:15:14 +01001949 ctx->base = (const BYTE*)src;
Yann Collet5be2dd22015-11-11 13:43:58 +01001950 oSize = ZSTD_compress_generic (ctx, op, maxDstSize, src, srcSize);
Yann Colletf3eca252015-10-22 15:31:46 +01001951 if(ZSTD_isError(oSize)) return oSize;
1952 op += oSize;
1953 maxDstSize -= oSize;
1954
1955 /* Close frame */
Yann Collet5be2dd22015-11-11 13:43:58 +01001956 oSize = ZSTD_compressEnd(ctx, op, maxDstSize);
Yann Colletf3eca252015-10-22 15:31:46 +01001957 if(ZSTD_isError(oSize)) return oSize;
1958 op += oSize;
1959
1960 return (op - ostart);
1961}
1962
Yann Collet5be2dd22015-11-11 13:43:58 +01001963size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
Yann Collet083fcc82015-10-25 14:06:35 +01001964{
Yann Collet43e0cd52015-11-09 16:38:17 +01001965 const int tableID = (srcSize > 128 KB);
Yann Collet5be2dd22015-11-11 13:43:58 +01001966 if (compressionLevel < 1) compressionLevel = 1;
1967 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
1968 return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_defaultParameters[tableID][compressionLevel]);
Yann Collet083fcc82015-10-25 14:06:35 +01001969}
1970
Yann Collet5be2dd22015-11-11 13:43:58 +01001971size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
Yann Colletf3eca252015-10-22 15:31:46 +01001972{
Yann Collet44fe9912015-10-29 22:02:40 +01001973 size_t result;
Yann Collet5be2dd22015-11-11 13:43:58 +01001974 ZSTD_CCtx ctxBody;
Yann Collet712def92015-10-29 18:41:45 +01001975 memset(&ctxBody, 0, sizeof(ctxBody));
Yann Collet5be2dd22015-11-11 13:43:58 +01001976 result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel);
Yann Collet89db5e02015-11-13 11:27:46 +01001977 free(ctxBody.workSpace); /* can't free ctxBody, since it's on the stack; only release its heap-allocated workSpace */
Yann Collet44fe9912015-10-29 22:02:40 +01001978 return result;
Yann Colletf3eca252015-10-22 15:31:46 +01001979}
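
/* Usage sketch, added for illustration and not part of the original source. It assumes the
   prototypes declared in zstd_static.h and that ZSTD_compressBegin() leaves the context
   ready for ZSTD_compressContinue() ; treat it as a hypothetical example, not a reference.
   One-shot compression simply calls ZSTD_compress() as above ; a caller holding its own
   context can chain begin / continue / end to build one frame from consecutive chunks : */
static size_t ZSTD_compressTwoChunks_example(void* dst, size_t maxDstSize,
                                             const void* chunk1, size_t size1,
                                             const void* chunk2, size_t size2)
{
    ZSTD_CCtx ctxBody;
    BYTE* op = (BYTE*)dst;
    size_t r;
    memset(&ctxBody, 0, sizeof(ctxBody));
    r = ZSTD_compressBegin(&ctxBody, op, maxDstSize, 5, size1+size2);   /* writes frame header */
    if (ZSTD_isError(r)) return r;
    op += r; maxDstSize -= r;
    r = ZSTD_compressContinue(&ctxBody, op, maxDstSize, chunk1, size1);
    if (ZSTD_isError(r)) return r;
    op += r; maxDstSize -= r;
    r = ZSTD_compressContinue(&ctxBody, op, maxDstSize, chunk2, size2);
    if (ZSTD_isError(r)) return r;
    op += r; maxDstSize -= r;
    r = ZSTD_compressEnd(&ctxBody, op, maxDstSize);                     /* writes frame epilogue */
    if (ZSTD_isError(r)) return r;
    op += r;
    free(ctxBody.workSpace);   /* as in ZSTD_compress() above : the context is on the stack, only its heap content needs freeing */
    return (size_t)(op - (BYTE*)dst);
}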