blob: 14918cb42ffbdd29aecdd5cbb23b85afabb032db [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
36/*!
Yann Collet5be2dd22015-11-11 13:43:58 +010037 * HEAPMODE :
Yann Colletb3a2af92015-11-19 17:13:19 +010038 * Select how default compression functions will allocate memory for their hash table,
Yann Collet5be2dd22015-11-11 13:43:58 +010039 * in memory stack (0, fastest), or in memory heap (1, requires malloc())
Yann Colletb3a2af92015-11-19 17:13:19 +010040 * Note that compression context is fairly large, as a consequence heap memory is recommended.
Yann Collet5be2dd22015-11-11 13:43:58 +010041 */
42#ifndef ZSTD_HEAPMODE
43# define ZSTD_HEAPMODE 1
44#endif /* ZSTD_HEAPMODE */
45
46/*!
47* LEGACY_SUPPORT :
Yann Collet14983e72015-11-11 21:38:21 +010048* ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
Yann Collet5be2dd22015-11-11 13:43:58 +010049*/
50#ifndef ZSTD_LEGACY_SUPPORT
51# define ZSTD_LEGACY_SUPPORT 1
52#endif
53
54
55/* *******************************************************
56* Includes
57*********************************************************/
58#include <stdlib.h> /* calloc */
59#include <string.h> /* memcpy, memmove */
60#include <stdio.h> /* debug : printf */
61#include "mem.h" /* low level memory routines */
62#include "zstd_static.h"
63#include "zstd_internal.h"
64#include "fse_static.h"
65#include "huff0.h"
66
67#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
68# include "zstd_legacy.h"
69#endif
70
Yann Collet5be2dd22015-11-11 13:43:58 +010071/* *******************************************************
72* Compiler specifics
73*********************************************************/
Yann Collet5be2dd22015-11-11 13:43:58 +010074#ifdef _MSC_VER /* Visual Studio */
75# define FORCE_INLINE static __forceinline
76# include <intrin.h> /* For Visual 2005 */
77# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
78# pragma warning(disable : 4324) /* disable: C4324: padded structure */
79#else
80# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
81# ifdef __GNUC__
82# define FORCE_INLINE static inline __attribute__((always_inline))
83# else
84# define FORCE_INLINE static inline
85# endif
86#endif
87
88
Yann Collet14983e72015-11-11 21:38:21 +010089/* *************************************
90* Local types
91***************************************/
92typedef struct
93{
94 blockType_t blockType;
95 U32 origSize;
96} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010097
98
99/* *******************************************************
100* Memory operations
101**********************************************************/
102static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
103
104
Yann Collet5be2dd22015-11-11 13:43:58 +0100105/* *************************************
106* Error Management
107***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100108unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
109
Yann Collet5be2dd22015-11-11 13:43:58 +0100110/*! ZSTD_isError
111* tells if a return value is an error code */
112unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
113
114/*! ZSTD_getErrorName
115* provides error code string (useful for debugging) */
116const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
117
118
Yann Collet5be2dd22015-11-11 13:43:58 +0100119/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100120* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100121***************************************************************/
Yann Collete4fdad52015-11-25 21:09:17 +0100122typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
Yann Collet88fcd292015-11-25 14:42:45 +0100123 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
124
Yann Collet5be2dd22015-11-11 13:43:58 +0100125struct ZSTD_DCtx_s
126{
127 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
128 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
129 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
Yann Collet417890c2015-12-04 17:16:37 +0100130 const void* previousDstEnd;
131 const void* base;
132 const void* vBase;
133 const void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100134 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100135 size_t headerSize;
136 ZSTD_parameters params;
Yann Collet5be2dd22015-11-11 13:43:58 +0100137 blockType_t bType;
Yann Collet88fcd292015-11-25 14:42:45 +0100138 ZSTD_dStage stage;
Yann Collet5be2dd22015-11-11 13:43:58 +0100139 const BYTE* litPtr;
140 size_t litBufSize;
141 size_t litSize;
142 BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
Yann Collet88fcd292015-11-25 14:42:45 +0100143 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet417890c2015-12-04 17:16:37 +0100144}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
Yann Collet5be2dd22015-11-11 13:43:58 +0100145
Yann Collet5b78d2f2015-11-12 15:36:05 +0100146size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
147{
Yann Collet88fcd292015-11-25 14:42:45 +0100148 dctx->expected = ZSTD_frameHeaderSize_min;
149 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100150 dctx->previousDstEnd = NULL;
151 dctx->base = NULL;
152 dctx->vBase = NULL;
153 dctx->dictEnd = NULL;
154 return 0;
155}
156
157ZSTD_DCtx* ZSTD_createDCtx(void)
158{
159 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
160 if (dctx==NULL) return NULL;
161 ZSTD_resetDCtx(dctx);
162 return dctx;
163}
164
165size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
166{
167 free(dctx);
168 return 0;
169}
170
171
172/* *************************************************************
173* Decompression section
174***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100175/** ZSTD_decodeFrameHeader_Part1
176* decode the 1st part of the Frame Header, which tells Frame Header size.
177* srcSize must be == ZSTD_frameHeaderSize_min
178* @return : the full size of the Frame Header */
179static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
180{
181 U32 magicNumber;
182 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
183 magicNumber = MEM_readLE32(src);
184 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
185 zc->headerSize = ZSTD_frameHeaderSize_min;
186 return zc->headerSize;
187}
188
Yann Collet88fcd292015-11-25 14:42:45 +0100189
190size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
191{
192 U32 magicNumber;
193 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
194 magicNumber = MEM_readLE32(src);
195 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
196 memset(params, 0, sizeof(*params));
Yann Collet26415d32015-11-26 12:43:28 +0100197 params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
Yann Colletbf7aa3c2015-11-28 18:19:44 +0100198 if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
Yann Collet88fcd292015-11-25 14:42:45 +0100199 return 0;
200}
201
Yann Collet26415d32015-11-26 12:43:28 +0100202/** ZSTD_decodeFrameHeader_Part2
203* decode the full Frame Header
204* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
205* @return : 0, or an error code, which can be tested using ZSTD_isError() */
206static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
207{
Yann Collet00fd7a22015-11-28 16:03:22 +0100208 size_t result;
Yann Collet26415d32015-11-26 12:43:28 +0100209 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
Yann Collet00fd7a22015-11-28 16:03:22 +0100210 result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
211 if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
212 return result;
Yann Collet26415d32015-11-26 12:43:28 +0100213}
214
Yann Collet5be2dd22015-11-11 13:43:58 +0100215
216size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
217{
218 const BYTE* const in = (const BYTE* const)src;
219 BYTE headerFlags;
220 U32 cSize;
221
222 if (srcSize < 3) return ERROR(srcSize_wrong);
223
224 headerFlags = *in;
225 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
226
227 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
228 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
229
230 if (bpPtr->blockType == bt_end) return 0;
231 if (bpPtr->blockType == bt_rle) return 1;
232 return cSize;
233}
234
Yann Collet0f366c62015-11-12 16:19:30 +0100235static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100236{
237 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
238 memcpy(dst, src, srcSize);
239 return srcSize;
240}
241
242
243/** ZSTD_decompressLiterals
244 @return : nb of bytes read from src, or an error code*/
245static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
246 const void* src, size_t srcSize)
247{
248 const BYTE* ip = (const BYTE*)src;
249
250 const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
251 const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
252
253 if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
254 if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
255
256 if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
257
258 *maxDstSizePtr = litSize;
259 return litCSize + 5;
260}
261
262
263/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100264 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100265size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100266 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
267{
Yann Collet5be2dd22015-11-11 13:43:58 +0100268 const BYTE* const istart = (const BYTE*) src;
269
270 /* any compressed block with literals segment must be at least this size */
271 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
272
273 switch(*istart & 3)
274 {
275 /* compressed */
276 case 0:
277 {
278 size_t litSize = BLOCKSIZE;
279 const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
280 dctx->litPtr = dctx->litBuffer;
281 dctx->litBufSize = BLOCKSIZE+8;
282 dctx->litSize = litSize;
283 return readSize; /* works if it's an error too */
284 }
285 case IS_RAW:
286 {
287 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
288 if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
289 {
290 if (litSize > srcSize-3) return ERROR(corruption_detected);
291 memcpy(dctx->litBuffer, istart, litSize);
292 dctx->litPtr = dctx->litBuffer;
293 dctx->litBufSize = BLOCKSIZE+8;
294 dctx->litSize = litSize;
295 return litSize+3;
296 }
297 /* direct reference into compressed stream */
298 dctx->litPtr = istart+3;
299 dctx->litBufSize = srcSize-3;
300 dctx->litSize = litSize;
301 return litSize+3; }
302 case IS_RLE:
303 {
304 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
305 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
306 memset(dctx->litBuffer, istart[3], litSize);
307 dctx->litPtr = dctx->litBuffer;
308 dctx->litBufSize = BLOCKSIZE+8;
309 dctx->litSize = litSize;
310 return 4;
311 }
312 default:
313 return ERROR(corruption_detected); /* forbidden nominal case */
314 }
315}
316
317
318size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
319 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
320 const void* src, size_t srcSize)
321{
322 const BYTE* const istart = (const BYTE* const)src;
323 const BYTE* ip = istart;
324 const BYTE* const iend = istart + srcSize;
325 U32 LLtype, Offtype, MLtype;
326 U32 LLlog, Offlog, MLlog;
327 size_t dumpsLength;
328
329 /* check */
330 if (srcSize < 5) return ERROR(srcSize_wrong);
331
332 /* SeqHead */
333 *nbSeq = MEM_readLE16(ip); ip+=2;
334 LLtype = *ip >> 6;
335 Offtype = (*ip >> 4) & 3;
336 MLtype = (*ip >> 2) & 3;
337 if (*ip & 2)
338 {
339 dumpsLength = ip[2];
340 dumpsLength += ip[1] << 8;
341 ip += 3;
342 }
343 else
344 {
345 dumpsLength = ip[1];
346 dumpsLength += (ip[0] & 1) << 8;
347 ip += 2;
348 }
349 *dumpsPtr = ip;
350 ip += dumpsLength;
351 *dumpsLengthPtr = dumpsLength;
352
353 /* check */
354 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
355
356 /* sequences */
357 {
Yann Collet82368cf2015-11-16 19:10:56 +0100358 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100359 size_t headerSize;
360
361 /* Build DTables */
362 switch(LLtype)
363 {
364 U32 max;
365 case bt_rle :
366 LLlog = 0;
367 FSE_buildDTable_rle(DTableLL, *ip++); break;
368 case bt_raw :
369 LLlog = LLbits;
370 FSE_buildDTable_raw(DTableLL, LLbits); break;
371 default :
372 max = MaxLL;
373 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
374 if (FSE_isError(headerSize)) return ERROR(GENERIC);
375 if (LLlog > LLFSELog) return ERROR(corruption_detected);
376 ip += headerSize;
377 FSE_buildDTable(DTableLL, norm, max, LLlog);
378 }
379
380 switch(Offtype)
381 {
382 U32 max;
383 case bt_rle :
384 Offlog = 0;
385 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
386 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
387 break;
388 case bt_raw :
389 Offlog = Offbits;
390 FSE_buildDTable_raw(DTableOffb, Offbits); break;
391 default :
392 max = MaxOff;
393 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
394 if (FSE_isError(headerSize)) return ERROR(GENERIC);
395 if (Offlog > OffFSELog) return ERROR(corruption_detected);
396 ip += headerSize;
397 FSE_buildDTable(DTableOffb, norm, max, Offlog);
398 }
399
400 switch(MLtype)
401 {
402 U32 max;
403 case bt_rle :
404 MLlog = 0;
405 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
406 FSE_buildDTable_rle(DTableML, *ip++); break;
407 case bt_raw :
408 MLlog = MLbits;
409 FSE_buildDTable_raw(DTableML, MLbits); break;
410 default :
411 max = MaxML;
412 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
413 if (FSE_isError(headerSize)) return ERROR(GENERIC);
414 if (MLlog > MLFSELog) return ERROR(corruption_detected);
415 ip += headerSize;
416 FSE_buildDTable(DTableML, norm, max, MLlog);
417 }
418 }
419
420 return ip-istart;
421}
422
423
424typedef struct {
425 size_t litLength;
426 size_t offset;
427 size_t matchLength;
428} seq_t;
429
430typedef struct {
431 BIT_DStream_t DStream;
432 FSE_DState_t stateLL;
433 FSE_DState_t stateOffb;
434 FSE_DState_t stateML;
435 size_t prevOffset;
436 const BYTE* dumps;
437 const BYTE* dumpsEnd;
438} seqState_t;
439
440
441static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
442{
443 size_t litLength;
444 size_t prevOffset;
445 size_t offset;
446 size_t matchLength;
447 const BYTE* dumps = seqState->dumps;
448 const BYTE* const de = seqState->dumpsEnd;
449
450 /* Literal length */
451 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
Yann Collete4fdad52015-11-25 21:09:17 +0100452 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100453 if (litLength == MaxLL)
454 {
455 U32 add = *dumps++;
456 if (add < 255) litLength += add;
457 else
458 {
459 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
460 dumps += 3;
461 }
462 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
463 }
464
465 /* Offset */
466 {
467 static const U32 offsetPrefix[MaxOff+1] = {
468 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
469 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
470 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
471 U32 offsetCode, nbBits;
472 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
473 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
474 nbBits = offsetCode - 1;
475 if (offsetCode==0) nbBits = 0; /* cmove */
476 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
477 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
478 if (offsetCode==0) offset = prevOffset; /* cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100479 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collet5be2dd22015-11-11 13:43:58 +0100480 }
481
482 /* MatchLength */
483 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
484 if (matchLength == MaxML)
485 {
486 U32 add = *dumps++;
487 if (add < 255) matchLength += add;
488 else
489 {
490 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
491 dumps += 3;
492 }
493 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
494 }
495 matchLength += MINMATCH;
496
497 /* save result */
498 seq->litLength = litLength;
499 seq->offset = offset;
500 seq->matchLength = matchLength;
501 seqState->dumps = dumps;
502}
503
504
Yann Collet5b78d2f2015-11-12 15:36:05 +0100505FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100506 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100507 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Collet417890c2015-12-04 17:16:37 +0100508 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100509{
Yann Colletb3a2af92015-11-19 17:13:19 +0100510 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
511 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100512 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100513 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
514 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100515 BYTE* const oend_8 = oend-8;
516 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100517 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100518
519 /* check */
520 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
521 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
522 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
523
524 /* copy Literals */
525 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
526 op = oLitEnd;
527 *litPtr = litEnd; /* update for next sequence */
528
529 /* copy Match */
Yann Collet9f5ab1a2015-12-11 00:27:41 +0100530 if (sequence.offset > (size_t)(oLitEnd - base))
Yann Collet44287a32015-11-30 23:13:56 +0100531 {
532 /* offset beyond prefix */
Yann Collet9f5ab1a2015-12-11 00:27:41 +0100533 if (sequence.offset > (size_t)(oLitEnd - vBase))
534 return ERROR(corruption_detected);
Yann Collet44287a32015-11-30 23:13:56 +0100535 match = dictEnd - (base-match);
536 if (match + sequence.matchLength <= dictEnd)
537 {
Yann Collet4bfe4152015-12-06 13:18:37 +0100538 memmove(oLitEnd, match, sequence.matchLength);
Yann Collet44287a32015-11-30 23:13:56 +0100539 return sequenceLength;
540 }
541 /* span extDict & currentPrefixSegment */
542 {
543 size_t length1 = dictEnd - match;
Yann Collet4bfe4152015-12-06 13:18:37 +0100544 memmove(oLitEnd, match, length1);
Yann Collet44287a32015-11-30 23:13:56 +0100545 op = oLitEnd + length1;
546 sequence.matchLength -= length1;
547 match = base;
548 }
549 }
Yann Collet0f366c62015-11-12 16:19:30 +0100550
Yann Collet44287a32015-11-30 23:13:56 +0100551 /* match within prefix */
552 if (sequence.offset < 8)
553 {
554 /* close range match, overlap */
555 const int sub2 = dec64table[sequence.offset];
556 op[0] = match[0];
557 op[1] = match[1];
558 op[2] = match[2];
559 op[3] = match[3];
560 match += dec32table[sequence.offset];
561 ZSTD_copy4(op+4, match);
562 match -= sub2;
563 }
564 else
565 {
566 ZSTD_copy8(op, match);
567 }
568 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100569
Yann Collet44287a32015-11-30 23:13:56 +0100570 if (oMatchEnd > oend-12)
571 {
572 if (op < oend_8)
573 {
574 ZSTD_wildcopy(op, match, oend_8 - op);
575 match += oend_8 - op;
576 op = oend_8;
577 }
578 while (op < oMatchEnd) *op++ = *match++;
579 }
580 else
581 {
582 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
583 }
584 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100585}
586
Yann Colletb3a2af92015-11-19 17:13:19 +0100587
Yann Collet5be2dd22015-11-11 13:43:58 +0100588static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100589 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100590 void* dst, size_t maxDstSize,
591 const void* seqStart, size_t seqSize)
592{
Yann Collet5be2dd22015-11-11 13:43:58 +0100593 const BYTE* ip = (const BYTE*)seqStart;
594 const BYTE* const iend = ip + seqSize;
595 BYTE* const ostart = (BYTE* const)dst;
596 BYTE* op = ostart;
597 BYTE* const oend = ostart + maxDstSize;
598 size_t errorCode, dumpsLength;
599 const BYTE* litPtr = dctx->litPtr;
600 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
601 const BYTE* const litEnd = litPtr + dctx->litSize;
602 int nbSeq;
603 const BYTE* dumps;
604 U32* DTableLL = dctx->LLTable;
605 U32* DTableML = dctx->MLTable;
606 U32* DTableOffb = dctx->OffTable;
Yann Collet417890c2015-12-04 17:16:37 +0100607 const BYTE* const base = (const BYTE*) (dctx->base);
608 const BYTE* const vBase = (const BYTE*) (dctx->vBase);
609 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100610
611 /* Build Decoding Tables */
612 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
613 DTableLL, DTableML, DTableOffb,
614 ip, iend-ip);
615 if (ZSTD_isError(errorCode)) return errorCode;
616 ip += errorCode;
617
618 /* Regen sequences */
619 {
620 seq_t sequence;
621 seqState_t seqState;
622
623 memset(&sequence, 0, sizeof(sequence));
624 sequence.offset = 4;
625 seqState.dumps = dumps;
626 seqState.dumpsEnd = dumps + dumpsLength;
627 seqState.prevOffset = 4;
628 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
629 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
630 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
631 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
632 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
633
Yann Collet44287a32015-11-30 23:13:56 +0100634 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
Yann Colletb3a2af92015-11-19 17:13:19 +0100635 {
636 size_t oneSeqSize;
637 nbSeq--;
638 ZSTD_decodeSequence(&sequence, &seqState);
639 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100640 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
641 op += oneSeqSize;
642 }
643
644 /* check if reached exact end */
Yann Collet44287a32015-11-30 23:13:56 +0100645 if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100646
647 /* last literal segment */
648 {
649 size_t lastLLSize = litEnd - litPtr;
650 if (litPtr > litEnd) return ERROR(corruption_detected);
651 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
652 if (op != litPtr) memcpy(op, litPtr, lastLLSize);
653 op += lastLLSize;
654 }
655 }
656
657 return op-ostart;
658}
659
660
Yann Colletb0125102016-01-09 02:00:10 +0100661static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
662{
663 if (dst != dctx->previousDstEnd) /* not contiguous */
664 {
665 dctx->dictEnd = dctx->previousDstEnd;
666 dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
667 dctx->base = dst;
668 dctx->previousDstEnd = dst;
669 }
670}
671
672
673static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100674 void* dst, size_t maxDstSize,
675 const void* src, size_t srcSize)
676{
677 /* blockType == blockCompressed */
678 const BYTE* ip = (const BYTE*)src;
679
680 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100681 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100682 if (ZSTD_isError(litCSize)) return litCSize;
683 ip += litCSize;
684 srcSize -= litCSize;
685
Yann Collet5b78d2f2015-11-12 15:36:05 +0100686 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100687}
688
689
Yann Colletb0125102016-01-09 02:00:10 +0100690size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
691 void* dst, size_t maxDstSize,
692 const void* src, size_t srcSize)
693{
694 ZSTD_checkContinuity(dctx, dst);
695 return ZSTD_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
696}
697
698
Yann Collet31683c02015-12-18 01:26:48 +0100699size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
700 void* dst, size_t maxDstSize,
701 const void* src, size_t srcSize,
702 const void* dict, size_t dictSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100703{
704 const BYTE* ip = (const BYTE*)src;
705 const BYTE* iend = ip + srcSize;
706 BYTE* const ostart = (BYTE* const)dst;
707 BYTE* op = ostart;
708 BYTE* const oend = ostart + maxDstSize;
709 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100710 blockProperties_t blockProperties;
711
Yann Colletb2549842015-11-18 11:29:32 +0100712 /* init */
Yann Collet31683c02015-12-18 01:26:48 +0100713 ZSTD_resetDCtx(ctx);
714 if (dict)
715 {
716 ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
717 ctx->dictEnd = ctx->previousDstEnd;
718 ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
719 ctx->base = dst;
720 }
721 else
722 {
723 ctx->vBase = ctx->base = ctx->dictEnd = dst;
724 }
Yann Collet5b78d2f2015-11-12 15:36:05 +0100725
Yann Collet5be2dd22015-11-11 13:43:58 +0100726 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100727 {
728 size_t frameHeaderSize;
729 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100730#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100731 {
732 const U32 magicNumber = MEM_readLE32(src);
733 if (ZSTD_isLegacy(magicNumber))
734 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
735 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100736#endif
Yann Collet88fcd292015-11-25 14:42:45 +0100737 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
738 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
739 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
740 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
741 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
742 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
743 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100744
745 /* Loop on each block */
746 while (1)
747 {
748 size_t decodedSize=0;
749 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
750 if (ZSTD_isError(cBlockSize)) return cBlockSize;
751
752 ip += ZSTD_blockHeaderSize;
753 remainingSize -= ZSTD_blockHeaderSize;
754 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
755
756 switch(blockProperties.blockType)
757 {
758 case bt_compressed:
Yann Colletb0125102016-01-09 02:00:10 +0100759 decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100760 break;
761 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100762 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100763 break;
764 case bt_rle :
765 return ERROR(GENERIC); /* not yet supported */
766 break;
767 case bt_end :
768 /* end of frame */
769 if (remainingSize) return ERROR(srcSize_wrong);
770 break;
771 default:
772 return ERROR(GENERIC); /* impossible */
773 }
774 if (cBlockSize == 0) break; /* bt_end */
775
776 if (ZSTD_isError(decodedSize)) return decodedSize;
777 op += decodedSize;
778 ip += cBlockSize;
779 remainingSize -= cBlockSize;
780 }
781
782 return op-ostart;
783}
784
Yann Collet31683c02015-12-18 01:26:48 +0100785
786size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
787{
788 return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
789}
790
Yann Collet5be2dd22015-11-11 13:43:58 +0100791size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
792{
Yann Collet31683c02015-12-18 01:26:48 +0100793 ZSTD_DCtx dctx;
794 return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100795}
796
797
798/* ******************************
799* Streaming Decompression API
800********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100801size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
802{
803 return dctx->expected;
804}
805
806size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
807{
808 /* Sanity check */
809 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Colletb0125102016-01-09 02:00:10 +0100810 ZSTD_checkContinuity(ctx, dst);
Yann Collet5be2dd22015-11-11 13:43:58 +0100811
Yann Collet88fcd292015-11-25 14:42:45 +0100812 /* Decompress : frame header; part 1 */
813 switch (ctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100814 {
Yann Collet88fcd292015-11-25 14:42:45 +0100815 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100816 {
Yann Collet88fcd292015-11-25 14:42:45 +0100817 /* get frame header size */
818 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
819 ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
Yann Collete4fdad52015-11-25 21:09:17 +0100820 if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
Yann Collet88fcd292015-11-25 14:42:45 +0100821 memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
822 if (ctx->headerSize > ZSTD_frameHeaderSize_min)
823 {
824 ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
825 ctx->stage = ZSTDds_decodeFrameHeader;
826 return 0;
827 }
828 ctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +0100829 }
Yann Collet88fcd292015-11-25 14:42:45 +0100830 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100831 {
Yann Collet88fcd292015-11-25 14:42:45 +0100832 /* get frame header */
833 size_t result;
834 memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
835 result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
836 if (ZSTD_isError(result)) return result;
837 ctx->expected = ZSTD_blockHeaderSize;
838 ctx->stage = ZSTDds_decodeBlockHeader;
839 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +0100840 }
Yann Collet88fcd292015-11-25 14:42:45 +0100841 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100842 {
Yann Collet88fcd292015-11-25 14:42:45 +0100843 /* Decode block header */
844 blockProperties_t bp;
845 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
846 if (ZSTD_isError(blockSize)) return blockSize;
847 if (bp.blockType == bt_end)
848 {
849 ctx->expected = 0;
850 ctx->stage = ZSTDds_getFrameHeaderSize;
851 }
852 else
853 {
854 ctx->expected = blockSize;
855 ctx->bType = bp.blockType;
856 ctx->stage = ZSTDds_decompressBlock;
857 }
Yann Collet88fcd292015-11-25 14:42:45 +0100858 return 0;
859 }
Yann Collet417890c2015-12-04 17:16:37 +0100860 case ZSTDds_decompressBlock:
Yann Collet88fcd292015-11-25 14:42:45 +0100861 {
862 /* Decompress : block content */
863 size_t rSize;
864 switch(ctx->bType)
865 {
866 case bt_compressed:
Yann Colletb0125102016-01-09 02:00:10 +0100867 rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
Yann Collet88fcd292015-11-25 14:42:45 +0100868 break;
869 case bt_raw :
870 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
871 break;
872 case bt_rle :
873 return ERROR(GENERIC); /* not yet handled */
874 break;
875 case bt_end : /* should never happen (filtered at phase 1) */
876 rSize = 0;
877 break;
878 default:
879 return ERROR(GENERIC);
880 }
881 ctx->stage = ZSTDds_decodeBlockHeader;
882 ctx->expected = ZSTD_blockHeaderSize;
883 ctx->previousDstEnd = (char*)dst + rSize;
884 return rSize;
885 }
886 default:
887 return ERROR(GENERIC); /* impossible */
888 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100889}
890
891
Yann Colletb0125102016-01-09 02:00:10 +0100892void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
Yann Collet417890c2015-12-04 17:16:37 +0100893{
894 ctx->dictEnd = ctx->previousDstEnd;
Yann Colletb0125102016-01-09 02:00:10 +0100895 ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
896 ctx->base = dict;
897 ctx->previousDstEnd = (const char*)dict + dictSize;
Yann Collet417890c2015-12-04 17:16:37 +0100898}