blob: 4a026df33d634495d6986d1a3033adca3f674ec3 [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
36/*!
Yann Collet5be2dd22015-11-11 13:43:58 +010037 * HEAPMODE :
Yann Collet3a3b72f2016-01-11 12:56:11 +010038 * Select how default decompression function ZSTD_decompress() will allocate memory,
39 * in memory stack (0), or in memory heap (1, requires malloc())
Yann Collet5be2dd22015-11-11 13:43:58 +010040 */
41#ifndef ZSTD_HEAPMODE
42# define ZSTD_HEAPMODE 1
Yann Collet3a3b72f2016-01-11 12:56:11 +010043#endif
Yann Collet5be2dd22015-11-11 13:43:58 +010044
45/*!
46* LEGACY_SUPPORT :
Yann Colletfba6aed2016-01-18 12:03:27 +010047* ZSTD_decompress() can decode older formats (v0.1+) if set to 1
Yann Collet5be2dd22015-11-11 13:43:58 +010048*/
49#ifndef ZSTD_LEGACY_SUPPORT
Yann Colletfba6aed2016-01-18 12:03:27 +010050# define ZSTD_LEGACY_SUPPORT 0
Yann Collet5be2dd22015-11-11 13:43:58 +010051#endif
52
53
54/* *******************************************************
55* Includes
56*********************************************************/
57#include <stdlib.h> /* calloc */
58#include <string.h> /* memcpy, memmove */
59#include <stdio.h> /* debug : printf */
60#include "mem.h" /* low level memory routines */
61#include "zstd_static.h"
62#include "zstd_internal.h"
63#include "fse_static.h"
64#include "huff0.h"
65
66#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
67# include "zstd_legacy.h"
68#endif
69
Yann Collet5be2dd22015-11-11 13:43:58 +010070/* *******************************************************
71* Compiler specifics
72*********************************************************/
Yann Collet5be2dd22015-11-11 13:43:58 +010073#ifdef _MSC_VER /* Visual Studio */
74# define FORCE_INLINE static __forceinline
75# include <intrin.h> /* For Visual 2005 */
76# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
77# pragma warning(disable : 4324) /* disable: C4324: padded structure */
78#else
79# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
80# ifdef __GNUC__
81# define FORCE_INLINE static inline __attribute__((always_inline))
82# else
83# define FORCE_INLINE static inline
84# endif
85#endif
86
87
Yann Collet14983e72015-11-11 21:38:21 +010088/* *************************************
89* Local types
90***************************************/
91typedef struct
92{
93 blockType_t blockType;
94 U32 origSize;
95} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010096
97
98/* *******************************************************
99* Memory operations
100**********************************************************/
101static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
102
103
Yann Collet5be2dd22015-11-11 13:43:58 +0100104/* *************************************
105* Error Management
106***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100107unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
108
Yann Collet5be2dd22015-11-11 13:43:58 +0100109/*! ZSTD_isError
110* tells if a return value is an error code */
111unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
112
113/*! ZSTD_getErrorName
114* provides error code string (useful for debugging) */
115const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
116
117
Yann Collet5be2dd22015-11-11 13:43:58 +0100118/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100119* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100120***************************************************************/
Yann Collete4fdad52015-11-25 21:09:17 +0100121typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
Yann Collet88fcd292015-11-25 14:42:45 +0100122 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
123
Yann Collet5be2dd22015-11-11 13:43:58 +0100124struct ZSTD_DCtx_s
125{
126 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
127 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
128 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
Yann Collet417890c2015-12-04 17:16:37 +0100129 const void* previousDstEnd;
130 const void* base;
131 const void* vBase;
132 const void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100133 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100134 size_t headerSize;
135 ZSTD_parameters params;
Yann Collet5be2dd22015-11-11 13:43:58 +0100136 blockType_t bType;
Yann Collet88fcd292015-11-25 14:42:45 +0100137 ZSTD_dStage stage;
Yann Collet5be2dd22015-11-11 13:43:58 +0100138 const BYTE* litPtr;
139 size_t litBufSize;
140 size_t litSize;
141 BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
Yann Collet88fcd292015-11-25 14:42:45 +0100142 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet417890c2015-12-04 17:16:37 +0100143}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
Yann Collet5be2dd22015-11-11 13:43:58 +0100144
Yann Collet5b78d2f2015-11-12 15:36:05 +0100145size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
146{
Yann Collet88fcd292015-11-25 14:42:45 +0100147 dctx->expected = ZSTD_frameHeaderSize_min;
148 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100149 dctx->previousDstEnd = NULL;
150 dctx->base = NULL;
151 dctx->vBase = NULL;
152 dctx->dictEnd = NULL;
153 return 0;
154}
155
156ZSTD_DCtx* ZSTD_createDCtx(void)
157{
158 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
159 if (dctx==NULL) return NULL;
160 ZSTD_resetDCtx(dctx);
161 return dctx;
162}
163
164size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
165{
166 free(dctx);
167 return 0;
168}
169
170
171/* *************************************************************
172* Decompression section
173***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100174/** ZSTD_decodeFrameHeader_Part1
175* decode the 1st part of the Frame Header, which tells Frame Header size.
176* srcSize must be == ZSTD_frameHeaderSize_min
177* @return : the full size of the Frame Header */
178static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
179{
180 U32 magicNumber;
181 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
182 magicNumber = MEM_readLE32(src);
183 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
184 zc->headerSize = ZSTD_frameHeaderSize_min;
185 return zc->headerSize;
186}
187
Yann Collet88fcd292015-11-25 14:42:45 +0100188
189size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
190{
191 U32 magicNumber;
192 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
193 magicNumber = MEM_readLE32(src);
194 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
195 memset(params, 0, sizeof(*params));
Yann Collet26415d32015-11-26 12:43:28 +0100196 params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
Yann Colletbf7aa3c2015-11-28 18:19:44 +0100197 if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
Yann Collet88fcd292015-11-25 14:42:45 +0100198 return 0;
199}
200
Yann Collet26415d32015-11-26 12:43:28 +0100201/** ZSTD_decodeFrameHeader_Part2
202* decode the full Frame Header
203* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
204* @return : 0, or an error code, which can be tested using ZSTD_isError() */
205static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
206{
Yann Collet00fd7a22015-11-28 16:03:22 +0100207 size_t result;
Yann Collet26415d32015-11-26 12:43:28 +0100208 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
Yann Collet00fd7a22015-11-28 16:03:22 +0100209 result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
210 if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
211 return result;
Yann Collet26415d32015-11-26 12:43:28 +0100212}
213
Yann Collet5be2dd22015-11-11 13:43:58 +0100214
215size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
216{
217 const BYTE* const in = (const BYTE* const)src;
218 BYTE headerFlags;
219 U32 cSize;
220
221 if (srcSize < 3) return ERROR(srcSize_wrong);
222
223 headerFlags = *in;
224 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
225
226 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
227 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
228
229 if (bpPtr->blockType == bt_end) return 0;
230 if (bpPtr->blockType == bt_rle) return 1;
231 return cSize;
232}
233
Yann Collet0f366c62015-11-12 16:19:30 +0100234static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100235{
236 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
237 memcpy(dst, src, srcSize);
238 return srcSize;
239}
240
241
242/** ZSTD_decompressLiterals
243 @return : nb of bytes read from src, or an error code*/
244static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
245 const void* src, size_t srcSize)
246{
247 const BYTE* ip = (const BYTE*)src;
248
249 const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
250 const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
251
252 if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
253 if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
254
255 if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
256
257 *maxDstSizePtr = litSize;
258 return litCSize + 5;
259}
260
261
262/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100263 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100264size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100265 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
266{
Yann Collet5be2dd22015-11-11 13:43:58 +0100267 const BYTE* const istart = (const BYTE*) src;
268
269 /* any compressed block with literals segment must be at least this size */
270 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
271
272 switch(*istart & 3)
273 {
274 /* compressed */
275 case 0:
276 {
277 size_t litSize = BLOCKSIZE;
278 const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
279 dctx->litPtr = dctx->litBuffer;
280 dctx->litBufSize = BLOCKSIZE+8;
281 dctx->litSize = litSize;
282 return readSize; /* works if it's an error too */
283 }
284 case IS_RAW:
285 {
286 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
287 if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
288 {
289 if (litSize > srcSize-3) return ERROR(corruption_detected);
290 memcpy(dctx->litBuffer, istart, litSize);
291 dctx->litPtr = dctx->litBuffer;
292 dctx->litBufSize = BLOCKSIZE+8;
293 dctx->litSize = litSize;
294 return litSize+3;
295 }
296 /* direct reference into compressed stream */
297 dctx->litPtr = istart+3;
298 dctx->litBufSize = srcSize-3;
299 dctx->litSize = litSize;
300 return litSize+3; }
301 case IS_RLE:
302 {
303 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
304 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
305 memset(dctx->litBuffer, istart[3], litSize);
306 dctx->litPtr = dctx->litBuffer;
307 dctx->litBufSize = BLOCKSIZE+8;
308 dctx->litSize = litSize;
309 return 4;
310 }
311 default:
312 return ERROR(corruption_detected); /* forbidden nominal case */
313 }
314}
315
316
317size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
318 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
319 const void* src, size_t srcSize)
320{
321 const BYTE* const istart = (const BYTE* const)src;
322 const BYTE* ip = istart;
323 const BYTE* const iend = istart + srcSize;
324 U32 LLtype, Offtype, MLtype;
325 U32 LLlog, Offlog, MLlog;
326 size_t dumpsLength;
327
328 /* check */
329 if (srcSize < 5) return ERROR(srcSize_wrong);
330
331 /* SeqHead */
332 *nbSeq = MEM_readLE16(ip); ip+=2;
333 LLtype = *ip >> 6;
334 Offtype = (*ip >> 4) & 3;
335 MLtype = (*ip >> 2) & 3;
336 if (*ip & 2)
337 {
338 dumpsLength = ip[2];
339 dumpsLength += ip[1] << 8;
340 ip += 3;
341 }
342 else
343 {
344 dumpsLength = ip[1];
345 dumpsLength += (ip[0] & 1) << 8;
346 ip += 2;
347 }
348 *dumpsPtr = ip;
349 ip += dumpsLength;
350 *dumpsLengthPtr = dumpsLength;
351
352 /* check */
353 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
354
355 /* sequences */
356 {
Yann Collet82368cf2015-11-16 19:10:56 +0100357 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100358 size_t headerSize;
359
360 /* Build DTables */
361 switch(LLtype)
362 {
363 U32 max;
364 case bt_rle :
365 LLlog = 0;
366 FSE_buildDTable_rle(DTableLL, *ip++); break;
367 case bt_raw :
368 LLlog = LLbits;
369 FSE_buildDTable_raw(DTableLL, LLbits); break;
370 default :
371 max = MaxLL;
372 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
373 if (FSE_isError(headerSize)) return ERROR(GENERIC);
374 if (LLlog > LLFSELog) return ERROR(corruption_detected);
375 ip += headerSize;
376 FSE_buildDTable(DTableLL, norm, max, LLlog);
377 }
378
379 switch(Offtype)
380 {
381 U32 max;
382 case bt_rle :
383 Offlog = 0;
384 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
385 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
386 break;
387 case bt_raw :
388 Offlog = Offbits;
389 FSE_buildDTable_raw(DTableOffb, Offbits); break;
390 default :
391 max = MaxOff;
392 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
393 if (FSE_isError(headerSize)) return ERROR(GENERIC);
394 if (Offlog > OffFSELog) return ERROR(corruption_detected);
395 ip += headerSize;
396 FSE_buildDTable(DTableOffb, norm, max, Offlog);
397 }
398
399 switch(MLtype)
400 {
401 U32 max;
402 case bt_rle :
403 MLlog = 0;
404 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
405 FSE_buildDTable_rle(DTableML, *ip++); break;
406 case bt_raw :
407 MLlog = MLbits;
408 FSE_buildDTable_raw(DTableML, MLbits); break;
409 default :
410 max = MaxML;
411 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
412 if (FSE_isError(headerSize)) return ERROR(GENERIC);
413 if (MLlog > MLFSELog) return ERROR(corruption_detected);
414 ip += headerSize;
415 FSE_buildDTable(DTableML, norm, max, MLlog);
416 }
417 }
418
419 return ip-istart;
420}
421
422
423typedef struct {
424 size_t litLength;
425 size_t offset;
426 size_t matchLength;
427} seq_t;
428
429typedef struct {
430 BIT_DStream_t DStream;
431 FSE_DState_t stateLL;
432 FSE_DState_t stateOffb;
433 FSE_DState_t stateML;
434 size_t prevOffset;
435 const BYTE* dumps;
436 const BYTE* dumpsEnd;
437} seqState_t;
438
439
440static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
441{
442 size_t litLength;
443 size_t prevOffset;
444 size_t offset;
445 size_t matchLength;
446 const BYTE* dumps = seqState->dumps;
447 const BYTE* const de = seqState->dumpsEnd;
448
449 /* Literal length */
450 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
Yann Collete4fdad52015-11-25 21:09:17 +0100451 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100452 if (litLength == MaxLL)
453 {
454 U32 add = *dumps++;
455 if (add < 255) litLength += add;
456 else
457 {
458 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
459 dumps += 3;
460 }
461 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
462 }
463
464 /* Offset */
465 {
466 static const U32 offsetPrefix[MaxOff+1] = {
467 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
468 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
469 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
470 U32 offsetCode, nbBits;
471 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
472 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
473 nbBits = offsetCode - 1;
474 if (offsetCode==0) nbBits = 0; /* cmove */
475 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
476 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
477 if (offsetCode==0) offset = prevOffset; /* cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100478 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collet5be2dd22015-11-11 13:43:58 +0100479 }
480
481 /* MatchLength */
482 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
483 if (matchLength == MaxML)
484 {
485 U32 add = *dumps++;
486 if (add < 255) matchLength += add;
487 else
488 {
489 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
490 dumps += 3;
491 }
492 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
493 }
494 matchLength += MINMATCH;
495
496 /* save result */
497 seq->litLength = litLength;
498 seq->offset = offset;
499 seq->matchLength = matchLength;
500 seqState->dumps = dumps;
501}
502
503
Yann Collet5b78d2f2015-11-12 15:36:05 +0100504FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100505 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100506 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Collet417890c2015-12-04 17:16:37 +0100507 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100508{
Yann Colletb3a2af92015-11-19 17:13:19 +0100509 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
510 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100511 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100512 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
513 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100514 BYTE* const oend_8 = oend-8;
515 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100516 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100517
518 /* check */
519 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
520 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
521 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
522
523 /* copy Literals */
524 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
525 op = oLitEnd;
526 *litPtr = litEnd; /* update for next sequence */
527
528 /* copy Match */
Yann Collet9f5ab1a2015-12-11 00:27:41 +0100529 if (sequence.offset > (size_t)(oLitEnd - base))
Yann Collet44287a32015-11-30 23:13:56 +0100530 {
531 /* offset beyond prefix */
Yann Collet9f5ab1a2015-12-11 00:27:41 +0100532 if (sequence.offset > (size_t)(oLitEnd - vBase))
533 return ERROR(corruption_detected);
Yann Collet44287a32015-11-30 23:13:56 +0100534 match = dictEnd - (base-match);
535 if (match + sequence.matchLength <= dictEnd)
536 {
Yann Collet4bfe4152015-12-06 13:18:37 +0100537 memmove(oLitEnd, match, sequence.matchLength);
Yann Collet44287a32015-11-30 23:13:56 +0100538 return sequenceLength;
539 }
540 /* span extDict & currentPrefixSegment */
541 {
542 size_t length1 = dictEnd - match;
Yann Collet4bfe4152015-12-06 13:18:37 +0100543 memmove(oLitEnd, match, length1);
Yann Collet44287a32015-11-30 23:13:56 +0100544 op = oLitEnd + length1;
545 sequence.matchLength -= length1;
546 match = base;
547 }
548 }
Yann Collet0f366c62015-11-12 16:19:30 +0100549
Yann Collet44287a32015-11-30 23:13:56 +0100550 /* match within prefix */
551 if (sequence.offset < 8)
552 {
553 /* close range match, overlap */
554 const int sub2 = dec64table[sequence.offset];
555 op[0] = match[0];
556 op[1] = match[1];
557 op[2] = match[2];
558 op[3] = match[3];
559 match += dec32table[sequence.offset];
560 ZSTD_copy4(op+4, match);
561 match -= sub2;
562 }
563 else
564 {
565 ZSTD_copy8(op, match);
566 }
567 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100568
Yann Collet44287a32015-11-30 23:13:56 +0100569 if (oMatchEnd > oend-12)
570 {
571 if (op < oend_8)
572 {
573 ZSTD_wildcopy(op, match, oend_8 - op);
574 match += oend_8 - op;
575 op = oend_8;
576 }
577 while (op < oMatchEnd) *op++ = *match++;
578 }
579 else
580 {
581 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
582 }
583 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100584}
585
Yann Colletb3a2af92015-11-19 17:13:19 +0100586
Yann Collet5be2dd22015-11-11 13:43:58 +0100587static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100588 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100589 void* dst, size_t maxDstSize,
590 const void* seqStart, size_t seqSize)
591{
Yann Collet5be2dd22015-11-11 13:43:58 +0100592 const BYTE* ip = (const BYTE*)seqStart;
593 const BYTE* const iend = ip + seqSize;
594 BYTE* const ostart = (BYTE* const)dst;
595 BYTE* op = ostart;
596 BYTE* const oend = ostart + maxDstSize;
597 size_t errorCode, dumpsLength;
598 const BYTE* litPtr = dctx->litPtr;
599 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
600 const BYTE* const litEnd = litPtr + dctx->litSize;
601 int nbSeq;
602 const BYTE* dumps;
603 U32* DTableLL = dctx->LLTable;
604 U32* DTableML = dctx->MLTable;
605 U32* DTableOffb = dctx->OffTable;
Yann Collet417890c2015-12-04 17:16:37 +0100606 const BYTE* const base = (const BYTE*) (dctx->base);
607 const BYTE* const vBase = (const BYTE*) (dctx->vBase);
608 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100609
610 /* Build Decoding Tables */
611 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
612 DTableLL, DTableML, DTableOffb,
613 ip, iend-ip);
614 if (ZSTD_isError(errorCode)) return errorCode;
615 ip += errorCode;
616
617 /* Regen sequences */
618 {
619 seq_t sequence;
620 seqState_t seqState;
621
622 memset(&sequence, 0, sizeof(sequence));
623 sequence.offset = 4;
624 seqState.dumps = dumps;
625 seqState.dumpsEnd = dumps + dumpsLength;
626 seqState.prevOffset = 4;
627 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
628 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
629 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
630 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
631 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
632
Yann Collet44287a32015-11-30 23:13:56 +0100633 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; )
Yann Colletb3a2af92015-11-19 17:13:19 +0100634 {
635 size_t oneSeqSize;
636 nbSeq--;
637 ZSTD_decodeSequence(&sequence, &seqState);
638 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100639 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
640 op += oneSeqSize;
641 }
642
643 /* check if reached exact end */
Yann Collet44287a32015-11-30 23:13:56 +0100644 if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100645
646 /* last literal segment */
647 {
648 size_t lastLLSize = litEnd - litPtr;
649 if (litPtr > litEnd) return ERROR(corruption_detected);
650 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
651 if (op != litPtr) memcpy(op, litPtr, lastLLSize);
652 op += lastLLSize;
653 }
654 }
655
656 return op-ostart;
657}
658
659
Yann Colletb0125102016-01-09 02:00:10 +0100660static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
661{
662 if (dst != dctx->previousDstEnd) /* not contiguous */
663 {
664 dctx->dictEnd = dctx->previousDstEnd;
665 dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
666 dctx->base = dst;
667 dctx->previousDstEnd = dst;
668 }
669}
670
671
672static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100673 void* dst, size_t maxDstSize,
674 const void* src, size_t srcSize)
675{
676 /* blockType == blockCompressed */
677 const BYTE* ip = (const BYTE*)src;
678
679 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100680 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100681 if (ZSTD_isError(litCSize)) return litCSize;
682 ip += litCSize;
683 srcSize -= litCSize;
684
Yann Collet5b78d2f2015-11-12 15:36:05 +0100685 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100686}
687
688
Yann Colletb0125102016-01-09 02:00:10 +0100689size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
690 void* dst, size_t maxDstSize,
691 const void* src, size_t srcSize)
692{
693 ZSTD_checkContinuity(dctx, dst);
694 return ZSTD_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
695}
696
697
Yann Collet31683c02015-12-18 01:26:48 +0100698size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
699 void* dst, size_t maxDstSize,
700 const void* src, size_t srcSize,
701 const void* dict, size_t dictSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100702{
703 const BYTE* ip = (const BYTE*)src;
704 const BYTE* iend = ip + srcSize;
705 BYTE* const ostart = (BYTE* const)dst;
706 BYTE* op = ostart;
707 BYTE* const oend = ostart + maxDstSize;
708 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100709 blockProperties_t blockProperties;
710
Yann Colletb2549842015-11-18 11:29:32 +0100711 /* init */
Yann Collet31683c02015-12-18 01:26:48 +0100712 ZSTD_resetDCtx(ctx);
713 if (dict)
714 {
715 ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
716 ctx->dictEnd = ctx->previousDstEnd;
717 ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
718 ctx->base = dst;
719 }
720 else
721 {
722 ctx->vBase = ctx->base = ctx->dictEnd = dst;
723 }
Yann Collet5b78d2f2015-11-12 15:36:05 +0100724
Yann Collet5be2dd22015-11-11 13:43:58 +0100725 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100726 {
727 size_t frameHeaderSize;
728 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100729#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100730 {
731 const U32 magicNumber = MEM_readLE32(src);
732 if (ZSTD_isLegacy(magicNumber))
733 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
734 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100735#endif
Yann Collet88fcd292015-11-25 14:42:45 +0100736 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
737 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
738 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
739 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
740 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
741 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
742 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100743
744 /* Loop on each block */
745 while (1)
746 {
747 size_t decodedSize=0;
748 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
749 if (ZSTD_isError(cBlockSize)) return cBlockSize;
750
751 ip += ZSTD_blockHeaderSize;
752 remainingSize -= ZSTD_blockHeaderSize;
753 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
754
755 switch(blockProperties.blockType)
756 {
757 case bt_compressed:
Yann Colletb0125102016-01-09 02:00:10 +0100758 decodedSize = ZSTD_decompressBlock_internal(ctx, op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100759 break;
760 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100761 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100762 break;
763 case bt_rle :
764 return ERROR(GENERIC); /* not yet supported */
765 break;
766 case bt_end :
767 /* end of frame */
768 if (remainingSize) return ERROR(srcSize_wrong);
769 break;
770 default:
771 return ERROR(GENERIC); /* impossible */
772 }
773 if (cBlockSize == 0) break; /* bt_end */
774
775 if (ZSTD_isError(decodedSize)) return decodedSize;
776 op += decodedSize;
777 ip += cBlockSize;
778 remainingSize -= cBlockSize;
779 }
780
781 return op-ostart;
782}
783
Yann Collet31683c02015-12-18 01:26:48 +0100784
785size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
786{
787 return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
788}
789
Yann Collet5be2dd22015-11-11 13:43:58 +0100790size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
791{
Yann Collet3a3b72f2016-01-11 12:56:11 +0100792#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
793 size_t regenSize;
794 ZSTD_DCtx* dctx = ZSTD_createDCtx();
795 if (dctx==NULL) return ERROR(memory_allocation);
796 regenSize = ZSTD_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
797 ZSTD_freeDCtx(dctx);
798 return regenSize;
799#else
Yann Collet31683c02015-12-18 01:26:48 +0100800 ZSTD_DCtx dctx;
801 return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
Yann Colleta768a302016-01-21 16:04:35 +0100802#endif
Yann Collet5be2dd22015-11-11 13:43:58 +0100803}
804
805
806/* ******************************
807* Streaming Decompression API
808********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100809size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
810{
811 return dctx->expected;
812}
813
814size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
815{
816 /* Sanity check */
817 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Colletb0125102016-01-09 02:00:10 +0100818 ZSTD_checkContinuity(ctx, dst);
Yann Collet5be2dd22015-11-11 13:43:58 +0100819
Yann Collet88fcd292015-11-25 14:42:45 +0100820 /* Decompress : frame header; part 1 */
821 switch (ctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100822 {
Yann Collet88fcd292015-11-25 14:42:45 +0100823 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100824 {
Yann Collet88fcd292015-11-25 14:42:45 +0100825 /* get frame header size */
826 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
827 ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
Yann Collete4fdad52015-11-25 21:09:17 +0100828 if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
Yann Collet88fcd292015-11-25 14:42:45 +0100829 memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
830 if (ctx->headerSize > ZSTD_frameHeaderSize_min)
831 {
832 ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
833 ctx->stage = ZSTDds_decodeFrameHeader;
834 return 0;
835 }
836 ctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +0100837 }
Yann Collet88fcd292015-11-25 14:42:45 +0100838 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100839 {
Yann Collet88fcd292015-11-25 14:42:45 +0100840 /* get frame header */
841 size_t result;
842 memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
843 result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
844 if (ZSTD_isError(result)) return result;
845 ctx->expected = ZSTD_blockHeaderSize;
846 ctx->stage = ZSTDds_decodeBlockHeader;
847 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +0100848 }
Yann Collet88fcd292015-11-25 14:42:45 +0100849 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100850 {
Yann Collet88fcd292015-11-25 14:42:45 +0100851 /* Decode block header */
852 blockProperties_t bp;
853 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
854 if (ZSTD_isError(blockSize)) return blockSize;
855 if (bp.blockType == bt_end)
856 {
857 ctx->expected = 0;
858 ctx->stage = ZSTDds_getFrameHeaderSize;
859 }
860 else
861 {
862 ctx->expected = blockSize;
863 ctx->bType = bp.blockType;
864 ctx->stage = ZSTDds_decompressBlock;
865 }
Yann Collet88fcd292015-11-25 14:42:45 +0100866 return 0;
867 }
Yann Collet417890c2015-12-04 17:16:37 +0100868 case ZSTDds_decompressBlock:
Yann Collet88fcd292015-11-25 14:42:45 +0100869 {
870 /* Decompress : block content */
871 size_t rSize;
872 switch(ctx->bType)
873 {
874 case bt_compressed:
Yann Colletb0125102016-01-09 02:00:10 +0100875 rSize = ZSTD_decompressBlock_internal(ctx, dst, maxDstSize, src, srcSize);
Yann Collet88fcd292015-11-25 14:42:45 +0100876 break;
877 case bt_raw :
878 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
879 break;
880 case bt_rle :
881 return ERROR(GENERIC); /* not yet handled */
882 break;
883 case bt_end : /* should never happen (filtered at phase 1) */
884 rSize = 0;
885 break;
886 default:
887 return ERROR(GENERIC);
888 }
889 ctx->stage = ZSTDds_decodeBlockHeader;
890 ctx->expected = ZSTD_blockHeaderSize;
891 ctx->previousDstEnd = (char*)dst + rSize;
892 return rSize;
893 }
894 default:
895 return ERROR(GENERIC); /* impossible */
896 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100897}
898
899
Yann Colletb0125102016-01-09 02:00:10 +0100900void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* dict, size_t dictSize)
Yann Collet417890c2015-12-04 17:16:37 +0100901{
902 ctx->dictEnd = ctx->previousDstEnd;
Yann Colletb0125102016-01-09 02:00:10 +0100903 ctx->vBase = (const char*)dict - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
904 ctx->base = dict;
905 ctx->previousDstEnd = (const char*)dict + dictSize;
Yann Collet417890c2015-12-04 17:16:37 +0100906}