blob: 54429628d6b957cdc72a5e811d19377a8e35bc17 [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
36/*!
Yann Collet5be2dd22015-11-11 13:43:58 +010037 * HEAPMODE :
Yann Colletb3a2af92015-11-19 17:13:19 +010038 * Select how default compression functions will allocate memory for their hash table,
Yann Collet5be2dd22015-11-11 13:43:58 +010039 * in memory stack (0, fastest), or in memory heap (1, requires malloc())
Yann Colletb3a2af92015-11-19 17:13:19 +010040 * Note that compression context is fairly large, as a consequence heap memory is recommended.
Yann Collet5be2dd22015-11-11 13:43:58 +010041 */
42#ifndef ZSTD_HEAPMODE
43# define ZSTD_HEAPMODE 1
44#endif /* ZSTD_HEAPMODE */
45
46/*!
47* LEGACY_SUPPORT :
Yann Collet14983e72015-11-11 21:38:21 +010048* ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
Yann Collet5be2dd22015-11-11 13:43:58 +010049*/
50#ifndef ZSTD_LEGACY_SUPPORT
51# define ZSTD_LEGACY_SUPPORT 1
52#endif
53
54
55/* *******************************************************
56* Includes
57*********************************************************/
58#include <stdlib.h> /* calloc */
59#include <string.h> /* memcpy, memmove */
60#include <stdio.h> /* debug : printf */
61#include "mem.h" /* low level memory routines */
62#include "zstd_static.h"
63#include "zstd_internal.h"
64#include "fse_static.h"
65#include "huff0.h"
66
67#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
68# include "zstd_legacy.h"
69#endif
70
71
72/* *******************************************************
73* Compiler specifics
74*********************************************************/
Yann Collet5be2dd22015-11-11 13:43:58 +010075#ifdef _MSC_VER /* Visual Studio */
76# define FORCE_INLINE static __forceinline
77# include <intrin.h> /* For Visual 2005 */
78# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
79# pragma warning(disable : 4324) /* disable: C4324: padded structure */
80#else
81# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
82# ifdef __GNUC__
83# define FORCE_INLINE static inline __attribute__((always_inline))
84# else
85# define FORCE_INLINE static inline
86# endif
87#endif
88
89
Yann Collet14983e72015-11-11 21:38:21 +010090/* *************************************
91* Local types
92***************************************/
93typedef struct
94{
95 blockType_t blockType;
96 U32 origSize;
97} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010098
99
100/* *******************************************************
101* Memory operations
102**********************************************************/
103static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
104
105
Yann Collet5be2dd22015-11-11 13:43:58 +0100106/* *************************************
107* Error Management
108***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100109unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
110
Yann Collet5be2dd22015-11-11 13:43:58 +0100111/*! ZSTD_isError
112* tells if a return value is an error code */
113unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
114
115/*! ZSTD_getErrorName
116* provides error code string (useful for debugging) */
117const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
118
119
Yann Collet5be2dd22015-11-11 13:43:58 +0100120/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100121* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100122***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100123typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
124 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
125
Yann Collet5be2dd22015-11-11 13:43:58 +0100126struct ZSTD_DCtx_s
127{
128 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
129 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
130 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
131 void* previousDstEnd;
132 void* base;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100133 void* vBase;
134 void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100135 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100136 size_t headerSize;
137 ZSTD_parameters params;
Yann Collet5be2dd22015-11-11 13:43:58 +0100138 blockType_t bType;
Yann Collet88fcd292015-11-25 14:42:45 +0100139 ZSTD_dStage stage;
Yann Collet5be2dd22015-11-11 13:43:58 +0100140 const BYTE* litPtr;
141 size_t litBufSize;
142 size_t litSize;
143 BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
Yann Collet88fcd292015-11-25 14:42:45 +0100144 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet5be2dd22015-11-11 13:43:58 +0100145}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
146
Yann Collet5b78d2f2015-11-12 15:36:05 +0100147size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
148{
Yann Collet88fcd292015-11-25 14:42:45 +0100149 dctx->expected = ZSTD_frameHeaderSize_min;
150 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100151 dctx->previousDstEnd = NULL;
152 dctx->base = NULL;
153 dctx->vBase = NULL;
154 dctx->dictEnd = NULL;
155 return 0;
156}
157
158ZSTD_DCtx* ZSTD_createDCtx(void)
159{
160 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
161 if (dctx==NULL) return NULL;
162 ZSTD_resetDCtx(dctx);
163 return dctx;
164}
165
166size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
167{
168 free(dctx);
169 return 0;
170}
171
172
173/* *************************************************************
174* Decompression section
175***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100176/** ZSTD_decodeFrameHeader_Part1
177* decode the 1st part of the Frame Header, which tells Frame Header size.
178* srcSize must be == ZSTD_frameHeaderSize_min
179* @return : the full size of the Frame Header */
180static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
181{
182 U32 magicNumber;
183 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
184 magicNumber = MEM_readLE32(src);
185 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
186 zc->headerSize = ZSTD_frameHeaderSize_min;
187 return zc->headerSize;
188}
189
190/** ZSTD_decodeFrameHeader_Part2
191* decode the full Frame Header
192* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
193* @return : 0, or an error code, which can be tested using ZSTD_isError() */
194static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
195{
196 const BYTE* ip = (const BYTE*)src;
197 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
198 memset(&(zc->params), 0, sizeof(zc->params));
199 zc->params.windowLog = ip[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN;
200 return 0;
201}
202
203
204size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
205{
206 U32 magicNumber;
207 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
208 magicNumber = MEM_readLE32(src);
209 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
210 memset(params, 0, sizeof(*params));
211 params->windowLog = ((const BYTE*)src)[4] + ZSTD_WINDOWLOG_ABSOLUTEMIN;
212 return 0;
213}
214
Yann Collet5be2dd22015-11-11 13:43:58 +0100215
216size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
217{
218 const BYTE* const in = (const BYTE* const)src;
219 BYTE headerFlags;
220 U32 cSize;
221
222 if (srcSize < 3) return ERROR(srcSize_wrong);
223
224 headerFlags = *in;
225 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
226
227 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
228 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
229
230 if (bpPtr->blockType == bt_end) return 0;
231 if (bpPtr->blockType == bt_rle) return 1;
232 return cSize;
233}
234
Yann Collet0f366c62015-11-12 16:19:30 +0100235static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100236{
237 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
238 memcpy(dst, src, srcSize);
239 return srcSize;
240}
241
242
243/** ZSTD_decompressLiterals
244 @return : nb of bytes read from src, or an error code*/
245static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
246 const void* src, size_t srcSize)
247{
248 const BYTE* ip = (const BYTE*)src;
249
250 const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
251 const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
252
253 if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
254 if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
255
256 if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
257
258 *maxDstSizePtr = litSize;
259 return litCSize + 5;
260}
261
262
263/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100264 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100265size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100266 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
267{
Yann Collet5be2dd22015-11-11 13:43:58 +0100268 const BYTE* const istart = (const BYTE*) src;
269
270 /* any compressed block with literals segment must be at least this size */
271 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
272
273 switch(*istart & 3)
274 {
275 /* compressed */
276 case 0:
277 {
278 size_t litSize = BLOCKSIZE;
279 const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
280 dctx->litPtr = dctx->litBuffer;
281 dctx->litBufSize = BLOCKSIZE+8;
282 dctx->litSize = litSize;
283 return readSize; /* works if it's an error too */
284 }
285 case IS_RAW:
286 {
287 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
288 if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
289 {
290 if (litSize > srcSize-3) return ERROR(corruption_detected);
291 memcpy(dctx->litBuffer, istart, litSize);
292 dctx->litPtr = dctx->litBuffer;
293 dctx->litBufSize = BLOCKSIZE+8;
294 dctx->litSize = litSize;
295 return litSize+3;
296 }
297 /* direct reference into compressed stream */
298 dctx->litPtr = istart+3;
299 dctx->litBufSize = srcSize-3;
300 dctx->litSize = litSize;
301 return litSize+3; }
302 case IS_RLE:
303 {
304 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
305 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
306 memset(dctx->litBuffer, istart[3], litSize);
307 dctx->litPtr = dctx->litBuffer;
308 dctx->litBufSize = BLOCKSIZE+8;
309 dctx->litSize = litSize;
310 return 4;
311 }
312 default:
313 return ERROR(corruption_detected); /* forbidden nominal case */
314 }
315}
316
317
318size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
319 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
320 const void* src, size_t srcSize)
321{
322 const BYTE* const istart = (const BYTE* const)src;
323 const BYTE* ip = istart;
324 const BYTE* const iend = istart + srcSize;
325 U32 LLtype, Offtype, MLtype;
326 U32 LLlog, Offlog, MLlog;
327 size_t dumpsLength;
328
329 /* check */
330 if (srcSize < 5) return ERROR(srcSize_wrong);
331
332 /* SeqHead */
333 *nbSeq = MEM_readLE16(ip); ip+=2;
334 LLtype = *ip >> 6;
335 Offtype = (*ip >> 4) & 3;
336 MLtype = (*ip >> 2) & 3;
337 if (*ip & 2)
338 {
339 dumpsLength = ip[2];
340 dumpsLength += ip[1] << 8;
341 ip += 3;
342 }
343 else
344 {
345 dumpsLength = ip[1];
346 dumpsLength += (ip[0] & 1) << 8;
347 ip += 2;
348 }
349 *dumpsPtr = ip;
350 ip += dumpsLength;
351 *dumpsLengthPtr = dumpsLength;
352
353 /* check */
354 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
355
356 /* sequences */
357 {
Yann Collet82368cf2015-11-16 19:10:56 +0100358 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100359 size_t headerSize;
360
361 /* Build DTables */
362 switch(LLtype)
363 {
364 U32 max;
365 case bt_rle :
366 LLlog = 0;
367 FSE_buildDTable_rle(DTableLL, *ip++); break;
368 case bt_raw :
369 LLlog = LLbits;
370 FSE_buildDTable_raw(DTableLL, LLbits); break;
371 default :
372 max = MaxLL;
373 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
374 if (FSE_isError(headerSize)) return ERROR(GENERIC);
375 if (LLlog > LLFSELog) return ERROR(corruption_detected);
376 ip += headerSize;
377 FSE_buildDTable(DTableLL, norm, max, LLlog);
378 }
379
380 switch(Offtype)
381 {
382 U32 max;
383 case bt_rle :
384 Offlog = 0;
385 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
386 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
387 break;
388 case bt_raw :
389 Offlog = Offbits;
390 FSE_buildDTable_raw(DTableOffb, Offbits); break;
391 default :
392 max = MaxOff;
393 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
394 if (FSE_isError(headerSize)) return ERROR(GENERIC);
395 if (Offlog > OffFSELog) return ERROR(corruption_detected);
396 ip += headerSize;
397 FSE_buildDTable(DTableOffb, norm, max, Offlog);
398 }
399
400 switch(MLtype)
401 {
402 U32 max;
403 case bt_rle :
404 MLlog = 0;
405 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
406 FSE_buildDTable_rle(DTableML, *ip++); break;
407 case bt_raw :
408 MLlog = MLbits;
409 FSE_buildDTable_raw(DTableML, MLbits); break;
410 default :
411 max = MaxML;
412 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
413 if (FSE_isError(headerSize)) return ERROR(GENERIC);
414 if (MLlog > MLFSELog) return ERROR(corruption_detected);
415 ip += headerSize;
416 FSE_buildDTable(DTableML, norm, max, MLlog);
417 }
418 }
419
420 return ip-istart;
421}
422
423
424typedef struct {
425 size_t litLength;
426 size_t offset;
427 size_t matchLength;
428} seq_t;
429
430typedef struct {
431 BIT_DStream_t DStream;
432 FSE_DState_t stateLL;
433 FSE_DState_t stateOffb;
434 FSE_DState_t stateML;
435 size_t prevOffset;
436 const BYTE* dumps;
437 const BYTE* dumpsEnd;
438} seqState_t;
439
440
441static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
442{
443 size_t litLength;
444 size_t prevOffset;
445 size_t offset;
446 size_t matchLength;
447 const BYTE* dumps = seqState->dumps;
448 const BYTE* const de = seqState->dumpsEnd;
449
450 /* Literal length */
451 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
Yann Collet55aa7f92015-11-20 12:04:52 +0100452 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100453 if (litLength == MaxLL)
454 {
455 U32 add = *dumps++;
456 if (add < 255) litLength += add;
457 else
458 {
459 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
460 dumps += 3;
461 }
462 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
463 }
464
465 /* Offset */
466 {
467 static const U32 offsetPrefix[MaxOff+1] = {
468 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
469 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
470 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
471 U32 offsetCode, nbBits;
472 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
473 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
474 nbBits = offsetCode - 1;
475 if (offsetCode==0) nbBits = 0; /* cmove */
476 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
477 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
478 if (offsetCode==0) offset = prevOffset; /* cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100479 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collet5be2dd22015-11-11 13:43:58 +0100480 }
481
482 /* MatchLength */
483 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
484 if (matchLength == MaxML)
485 {
486 U32 add = *dumps++;
487 if (add < 255) matchLength += add;
488 else
489 {
490 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
491 dumps += 3;
492 }
493 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
494 }
495 matchLength += MINMATCH;
496
497 /* save result */
498 seq->litLength = litLength;
499 seq->offset = offset;
500 seq->matchLength = matchLength;
501 seqState->dumps = dumps;
502}
503
504
Yann Collet5b78d2f2015-11-12 15:36:05 +0100505FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100506 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100507 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Colletb3a2af92015-11-19 17:13:19 +0100508 BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100509{
Yann Colletb3a2af92015-11-19 17:13:19 +0100510 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
511 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100512 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100513 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
514 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100515 BYTE* const oend_8 = oend-8;
516 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100517 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100518
519 /* check */
520 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
521 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
522 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
523
524 /* copy Literals */
525 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
526 op = oLitEnd;
527 *litPtr = litEnd; /* update for next sequence */
528
529 /* copy Match */
Yann Colletb3a2af92015-11-19 17:13:19 +0100530 /* check */
531 //if (match > oLitEnd) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) */
532 if (sequence.offset > (size_t)oLitEnd) return ERROR(corruption_detected); /* address space overflow test (this test seems preserved by clang optimizer) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100533
Yann Colletb3a2af92015-11-19 17:13:19 +0100534 if (match < base)
535 {
536 /* offset beyond prefix */
537 if (match < vBase) return ERROR(corruption_detected);
538 match = dictEnd - (base-match);
539 if (match + sequence.matchLength <= dictEnd)
540 {
541 memcpy(oLitEnd, match, sequence.matchLength);
542 return sequenceLength;
543 }
544 /* span extDict & currentPrefixSegment */
545 {
546 size_t length1 = dictEnd - match;
547 memcpy(oLitEnd, match, length1);
548 op = oLitEnd + length1;
549 sequence.matchLength -= length1;
550 match = base;
551 }
552 }
Yann Collet0f366c62015-11-12 16:19:30 +0100553
Yann Colletb3a2af92015-11-19 17:13:19 +0100554 /* match within prefix */
555 if (sequence.offset < 8)
556 {
557 /* close range match, overlap */
558 const int sub2 = dec64table[sequence.offset];
559 op[0] = match[0];
560 op[1] = match[1];
561 op[2] = match[2];
562 op[3] = match[3];
563 match += dec32table[sequence.offset];
564 ZSTD_copy4(op+4, match);
565 match -= sub2;
566 }
567 else
568 {
569 ZSTD_copy8(op, match);
570 }
571 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100572
Yann Colletb3a2af92015-11-19 17:13:19 +0100573 if (oMatchEnd > oend-12)
574 {
575 if (op < oend_8)
576 {
577 ZSTD_wildcopy(op, match, oend_8 - op);
578 match += oend_8 - op;
579 op = oend_8;
580 }
581 while (op < oMatchEnd) *op++ = *match++;
582 }
583 else
584 {
585 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
586 }
587 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100588}
589
Yann Colletb3a2af92015-11-19 17:13:19 +0100590
Yann Collet5be2dd22015-11-11 13:43:58 +0100591static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100592 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100593 void* dst, size_t maxDstSize,
594 const void* seqStart, size_t seqSize)
595{
Yann Collet5be2dd22015-11-11 13:43:58 +0100596 const BYTE* ip = (const BYTE*)seqStart;
597 const BYTE* const iend = ip + seqSize;
598 BYTE* const ostart = (BYTE* const)dst;
599 BYTE* op = ostart;
600 BYTE* const oend = ostart + maxDstSize;
601 size_t errorCode, dumpsLength;
602 const BYTE* litPtr = dctx->litPtr;
603 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
604 const BYTE* const litEnd = litPtr + dctx->litSize;
605 int nbSeq;
606 const BYTE* dumps;
607 U32* DTableLL = dctx->LLTable;
608 U32* DTableML = dctx->MLTable;
609 U32* DTableOffb = dctx->OffTable;
610 BYTE* const base = (BYTE*) (dctx->base);
Yann Collet5b78d2f2015-11-12 15:36:05 +0100611 BYTE* const vBase = (BYTE*) (dctx->vBase);
612 BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100613
614 /* Build Decoding Tables */
615 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
616 DTableLL, DTableML, DTableOffb,
617 ip, iend-ip);
618 if (ZSTD_isError(errorCode)) return errorCode;
619 ip += errorCode;
620
621 /* Regen sequences */
622 {
623 seq_t sequence;
624 seqState_t seqState;
625
626 memset(&sequence, 0, sizeof(sequence));
627 sequence.offset = 4;
628 seqState.dumps = dumps;
629 seqState.dumpsEnd = dumps + dumpsLength;
630 seqState.prevOffset = 4;
631 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
632 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
633 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
634 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
635 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
636
Yann Colletb3a2af92015-11-19 17:13:19 +0100637 for ( ; (BIT_reloadDStream(&(seqState.DStream)) < BIT_DStream_completed) ; )
Yann Collet5be2dd22015-11-11 13:43:58 +0100638 {
639 size_t oneSeqSize;
640 nbSeq--;
641 ZSTD_decodeSequence(&sequence, &seqState);
Yann Colletb3a2af92015-11-19 17:13:19 +0100642 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
643 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
644 op += oneSeqSize;
645 }
646
647 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
648
649 /* now BIT_reloadDStream(&(seqState.DStream)) >= BIT_DStream_completed) */
650 for ( ; (BIT_reloadDStream(&(seqState.DStream)) == BIT_DStream_completed) && nbSeq ; )
651 {
652 size_t oneSeqSize;
653 nbSeq--;
654 ZSTD_decodeSequence(&sequence, &seqState);
655 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100656 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
657 op += oneSeqSize;
658 }
659
660 /* check if reached exact end */
Yann Colletb3a2af92015-11-19 17:13:19 +0100661 if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and precisely consumed; otherwise data is corrupted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100662
663 /* last literal segment */
664 {
665 size_t lastLLSize = litEnd - litPtr;
666 if (litPtr > litEnd) return ERROR(corruption_detected);
667 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
668 if (op != litPtr) memcpy(op, litPtr, lastLLSize);
669 op += lastLLSize;
670 }
671 }
672
673 return op-ostart;
674}
675
676
677static size_t ZSTD_decompressBlock(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100678 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100679 void* dst, size_t maxDstSize,
680 const void* src, size_t srcSize)
681{
682 /* blockType == blockCompressed */
683 const BYTE* ip = (const BYTE*)src;
684
685 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100686 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100687 if (ZSTD_isError(litCSize)) return litCSize;
688 ip += litCSize;
689 srcSize -= litCSize;
690
Yann Collet5b78d2f2015-11-12 15:36:05 +0100691 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100692}
693
694
Yann Collet5b78d2f2015-11-12 15:36:05 +0100695size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100696{
697 const BYTE* ip = (const BYTE*)src;
698 const BYTE* iend = ip + srcSize;
699 BYTE* const ostart = (BYTE* const)dst;
700 BYTE* op = ostart;
701 BYTE* const oend = ostart + maxDstSize;
702 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100703 blockProperties_t blockProperties;
704
Yann Collet5b78d2f2015-11-12 15:36:05 +0100705
Yann Colletb2549842015-11-18 11:29:32 +0100706 /* init */
707 ctx->base = ctx->vBase = ctx->dictEnd = dst;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100708
Yann Collet5be2dd22015-11-11 13:43:58 +0100709 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100710 {
711 size_t frameHeaderSize;
712 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100713#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100714 {
715 const U32 magicNumber = MEM_readLE32(src);
716 if (ZSTD_isLegacy(magicNumber))
717 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
718 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100719#endif
Yann Collet88fcd292015-11-25 14:42:45 +0100720 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
721 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
722 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
723 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
724 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
725 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
726 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100727
728 /* Loop on each block */
729 while (1)
730 {
731 size_t decodedSize=0;
732 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
733 if (ZSTD_isError(cBlockSize)) return cBlockSize;
734
735 ip += ZSTD_blockHeaderSize;
736 remainingSize -= ZSTD_blockHeaderSize;
737 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
738
739 switch(blockProperties.blockType)
740 {
741 case bt_compressed:
742 decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
743 break;
744 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100745 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100746 break;
747 case bt_rle :
748 return ERROR(GENERIC); /* not yet supported */
749 break;
750 case bt_end :
751 /* end of frame */
752 if (remainingSize) return ERROR(srcSize_wrong);
753 break;
754 default:
755 return ERROR(GENERIC); /* impossible */
756 }
757 if (cBlockSize == 0) break; /* bt_end */
758
759 if (ZSTD_isError(decodedSize)) return decodedSize;
760 op += decodedSize;
761 ip += cBlockSize;
762 remainingSize -= cBlockSize;
763 }
764
765 return op-ostart;
766}
767
768size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
769{
770 ZSTD_DCtx ctx;
Yann Collet5be2dd22015-11-11 13:43:58 +0100771 return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
772}
773
774
775/* ******************************
776* Streaming Decompression API
777********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100778size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
779{
780 return dctx->expected;
781}
782
783size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
784{
785 /* Sanity check */
786 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Collet88fcd292015-11-25 14:42:45 +0100787 if (dst != ctx->previousDstEnd) /* not contiguous */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100788 {
Yann Colletb2549842015-11-18 11:29:32 +0100789 ctx->dictEnd = ctx->previousDstEnd;
790 if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */
791 ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */
792 ctx->vBase = (char*)dst - ((char*)(ctx->dictEnd) - (char*)(ctx->base));
793 ctx->base = dst;
794 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100795
Yann Collet88fcd292015-11-25 14:42:45 +0100796 /* Decompress : frame header; part 1 */
797 switch (ctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100798 {
Yann Collet88fcd292015-11-25 14:42:45 +0100799 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100800 {
Yann Collet88fcd292015-11-25 14:42:45 +0100801 /* get frame header size */
802 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
803 ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
804 if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
805 memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
806 if (ctx->headerSize > ZSTD_frameHeaderSize_min)
807 {
808 ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
809 ctx->stage = ZSTDds_decodeFrameHeader;
810 return 0;
811 }
812 ctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +0100813 }
Yann Collet88fcd292015-11-25 14:42:45 +0100814 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100815 {
Yann Collet88fcd292015-11-25 14:42:45 +0100816 /* get frame header */
817 size_t result;
818 memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
819 result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
820 if (ZSTD_isError(result)) return result;
821 ctx->expected = ZSTD_blockHeaderSize;
822 ctx->stage = ZSTDds_decodeBlockHeader;
823 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +0100824 }
Yann Collet88fcd292015-11-25 14:42:45 +0100825 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100826 {
Yann Collet88fcd292015-11-25 14:42:45 +0100827 /* Decode block header */
828 blockProperties_t bp;
829 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
830 if (ZSTD_isError(blockSize)) return blockSize;
831 if (bp.blockType == bt_end)
832 {
833 ctx->expected = 0;
834 ctx->stage = ZSTDds_getFrameHeaderSize;
835 }
836 else
837 {
838 ctx->expected = blockSize;
839 ctx->bType = bp.blockType;
840 ctx->stage = ZSTDds_decompressBlock;
841 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100842
Yann Collet88fcd292015-11-25 14:42:45 +0100843 ctx->previousDstEnd = dst;
844 return 0;
845 }
846 case 3:
847 {
848 /* Decompress : block content */
849 size_t rSize;
850 switch(ctx->bType)
851 {
852 case bt_compressed:
853 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
854 break;
855 case bt_raw :
856 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
857 break;
858 case bt_rle :
859 return ERROR(GENERIC); /* not yet handled */
860 break;
861 case bt_end : /* should never happen (filtered at phase 1) */
862 rSize = 0;
863 break;
864 default:
865 return ERROR(GENERIC);
866 }
867 ctx->stage = ZSTDds_decodeBlockHeader;
868 ctx->expected = ZSTD_blockHeaderSize;
869 ctx->previousDstEnd = (char*)dst + rSize;
870 return rSize;
871 }
872 default:
873 return ERROR(GENERIC); /* impossible */
874 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100875}
876
877