blob: 122ff2b06a41cf9b233aa295be15b082bc79a2e2 [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
36/*!
Yann Collet5be2dd22015-11-11 13:43:58 +010037 * HEAPMODE :
Yann Colletb3a2af92015-11-19 17:13:19 +010038 * Select how default compression functions will allocate memory for their hash table,
Yann Collet5be2dd22015-11-11 13:43:58 +010039 * in memory stack (0, fastest), or in memory heap (1, requires malloc())
Yann Colletb3a2af92015-11-19 17:13:19 +010040 * Note that compression context is fairly large, as a consequence heap memory is recommended.
Yann Collet5be2dd22015-11-11 13:43:58 +010041 */
42#ifndef ZSTD_HEAPMODE
43# define ZSTD_HEAPMODE 1
44#endif /* ZSTD_HEAPMODE */
45
46/*!
47* LEGACY_SUPPORT :
Yann Collet14983e72015-11-11 21:38:21 +010048* ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
Yann Collet5be2dd22015-11-11 13:43:58 +010049*/
50#ifndef ZSTD_LEGACY_SUPPORT
51# define ZSTD_LEGACY_SUPPORT 1
52#endif
53
54
55/* *******************************************************
56* Includes
57*********************************************************/
58#include <stdlib.h> /* calloc */
59#include <string.h> /* memcpy, memmove */
60#include <stdio.h> /* debug : printf */
61#include "mem.h" /* low level memory routines */
62#include "zstd_static.h"
63#include "zstd_internal.h"
64#include "fse_static.h"
65#include "huff0.h"
66
67#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
68# include "zstd_legacy.h"
69#endif
70
Yann Collet5be2dd22015-11-11 13:43:58 +010071/* *******************************************************
72* Compiler specifics
73*********************************************************/
Yann Collet5be2dd22015-11-11 13:43:58 +010074#ifdef _MSC_VER /* Visual Studio */
75# define FORCE_INLINE static __forceinline
76# include <intrin.h> /* For Visual 2005 */
77# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
78# pragma warning(disable : 4324) /* disable: C4324: padded structure */
79#else
80# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
81# ifdef __GNUC__
82# define FORCE_INLINE static inline __attribute__((always_inline))
83# else
84# define FORCE_INLINE static inline
85# endif
86#endif
87
88
Yann Collet14983e72015-11-11 21:38:21 +010089/* *************************************
90* Local types
91***************************************/
92typedef struct
93{
94 blockType_t blockType;
95 U32 origSize;
96} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010097
98
99/* *******************************************************
100* Memory operations
101**********************************************************/
102static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
103
104
Yann Collet5be2dd22015-11-11 13:43:58 +0100105/* *************************************
106* Error Management
107***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100108unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
109
Yann Collet5be2dd22015-11-11 13:43:58 +0100110/*! ZSTD_isError
111* tells if a return value is an error code */
112unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
113
114/*! ZSTD_getErrorName
115* provides error code string (useful for debugging) */
116const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
117
118
Yann Collet5be2dd22015-11-11 13:43:58 +0100119/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100120* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100121***************************************************************/
Yann Collete4fdad52015-11-25 21:09:17 +0100122typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
Yann Collet88fcd292015-11-25 14:42:45 +0100123 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
124
Yann Collet5be2dd22015-11-11 13:43:58 +0100125struct ZSTD_DCtx_s
126{
127 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
128 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
129 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
130 void* previousDstEnd;
131 void* base;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100132 void* vBase;
133 void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100134 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100135 size_t headerSize;
136 ZSTD_parameters params;
Yann Collet5be2dd22015-11-11 13:43:58 +0100137 blockType_t bType;
Yann Collet88fcd292015-11-25 14:42:45 +0100138 ZSTD_dStage stage;
Yann Collet5be2dd22015-11-11 13:43:58 +0100139 const BYTE* litPtr;
140 size_t litBufSize;
141 size_t litSize;
142 BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
Yann Collet88fcd292015-11-25 14:42:45 +0100143 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet5be2dd22015-11-11 13:43:58 +0100144}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
145
Yann Collet5b78d2f2015-11-12 15:36:05 +0100146size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
147{
Yann Collet88fcd292015-11-25 14:42:45 +0100148 dctx->expected = ZSTD_frameHeaderSize_min;
149 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100150 dctx->previousDstEnd = NULL;
151 dctx->base = NULL;
152 dctx->vBase = NULL;
153 dctx->dictEnd = NULL;
154 return 0;
155}
156
157ZSTD_DCtx* ZSTD_createDCtx(void)
158{
159 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
160 if (dctx==NULL) return NULL;
161 ZSTD_resetDCtx(dctx);
162 return dctx;
163}
164
165size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
166{
167 free(dctx);
168 return 0;
169}
170
171
172/* *************************************************************
173* Decompression section
174***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100175/** ZSTD_decodeFrameHeader_Part1
176* decode the 1st part of the Frame Header, which tells Frame Header size.
177* srcSize must be == ZSTD_frameHeaderSize_min
178* @return : the full size of the Frame Header */
179static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
180{
181 U32 magicNumber;
182 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
183 magicNumber = MEM_readLE32(src);
184 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
185 zc->headerSize = ZSTD_frameHeaderSize_min;
186 return zc->headerSize;
187}
188
Yann Collet88fcd292015-11-25 14:42:45 +0100189
190size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
191{
192 U32 magicNumber;
193 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
194 magicNumber = MEM_readLE32(src);
195 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
196 memset(params, 0, sizeof(*params));
Yann Collet26415d32015-11-26 12:43:28 +0100197 params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
Yann Collet88fcd292015-11-25 14:42:45 +0100198 return 0;
199}
200
Yann Collet26415d32015-11-26 12:43:28 +0100201/** ZSTD_decodeFrameHeader_Part2
202* decode the full Frame Header
203* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
204* @return : 0, or an error code, which can be tested using ZSTD_isError() */
205static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
206{
207 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
208 return ZSTD_getFrameParams(&(zc->params), src, srcSize);
209}
210
Yann Collet5be2dd22015-11-11 13:43:58 +0100211
212size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
213{
214 const BYTE* const in = (const BYTE* const)src;
215 BYTE headerFlags;
216 U32 cSize;
217
218 if (srcSize < 3) return ERROR(srcSize_wrong);
219
220 headerFlags = *in;
221 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
222
223 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
224 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
225
226 if (bpPtr->blockType == bt_end) return 0;
227 if (bpPtr->blockType == bt_rle) return 1;
228 return cSize;
229}
230
Yann Collet0f366c62015-11-12 16:19:30 +0100231static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100232{
233 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
234 memcpy(dst, src, srcSize);
235 return srcSize;
236}
237
238
239/** ZSTD_decompressLiterals
240 @return : nb of bytes read from src, or an error code*/
241static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
242 const void* src, size_t srcSize)
243{
244 const BYTE* ip = (const BYTE*)src;
245
246 const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
247 const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
248
249 if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
250 if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
251
252 if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
253
254 *maxDstSizePtr = litSize;
255 return litCSize + 5;
256}
257
258
259/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100260 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100261size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100262 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
263{
Yann Collet5be2dd22015-11-11 13:43:58 +0100264 const BYTE* const istart = (const BYTE*) src;
265
266 /* any compressed block with literals segment must be at least this size */
267 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
268
269 switch(*istart & 3)
270 {
271 /* compressed */
272 case 0:
273 {
274 size_t litSize = BLOCKSIZE;
275 const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
276 dctx->litPtr = dctx->litBuffer;
277 dctx->litBufSize = BLOCKSIZE+8;
278 dctx->litSize = litSize;
279 return readSize; /* works if it's an error too */
280 }
281 case IS_RAW:
282 {
283 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
284 if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
285 {
286 if (litSize > srcSize-3) return ERROR(corruption_detected);
287 memcpy(dctx->litBuffer, istart, litSize);
288 dctx->litPtr = dctx->litBuffer;
289 dctx->litBufSize = BLOCKSIZE+8;
290 dctx->litSize = litSize;
291 return litSize+3;
292 }
293 /* direct reference into compressed stream */
294 dctx->litPtr = istart+3;
295 dctx->litBufSize = srcSize-3;
296 dctx->litSize = litSize;
297 return litSize+3; }
298 case IS_RLE:
299 {
300 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
301 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
302 memset(dctx->litBuffer, istart[3], litSize);
303 dctx->litPtr = dctx->litBuffer;
304 dctx->litBufSize = BLOCKSIZE+8;
305 dctx->litSize = litSize;
306 return 4;
307 }
308 default:
309 return ERROR(corruption_detected); /* forbidden nominal case */
310 }
311}
312
313
314size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
315 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
316 const void* src, size_t srcSize)
317{
318 const BYTE* const istart = (const BYTE* const)src;
319 const BYTE* ip = istart;
320 const BYTE* const iend = istart + srcSize;
321 U32 LLtype, Offtype, MLtype;
322 U32 LLlog, Offlog, MLlog;
323 size_t dumpsLength;
324
325 /* check */
326 if (srcSize < 5) return ERROR(srcSize_wrong);
327
328 /* SeqHead */
329 *nbSeq = MEM_readLE16(ip); ip+=2;
330 LLtype = *ip >> 6;
331 Offtype = (*ip >> 4) & 3;
332 MLtype = (*ip >> 2) & 3;
333 if (*ip & 2)
334 {
335 dumpsLength = ip[2];
336 dumpsLength += ip[1] << 8;
337 ip += 3;
338 }
339 else
340 {
341 dumpsLength = ip[1];
342 dumpsLength += (ip[0] & 1) << 8;
343 ip += 2;
344 }
345 *dumpsPtr = ip;
346 ip += dumpsLength;
347 *dumpsLengthPtr = dumpsLength;
348
349 /* check */
350 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
351
352 /* sequences */
353 {
Yann Collet82368cf2015-11-16 19:10:56 +0100354 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100355 size_t headerSize;
356
357 /* Build DTables */
358 switch(LLtype)
359 {
360 U32 max;
361 case bt_rle :
362 LLlog = 0;
363 FSE_buildDTable_rle(DTableLL, *ip++); break;
364 case bt_raw :
365 LLlog = LLbits;
366 FSE_buildDTable_raw(DTableLL, LLbits); break;
367 default :
368 max = MaxLL;
369 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
370 if (FSE_isError(headerSize)) return ERROR(GENERIC);
371 if (LLlog > LLFSELog) return ERROR(corruption_detected);
372 ip += headerSize;
373 FSE_buildDTable(DTableLL, norm, max, LLlog);
374 }
375
376 switch(Offtype)
377 {
378 U32 max;
379 case bt_rle :
380 Offlog = 0;
381 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
382 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
383 break;
384 case bt_raw :
385 Offlog = Offbits;
386 FSE_buildDTable_raw(DTableOffb, Offbits); break;
387 default :
388 max = MaxOff;
389 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
390 if (FSE_isError(headerSize)) return ERROR(GENERIC);
391 if (Offlog > OffFSELog) return ERROR(corruption_detected);
392 ip += headerSize;
393 FSE_buildDTable(DTableOffb, norm, max, Offlog);
394 }
395
396 switch(MLtype)
397 {
398 U32 max;
399 case bt_rle :
400 MLlog = 0;
401 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
402 FSE_buildDTable_rle(DTableML, *ip++); break;
403 case bt_raw :
404 MLlog = MLbits;
405 FSE_buildDTable_raw(DTableML, MLbits); break;
406 default :
407 max = MaxML;
408 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
409 if (FSE_isError(headerSize)) return ERROR(GENERIC);
410 if (MLlog > MLFSELog) return ERROR(corruption_detected);
411 ip += headerSize;
412 FSE_buildDTable(DTableML, norm, max, MLlog);
413 }
414 }
415
416 return ip-istart;
417}
418
419
420typedef struct {
421 size_t litLength;
422 size_t offset;
423 size_t matchLength;
424} seq_t;
425
426typedef struct {
427 BIT_DStream_t DStream;
428 FSE_DState_t stateLL;
429 FSE_DState_t stateOffb;
430 FSE_DState_t stateML;
431 size_t prevOffset;
432 const BYTE* dumps;
433 const BYTE* dumpsEnd;
434} seqState_t;
435
436
437static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
438{
439 size_t litLength;
440 size_t prevOffset;
441 size_t offset;
442 size_t matchLength;
443 const BYTE* dumps = seqState->dumps;
444 const BYTE* const de = seqState->dumpsEnd;
445
446 /* Literal length */
447 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
Yann Collete4fdad52015-11-25 21:09:17 +0100448 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100449 if (litLength == MaxLL)
450 {
451 U32 add = *dumps++;
452 if (add < 255) litLength += add;
453 else
454 {
455 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
456 dumps += 3;
457 }
458 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
459 }
460
461 /* Offset */
462 {
463 static const U32 offsetPrefix[MaxOff+1] = {
464 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
465 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
466 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
467 U32 offsetCode, nbBits;
468 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
469 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
470 nbBits = offsetCode - 1;
471 if (offsetCode==0) nbBits = 0; /* cmove */
472 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
473 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
474 if (offsetCode==0) offset = prevOffset; /* cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100475 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collet5be2dd22015-11-11 13:43:58 +0100476 }
477
478 /* MatchLength */
479 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
480 if (matchLength == MaxML)
481 {
482 U32 add = *dumps++;
483 if (add < 255) matchLength += add;
484 else
485 {
486 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
487 dumps += 3;
488 }
489 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
490 }
491 matchLength += MINMATCH;
492
493 /* save result */
494 seq->litLength = litLength;
495 seq->offset = offset;
496 seq->matchLength = matchLength;
497 seqState->dumps = dumps;
498}
499
500
Yann Collet5b78d2f2015-11-12 15:36:05 +0100501FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100502 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100503 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Colletb3a2af92015-11-19 17:13:19 +0100504 BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100505{
Yann Colletb3a2af92015-11-19 17:13:19 +0100506 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
507 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100508 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100509 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
510 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100511 BYTE* const oend_8 = oend-8;
512 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100513 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100514
515 /* check */
516 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
517 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
518 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
519
520 /* copy Literals */
521 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
522 op = oLitEnd;
523 *litPtr = litEnd; /* update for next sequence */
524
525 /* copy Match */
Yann Colletb3a2af92015-11-19 17:13:19 +0100526 /* check */
527 //if (match > oLitEnd) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) */
528 if (sequence.offset > (size_t)oLitEnd) return ERROR(corruption_detected); /* address space overflow test (this test seems preserved by clang optimizer) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100529
Yann Colletb3a2af92015-11-19 17:13:19 +0100530 if (match < base)
531 {
532 /* offset beyond prefix */
533 if (match < vBase) return ERROR(corruption_detected);
534 match = dictEnd - (base-match);
535 if (match + sequence.matchLength <= dictEnd)
536 {
537 memcpy(oLitEnd, match, sequence.matchLength);
538 return sequenceLength;
539 }
540 /* span extDict & currentPrefixSegment */
541 {
542 size_t length1 = dictEnd - match;
543 memcpy(oLitEnd, match, length1);
544 op = oLitEnd + length1;
545 sequence.matchLength -= length1;
546 match = base;
547 }
548 }
Yann Collet0f366c62015-11-12 16:19:30 +0100549
Yann Colletb3a2af92015-11-19 17:13:19 +0100550 /* match within prefix */
551 if (sequence.offset < 8)
552 {
553 /* close range match, overlap */
554 const int sub2 = dec64table[sequence.offset];
555 op[0] = match[0];
556 op[1] = match[1];
557 op[2] = match[2];
558 op[3] = match[3];
559 match += dec32table[sequence.offset];
560 ZSTD_copy4(op+4, match);
561 match -= sub2;
562 }
563 else
564 {
565 ZSTD_copy8(op, match);
566 }
567 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100568
Yann Colletb3a2af92015-11-19 17:13:19 +0100569 if (oMatchEnd > oend-12)
570 {
571 if (op < oend_8)
572 {
573 ZSTD_wildcopy(op, match, oend_8 - op);
574 match += oend_8 - op;
575 op = oend_8;
576 }
577 while (op < oMatchEnd) *op++ = *match++;
578 }
579 else
580 {
581 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
582 }
583 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100584}
585
Yann Colletb3a2af92015-11-19 17:13:19 +0100586
Yann Collet5be2dd22015-11-11 13:43:58 +0100587static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100588 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100589 void* dst, size_t maxDstSize,
590 const void* seqStart, size_t seqSize)
591{
Yann Collet5be2dd22015-11-11 13:43:58 +0100592 const BYTE* ip = (const BYTE*)seqStart;
593 const BYTE* const iend = ip + seqSize;
594 BYTE* const ostart = (BYTE* const)dst;
595 BYTE* op = ostart;
596 BYTE* const oend = ostart + maxDstSize;
597 size_t errorCode, dumpsLength;
598 const BYTE* litPtr = dctx->litPtr;
599 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
600 const BYTE* const litEnd = litPtr + dctx->litSize;
601 int nbSeq;
602 const BYTE* dumps;
603 U32* DTableLL = dctx->LLTable;
604 U32* DTableML = dctx->MLTable;
605 U32* DTableOffb = dctx->OffTable;
606 BYTE* const base = (BYTE*) (dctx->base);
Yann Collet5b78d2f2015-11-12 15:36:05 +0100607 BYTE* const vBase = (BYTE*) (dctx->vBase);
608 BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100609
610 /* Build Decoding Tables */
611 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
612 DTableLL, DTableML, DTableOffb,
613 ip, iend-ip);
614 if (ZSTD_isError(errorCode)) return errorCode;
615 ip += errorCode;
616
617 /* Regen sequences */
618 {
619 seq_t sequence;
620 seqState_t seqState;
621
622 memset(&sequence, 0, sizeof(sequence));
623 sequence.offset = 4;
624 seqState.dumps = dumps;
625 seqState.dumpsEnd = dumps + dumpsLength;
626 seqState.prevOffset = 4;
627 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
628 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
629 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
630 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
631 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
632
Yann Colletb3a2af92015-11-19 17:13:19 +0100633 for ( ; (BIT_reloadDStream(&(seqState.DStream)) < BIT_DStream_completed) ; )
Yann Collet5be2dd22015-11-11 13:43:58 +0100634 {
635 size_t oneSeqSize;
636 nbSeq--;
637 ZSTD_decodeSequence(&sequence, &seqState);
Yann Colletb3a2af92015-11-19 17:13:19 +0100638 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
639 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
640 op += oneSeqSize;
641 }
642
643 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
644
645 /* now BIT_reloadDStream(&(seqState.DStream)) >= BIT_DStream_completed) */
646 for ( ; (BIT_reloadDStream(&(seqState.DStream)) == BIT_DStream_completed) && nbSeq ; )
647 {
648 size_t oneSeqSize;
649 nbSeq--;
650 ZSTD_decodeSequence(&sequence, &seqState);
651 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100652 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
653 op += oneSeqSize;
654 }
655
656 /* check if reached exact end */
Yann Colletb3a2af92015-11-19 17:13:19 +0100657 if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and precisely consumed; otherwise data is corrupted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100658
659 /* last literal segment */
660 {
661 size_t lastLLSize = litEnd - litPtr;
662 if (litPtr > litEnd) return ERROR(corruption_detected);
663 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
664 if (op != litPtr) memcpy(op, litPtr, lastLLSize);
665 op += lastLLSize;
666 }
667 }
668
669 return op-ostart;
670}
671
672
673static size_t ZSTD_decompressBlock(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100674 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100675 void* dst, size_t maxDstSize,
676 const void* src, size_t srcSize)
677{
678 /* blockType == blockCompressed */
679 const BYTE* ip = (const BYTE*)src;
680
681 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100682 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100683 if (ZSTD_isError(litCSize)) return litCSize;
684 ip += litCSize;
685 srcSize -= litCSize;
686
Yann Collet5b78d2f2015-11-12 15:36:05 +0100687 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100688}
689
690
Yann Collet5b78d2f2015-11-12 15:36:05 +0100691size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100692{
693 const BYTE* ip = (const BYTE*)src;
694 const BYTE* iend = ip + srcSize;
695 BYTE* const ostart = (BYTE* const)dst;
696 BYTE* op = ostart;
697 BYTE* const oend = ostart + maxDstSize;
698 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100699 blockProperties_t blockProperties;
700
Yann Collet5b78d2f2015-11-12 15:36:05 +0100701
Yann Colletb2549842015-11-18 11:29:32 +0100702 /* init */
703 ctx->base = ctx->vBase = ctx->dictEnd = dst;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100704
Yann Collet5be2dd22015-11-11 13:43:58 +0100705 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100706 {
707 size_t frameHeaderSize;
708 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100709#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100710 {
711 const U32 magicNumber = MEM_readLE32(src);
712 if (ZSTD_isLegacy(magicNumber))
713 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
714 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100715#endif
Yann Collet88fcd292015-11-25 14:42:45 +0100716 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
717 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
718 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
719 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
720 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
721 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
722 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100723
724 /* Loop on each block */
725 while (1)
726 {
727 size_t decodedSize=0;
728 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
729 if (ZSTD_isError(cBlockSize)) return cBlockSize;
730
731 ip += ZSTD_blockHeaderSize;
732 remainingSize -= ZSTD_blockHeaderSize;
733 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
734
735 switch(blockProperties.blockType)
736 {
737 case bt_compressed:
738 decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
739 break;
740 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100741 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100742 break;
743 case bt_rle :
744 return ERROR(GENERIC); /* not yet supported */
745 break;
746 case bt_end :
747 /* end of frame */
748 if (remainingSize) return ERROR(srcSize_wrong);
749 break;
750 default:
751 return ERROR(GENERIC); /* impossible */
752 }
753 if (cBlockSize == 0) break; /* bt_end */
754
755 if (ZSTD_isError(decodedSize)) return decodedSize;
756 op += decodedSize;
757 ip += cBlockSize;
758 remainingSize -= cBlockSize;
759 }
760
761 return op-ostart;
762}
763
764size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
765{
766 ZSTD_DCtx ctx;
Yann Collet5be2dd22015-11-11 13:43:58 +0100767 return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
768}
769
770
771/* ******************************
772* Streaming Decompression API
773********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100774size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
775{
776 return dctx->expected;
777}
778
779size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
780{
781 /* Sanity check */
782 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Collet88fcd292015-11-25 14:42:45 +0100783 if (dst != ctx->previousDstEnd) /* not contiguous */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100784 {
Yann Colletb2549842015-11-18 11:29:32 +0100785 ctx->dictEnd = ctx->previousDstEnd;
786 if ((dst > ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment right into tracked memory */
787 ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */
788 ctx->vBase = (char*)dst - ((char*)(ctx->dictEnd) - (char*)(ctx->base));
789 ctx->base = dst;
790 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100791
Yann Collet88fcd292015-11-25 14:42:45 +0100792 /* Decompress : frame header; part 1 */
793 switch (ctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100794 {
Yann Collet88fcd292015-11-25 14:42:45 +0100795 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100796 {
Yann Collet88fcd292015-11-25 14:42:45 +0100797 /* get frame header size */
798 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
799 ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
Yann Collete4fdad52015-11-25 21:09:17 +0100800 if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
Yann Collet88fcd292015-11-25 14:42:45 +0100801 memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
802 if (ctx->headerSize > ZSTD_frameHeaderSize_min)
803 {
804 ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
805 ctx->stage = ZSTDds_decodeFrameHeader;
806 return 0;
807 }
808 ctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +0100809 }
Yann Collet88fcd292015-11-25 14:42:45 +0100810 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100811 {
Yann Collet88fcd292015-11-25 14:42:45 +0100812 /* get frame header */
813 size_t result;
814 memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
815 result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
816 if (ZSTD_isError(result)) return result;
817 ctx->expected = ZSTD_blockHeaderSize;
818 ctx->stage = ZSTDds_decodeBlockHeader;
819 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +0100820 }
Yann Collet88fcd292015-11-25 14:42:45 +0100821 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100822 {
Yann Collet88fcd292015-11-25 14:42:45 +0100823 /* Decode block header */
824 blockProperties_t bp;
825 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
826 if (ZSTD_isError(blockSize)) return blockSize;
827 if (bp.blockType == bt_end)
828 {
829 ctx->expected = 0;
830 ctx->stage = ZSTDds_getFrameHeaderSize;
831 }
832 else
833 {
834 ctx->expected = blockSize;
835 ctx->bType = bp.blockType;
836 ctx->stage = ZSTDds_decompressBlock;
837 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100838
Yann Collet88fcd292015-11-25 14:42:45 +0100839 ctx->previousDstEnd = dst;
840 return 0;
841 }
842 case 3:
843 {
844 /* Decompress : block content */
845 size_t rSize;
846 switch(ctx->bType)
847 {
848 case bt_compressed:
849 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
850 break;
851 case bt_raw :
852 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
853 break;
854 case bt_rle :
855 return ERROR(GENERIC); /* not yet handled */
856 break;
857 case bt_end : /* should never happen (filtered at phase 1) */
858 rSize = 0;
859 break;
860 default:
861 return ERROR(GENERIC);
862 }
863 ctx->stage = ZSTDds_decodeBlockHeader;
864 ctx->expected = ZSTD_blockHeaderSize;
865 ctx->previousDstEnd = (char*)dst + rSize;
866 return rSize;
867 }
868 default:
869 return ERROR(GENERIC); /* impossible */
870 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100871}
872
873