blob: 66de4bc99de808401dffdfb03584f5f79ece04f7 [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
36/*!
Yann Collet5be2dd22015-11-11 13:43:58 +010037 * HEAPMODE :
Yann Colletb3a2af92015-11-19 17:13:19 +010038 * Select how default compression functions will allocate memory for their hash table,
Yann Collet5be2dd22015-11-11 13:43:58 +010039 * in memory stack (0, fastest), or in memory heap (1, requires malloc())
Yann Colletb3a2af92015-11-19 17:13:19 +010040 * Note that compression context is fairly large, as a consequence heap memory is recommended.
Yann Collet5be2dd22015-11-11 13:43:58 +010041 */
42#ifndef ZSTD_HEAPMODE
43# define ZSTD_HEAPMODE 1
44#endif /* ZSTD_HEAPMODE */
45
46/*!
47* LEGACY_SUPPORT :
Yann Collet14983e72015-11-11 21:38:21 +010048* ZSTD_decompress() can decode older formats (starting from zstd 0.1+)
Yann Collet5be2dd22015-11-11 13:43:58 +010049*/
50#ifndef ZSTD_LEGACY_SUPPORT
51# define ZSTD_LEGACY_SUPPORT 1
52#endif
53
54
55/* *******************************************************
56* Includes
57*********************************************************/
58#include <stdlib.h> /* calloc */
59#include <string.h> /* memcpy, memmove */
60#include <stdio.h> /* debug : printf */
61#include "mem.h" /* low level memory routines */
62#include "zstd_static.h"
63#include "zstd_internal.h"
64#include "fse_static.h"
65#include "huff0.h"
66
67#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
68# include "zstd_legacy.h"
69#endif
70
Yann Collet5be2dd22015-11-11 13:43:58 +010071/* *******************************************************
72* Compiler specifics
73*********************************************************/
Yann Collet5be2dd22015-11-11 13:43:58 +010074#ifdef _MSC_VER /* Visual Studio */
75# define FORCE_INLINE static __forceinline
76# include <intrin.h> /* For Visual 2005 */
77# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
78# pragma warning(disable : 4324) /* disable: C4324: padded structure */
79#else
80# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
81# ifdef __GNUC__
82# define FORCE_INLINE static inline __attribute__((always_inline))
83# else
84# define FORCE_INLINE static inline
85# endif
86#endif
87
88
Yann Collet14983e72015-11-11 21:38:21 +010089/* *************************************
90* Local types
91***************************************/
92typedef struct
93{
94 blockType_t blockType;
95 U32 origSize;
96} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010097
98
99/* *******************************************************
100* Memory operations
101**********************************************************/
102static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
103
104
Yann Collet5be2dd22015-11-11 13:43:58 +0100105/* *************************************
106* Error Management
107***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100108unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
109
Yann Collet5be2dd22015-11-11 13:43:58 +0100110/*! ZSTD_isError
111* tells if a return value is an error code */
112unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
113
114/*! ZSTD_getErrorName
115* provides error code string (useful for debugging) */
116const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
117
118
Yann Collet5be2dd22015-11-11 13:43:58 +0100119/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100120* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100121***************************************************************/
Yann Collete4fdad52015-11-25 21:09:17 +0100122typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
Yann Collet88fcd292015-11-25 14:42:45 +0100123 ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock } ZSTD_dStage;
124
Yann Collet5be2dd22015-11-11 13:43:58 +0100125struct ZSTD_DCtx_s
126{
127 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
128 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
129 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
130 void* previousDstEnd;
131 void* base;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100132 void* vBase;
133 void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100134 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100135 size_t headerSize;
136 ZSTD_parameters params;
Yann Collet5be2dd22015-11-11 13:43:58 +0100137 blockType_t bType;
Yann Collet88fcd292015-11-25 14:42:45 +0100138 ZSTD_dStage stage;
Yann Collet5be2dd22015-11-11 13:43:58 +0100139 const BYTE* litPtr;
140 size_t litBufSize;
141 size_t litSize;
142 BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
Yann Collet88fcd292015-11-25 14:42:45 +0100143 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet5be2dd22015-11-11 13:43:58 +0100144}; /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
145
Yann Collet5b78d2f2015-11-12 15:36:05 +0100146size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
147{
Yann Collet88fcd292015-11-25 14:42:45 +0100148 dctx->expected = ZSTD_frameHeaderSize_min;
149 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100150 dctx->previousDstEnd = NULL;
151 dctx->base = NULL;
152 dctx->vBase = NULL;
153 dctx->dictEnd = NULL;
154 return 0;
155}
156
157ZSTD_DCtx* ZSTD_createDCtx(void)
158{
159 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
160 if (dctx==NULL) return NULL;
161 ZSTD_resetDCtx(dctx);
162 return dctx;
163}
164
165size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
166{
167 free(dctx);
168 return 0;
169}
170
171
172/* *************************************************************
173* Decompression section
174***************************************************************/
Yann Collet88fcd292015-11-25 14:42:45 +0100175/** ZSTD_decodeFrameHeader_Part1
176* decode the 1st part of the Frame Header, which tells Frame Header size.
177* srcSize must be == ZSTD_frameHeaderSize_min
178* @return : the full size of the Frame Header */
179static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
180{
181 U32 magicNumber;
182 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
183 magicNumber = MEM_readLE32(src);
184 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
185 zc->headerSize = ZSTD_frameHeaderSize_min;
186 return zc->headerSize;
187}
188
Yann Collet88fcd292015-11-25 14:42:45 +0100189
190size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
191{
192 U32 magicNumber;
193 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
194 magicNumber = MEM_readLE32(src);
195 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
196 memset(params, 0, sizeof(*params));
Yann Collet26415d32015-11-26 12:43:28 +0100197 params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
Yann Collet88fcd292015-11-25 14:42:45 +0100198 return 0;
199}
200
Yann Collet26415d32015-11-26 12:43:28 +0100201/** ZSTD_decodeFrameHeader_Part2
202* decode the full Frame Header
203* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
204* @return : 0, or an error code, which can be tested using ZSTD_isError() */
205static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
206{
Yann Collet00fd7a22015-11-28 16:03:22 +0100207 size_t result;
Yann Collet26415d32015-11-26 12:43:28 +0100208 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
Yann Collet00fd7a22015-11-28 16:03:22 +0100209 result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
210 if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
211 return result;
Yann Collet26415d32015-11-26 12:43:28 +0100212}
213
Yann Collet5be2dd22015-11-11 13:43:58 +0100214
215size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
216{
217 const BYTE* const in = (const BYTE* const)src;
218 BYTE headerFlags;
219 U32 cSize;
220
221 if (srcSize < 3) return ERROR(srcSize_wrong);
222
223 headerFlags = *in;
224 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
225
226 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
227 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
228
229 if (bpPtr->blockType == bt_end) return 0;
230 if (bpPtr->blockType == bt_rle) return 1;
231 return cSize;
232}
233
Yann Collet0f366c62015-11-12 16:19:30 +0100234static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100235{
236 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
237 memcpy(dst, src, srcSize);
238 return srcSize;
239}
240
241
242/** ZSTD_decompressLiterals
243 @return : nb of bytes read from src, or an error code*/
244static size_t ZSTD_decompressLiterals(void* dst, size_t* maxDstSizePtr,
245 const void* src, size_t srcSize)
246{
247 const BYTE* ip = (const BYTE*)src;
248
249 const size_t litSize = (MEM_readLE32(src) & 0x1FFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
250 const size_t litCSize = (MEM_readLE32(ip+2) & 0xFFFFFF) >> 5; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
251
252 if (litSize > *maxDstSizePtr) return ERROR(corruption_detected);
253 if (litCSize + 5 > srcSize) return ERROR(corruption_detected);
254
255 if (HUF_isError(HUF_decompress(dst, litSize, ip+5, litCSize))) return ERROR(corruption_detected);
256
257 *maxDstSizePtr = litSize;
258 return litCSize + 5;
259}
260
261
262/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100263 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100264size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100265 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
266{
Yann Collet5be2dd22015-11-11 13:43:58 +0100267 const BYTE* const istart = (const BYTE*) src;
268
269 /* any compressed block with literals segment must be at least this size */
270 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
271
272 switch(*istart & 3)
273 {
274 /* compressed */
275 case 0:
276 {
277 size_t litSize = BLOCKSIZE;
278 const size_t readSize = ZSTD_decompressLiterals(dctx->litBuffer, &litSize, src, srcSize);
279 dctx->litPtr = dctx->litBuffer;
280 dctx->litBufSize = BLOCKSIZE+8;
281 dctx->litSize = litSize;
282 return readSize; /* works if it's an error too */
283 }
284 case IS_RAW:
285 {
286 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
287 if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
288 {
289 if (litSize > srcSize-3) return ERROR(corruption_detected);
290 memcpy(dctx->litBuffer, istart, litSize);
291 dctx->litPtr = dctx->litBuffer;
292 dctx->litBufSize = BLOCKSIZE+8;
293 dctx->litSize = litSize;
294 return litSize+3;
295 }
296 /* direct reference into compressed stream */
297 dctx->litPtr = istart+3;
298 dctx->litBufSize = srcSize-3;
299 dctx->litSize = litSize;
300 return litSize+3; }
301 case IS_RLE:
302 {
303 const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
304 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
305 memset(dctx->litBuffer, istart[3], litSize);
306 dctx->litPtr = dctx->litBuffer;
307 dctx->litBufSize = BLOCKSIZE+8;
308 dctx->litSize = litSize;
309 return 4;
310 }
311 default:
312 return ERROR(corruption_detected); /* forbidden nominal case */
313 }
314}
315
316
317size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
318 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
319 const void* src, size_t srcSize)
320{
321 const BYTE* const istart = (const BYTE* const)src;
322 const BYTE* ip = istart;
323 const BYTE* const iend = istart + srcSize;
324 U32 LLtype, Offtype, MLtype;
325 U32 LLlog, Offlog, MLlog;
326 size_t dumpsLength;
327
328 /* check */
329 if (srcSize < 5) return ERROR(srcSize_wrong);
330
331 /* SeqHead */
332 *nbSeq = MEM_readLE16(ip); ip+=2;
333 LLtype = *ip >> 6;
334 Offtype = (*ip >> 4) & 3;
335 MLtype = (*ip >> 2) & 3;
336 if (*ip & 2)
337 {
338 dumpsLength = ip[2];
339 dumpsLength += ip[1] << 8;
340 ip += 3;
341 }
342 else
343 {
344 dumpsLength = ip[1];
345 dumpsLength += (ip[0] & 1) << 8;
346 ip += 2;
347 }
348 *dumpsPtr = ip;
349 ip += dumpsLength;
350 *dumpsLengthPtr = dumpsLength;
351
352 /* check */
353 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
354
355 /* sequences */
356 {
Yann Collet82368cf2015-11-16 19:10:56 +0100357 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100358 size_t headerSize;
359
360 /* Build DTables */
361 switch(LLtype)
362 {
363 U32 max;
364 case bt_rle :
365 LLlog = 0;
366 FSE_buildDTable_rle(DTableLL, *ip++); break;
367 case bt_raw :
368 LLlog = LLbits;
369 FSE_buildDTable_raw(DTableLL, LLbits); break;
370 default :
371 max = MaxLL;
372 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
373 if (FSE_isError(headerSize)) return ERROR(GENERIC);
374 if (LLlog > LLFSELog) return ERROR(corruption_detected);
375 ip += headerSize;
376 FSE_buildDTable(DTableLL, norm, max, LLlog);
377 }
378
379 switch(Offtype)
380 {
381 U32 max;
382 case bt_rle :
383 Offlog = 0;
384 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
385 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
386 break;
387 case bt_raw :
388 Offlog = Offbits;
389 FSE_buildDTable_raw(DTableOffb, Offbits); break;
390 default :
391 max = MaxOff;
392 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
393 if (FSE_isError(headerSize)) return ERROR(GENERIC);
394 if (Offlog > OffFSELog) return ERROR(corruption_detected);
395 ip += headerSize;
396 FSE_buildDTable(DTableOffb, norm, max, Offlog);
397 }
398
399 switch(MLtype)
400 {
401 U32 max;
402 case bt_rle :
403 MLlog = 0;
404 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
405 FSE_buildDTable_rle(DTableML, *ip++); break;
406 case bt_raw :
407 MLlog = MLbits;
408 FSE_buildDTable_raw(DTableML, MLbits); break;
409 default :
410 max = MaxML;
411 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
412 if (FSE_isError(headerSize)) return ERROR(GENERIC);
413 if (MLlog > MLFSELog) return ERROR(corruption_detected);
414 ip += headerSize;
415 FSE_buildDTable(DTableML, norm, max, MLlog);
416 }
417 }
418
419 return ip-istart;
420}
421
422
423typedef struct {
424 size_t litLength;
425 size_t offset;
426 size_t matchLength;
427} seq_t;
428
429typedef struct {
430 BIT_DStream_t DStream;
431 FSE_DState_t stateLL;
432 FSE_DState_t stateOffb;
433 FSE_DState_t stateML;
434 size_t prevOffset;
435 const BYTE* dumps;
436 const BYTE* dumpsEnd;
437} seqState_t;
438
439
440static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
441{
442 size_t litLength;
443 size_t prevOffset;
444 size_t offset;
445 size_t matchLength;
446 const BYTE* dumps = seqState->dumps;
447 const BYTE* const de = seqState->dumpsEnd;
448
449 /* Literal length */
450 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
Yann Collete4fdad52015-11-25 21:09:17 +0100451 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100452 if (litLength == MaxLL)
453 {
454 U32 add = *dumps++;
455 if (add < 255) litLength += add;
456 else
457 {
458 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
459 dumps += 3;
460 }
461 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
462 }
463
464 /* Offset */
465 {
466 static const U32 offsetPrefix[MaxOff+1] = {
467 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
468 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
469 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
470 U32 offsetCode, nbBits;
471 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* <= maxOff, by table construction */
472 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
473 nbBits = offsetCode - 1;
474 if (offsetCode==0) nbBits = 0; /* cmove */
475 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
476 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
477 if (offsetCode==0) offset = prevOffset; /* cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100478 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collet5be2dd22015-11-11 13:43:58 +0100479 }
480
481 /* MatchLength */
482 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
483 if (matchLength == MaxML)
484 {
485 U32 add = *dumps++;
486 if (add < 255) matchLength += add;
487 else
488 {
489 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
490 dumps += 3;
491 }
492 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
493 }
494 matchLength += MINMATCH;
495
496 /* save result */
497 seq->litLength = litLength;
498 seq->offset = offset;
499 seq->matchLength = matchLength;
500 seqState->dumps = dumps;
501}
502
503
Yann Collet5b78d2f2015-11-12 15:36:05 +0100504FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100505 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100506 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Colletb3a2af92015-11-19 17:13:19 +0100507 BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100508{
Yann Colletb3a2af92015-11-19 17:13:19 +0100509 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
510 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100511 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100512 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
513 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100514 BYTE* const oend_8 = oend-8;
515 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100516 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100517
518 /* check */
519 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
520 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
521 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
522
523 /* copy Literals */
524 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
525 op = oLitEnd;
526 *litPtr = litEnd; /* update for next sequence */
527
528 /* copy Match */
Yann Colletb3a2af92015-11-19 17:13:19 +0100529 /* check */
530 //if (match > oLitEnd) return ERROR(corruption_detected); /* address space overflow test (is clang optimizer wrongly removing this test ?) */
531 if (sequence.offset > (size_t)oLitEnd) return ERROR(corruption_detected); /* address space overflow test (this test seems preserved by clang optimizer) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100532
Yann Colletb3a2af92015-11-19 17:13:19 +0100533 if (match < base)
534 {
535 /* offset beyond prefix */
536 if (match < vBase) return ERROR(corruption_detected);
537 match = dictEnd - (base-match);
538 if (match + sequence.matchLength <= dictEnd)
539 {
540 memcpy(oLitEnd, match, sequence.matchLength);
541 return sequenceLength;
542 }
543 /* span extDict & currentPrefixSegment */
544 {
545 size_t length1 = dictEnd - match;
546 memcpy(oLitEnd, match, length1);
547 op = oLitEnd + length1;
548 sequence.matchLength -= length1;
549 match = base;
550 }
551 }
Yann Collet0f366c62015-11-12 16:19:30 +0100552
Yann Colletb3a2af92015-11-19 17:13:19 +0100553 /* match within prefix */
554 if (sequence.offset < 8)
555 {
556 /* close range match, overlap */
557 const int sub2 = dec64table[sequence.offset];
558 op[0] = match[0];
559 op[1] = match[1];
560 op[2] = match[2];
561 op[3] = match[3];
562 match += dec32table[sequence.offset];
563 ZSTD_copy4(op+4, match);
564 match -= sub2;
565 }
566 else
567 {
568 ZSTD_copy8(op, match);
569 }
570 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100571
Yann Colletb3a2af92015-11-19 17:13:19 +0100572 if (oMatchEnd > oend-12)
573 {
574 if (op < oend_8)
575 {
576 ZSTD_wildcopy(op, match, oend_8 - op);
577 match += oend_8 - op;
578 op = oend_8;
579 }
580 while (op < oMatchEnd) *op++ = *match++;
581 }
582 else
583 {
584 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
585 }
586 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100587}
588
Yann Colletb3a2af92015-11-19 17:13:19 +0100589
Yann Collet5be2dd22015-11-11 13:43:58 +0100590static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100591 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100592 void* dst, size_t maxDstSize,
593 const void* seqStart, size_t seqSize)
594{
Yann Collet5be2dd22015-11-11 13:43:58 +0100595 const BYTE* ip = (const BYTE*)seqStart;
596 const BYTE* const iend = ip + seqSize;
597 BYTE* const ostart = (BYTE* const)dst;
598 BYTE* op = ostart;
599 BYTE* const oend = ostart + maxDstSize;
600 size_t errorCode, dumpsLength;
601 const BYTE* litPtr = dctx->litPtr;
602 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
603 const BYTE* const litEnd = litPtr + dctx->litSize;
604 int nbSeq;
605 const BYTE* dumps;
606 U32* DTableLL = dctx->LLTable;
607 U32* DTableML = dctx->MLTable;
608 U32* DTableOffb = dctx->OffTable;
609 BYTE* const base = (BYTE*) (dctx->base);
Yann Collet5b78d2f2015-11-12 15:36:05 +0100610 BYTE* const vBase = (BYTE*) (dctx->vBase);
611 BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100612
613 /* Build Decoding Tables */
614 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
615 DTableLL, DTableML, DTableOffb,
616 ip, iend-ip);
617 if (ZSTD_isError(errorCode)) return errorCode;
618 ip += errorCode;
619
620 /* Regen sequences */
621 {
622 seq_t sequence;
623 seqState_t seqState;
624
625 memset(&sequence, 0, sizeof(sequence));
626 sequence.offset = 4;
627 seqState.dumps = dumps;
628 seqState.dumpsEnd = dumps + dumpsLength;
629 seqState.prevOffset = 4;
630 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
631 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
632 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
633 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
634 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
635
Yann Colletb3a2af92015-11-19 17:13:19 +0100636 for ( ; (BIT_reloadDStream(&(seqState.DStream)) < BIT_DStream_completed) ; )
Yann Collet5be2dd22015-11-11 13:43:58 +0100637 {
638 size_t oneSeqSize;
639 nbSeq--;
640 ZSTD_decodeSequence(&sequence, &seqState);
Yann Colletb3a2af92015-11-19 17:13:19 +0100641 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
642 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
643 op += oneSeqSize;
644 }
645
646 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
647
648 /* now BIT_reloadDStream(&(seqState.DStream)) >= BIT_DStream_completed) */
649 for ( ; (BIT_reloadDStream(&(seqState.DStream)) == BIT_DStream_completed) && nbSeq ; )
650 {
651 size_t oneSeqSize;
652 nbSeq--;
653 ZSTD_decodeSequence(&sequence, &seqState);
654 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100655 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
656 op += oneSeqSize;
657 }
658
659 /* check if reached exact end */
Yann Colletb3a2af92015-11-19 17:13:19 +0100660 if ( !BIT_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* DStream should be entirely and precisely consumed; otherwise data is corrupted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100661
662 /* last literal segment */
663 {
664 size_t lastLLSize = litEnd - litPtr;
665 if (litPtr > litEnd) return ERROR(corruption_detected);
666 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
667 if (op != litPtr) memcpy(op, litPtr, lastLLSize);
668 op += lastLLSize;
669 }
670 }
671
672 return op-ostart;
673}
674
675
676static size_t ZSTD_decompressBlock(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100677 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100678 void* dst, size_t maxDstSize,
679 const void* src, size_t srcSize)
680{
681 /* blockType == blockCompressed */
682 const BYTE* ip = (const BYTE*)src;
683
684 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100685 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100686 if (ZSTD_isError(litCSize)) return litCSize;
687 ip += litCSize;
688 srcSize -= litCSize;
689
Yann Collet5b78d2f2015-11-12 15:36:05 +0100690 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100691}
692
693
Yann Collet5b78d2f2015-11-12 15:36:05 +0100694size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100695{
696 const BYTE* ip = (const BYTE*)src;
697 const BYTE* iend = ip + srcSize;
698 BYTE* const ostart = (BYTE* const)dst;
699 BYTE* op = ostart;
700 BYTE* const oend = ostart + maxDstSize;
701 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100702 blockProperties_t blockProperties;
703
Yann Collet5b78d2f2015-11-12 15:36:05 +0100704
Yann Colletb2549842015-11-18 11:29:32 +0100705 /* init */
706 ctx->base = ctx->vBase = ctx->dictEnd = dst;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100707
Yann Collet5be2dd22015-11-11 13:43:58 +0100708 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100709 {
710 size_t frameHeaderSize;
711 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100712#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100713 {
714 const U32 magicNumber = MEM_readLE32(src);
715 if (ZSTD_isLegacy(magicNumber))
716 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
717 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100718#endif
Yann Collet88fcd292015-11-25 14:42:45 +0100719 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
720 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
721 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
722 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
723 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(ctx, src, frameHeaderSize);
724 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
725 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100726
727 /* Loop on each block */
728 while (1)
729 {
730 size_t decodedSize=0;
731 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
732 if (ZSTD_isError(cBlockSize)) return cBlockSize;
733
734 ip += ZSTD_blockHeaderSize;
735 remainingSize -= ZSTD_blockHeaderSize;
736 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
737
738 switch(blockProperties.blockType)
739 {
740 case bt_compressed:
741 decodedSize = ZSTD_decompressBlock(ctx, op, oend-op, ip, cBlockSize);
742 break;
743 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100744 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100745 break;
746 case bt_rle :
747 return ERROR(GENERIC); /* not yet supported */
748 break;
749 case bt_end :
750 /* end of frame */
751 if (remainingSize) return ERROR(srcSize_wrong);
752 break;
753 default:
754 return ERROR(GENERIC); /* impossible */
755 }
756 if (cBlockSize == 0) break; /* bt_end */
757
758 if (ZSTD_isError(decodedSize)) return decodedSize;
759 op += decodedSize;
760 ip += cBlockSize;
761 remainingSize -= cBlockSize;
762 }
763
764 return op-ostart;
765}
766
767size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
768{
769 ZSTD_DCtx ctx;
Yann Collet5be2dd22015-11-11 13:43:58 +0100770 return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
771}
772
773
774/* ******************************
775* Streaming Decompression API
776********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100777size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
778{
779 return dctx->expected;
780}
781
782size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
783{
784 /* Sanity check */
785 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Collet88fcd292015-11-25 14:42:45 +0100786 if (dst != ctx->previousDstEnd) /* not contiguous */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100787 {
Yann Colletad50c592015-11-28 17:09:28 +0100788 if (((char*)dst + maxDstSize > (char*)ctx->base) && (dst < ctx->previousDstEnd)) /* rolling buffer : new segment into dictionary */
Yann Colletb2549842015-11-18 11:29:32 +0100789 ctx->base = (char*)dst + maxDstSize; /* temporary affectation, for vBase calculation */
Yann Colletad50c592015-11-28 17:09:28 +0100790 ctx->dictEnd = ctx->previousDstEnd;
791 ctx->vBase = (char*)dst - ((char*)(ctx->previousDstEnd) - (char*)(ctx->base));
Yann Colletb2549842015-11-18 11:29:32 +0100792 ctx->base = dst;
Yann Colletad50c592015-11-28 17:09:28 +0100793 ctx->previousDstEnd = dst;
Yann Colletb2549842015-11-18 11:29:32 +0100794 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100795
Yann Collet88fcd292015-11-25 14:42:45 +0100796 /* Decompress : frame header; part 1 */
797 switch (ctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100798 {
Yann Collet88fcd292015-11-25 14:42:45 +0100799 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100800 {
Yann Collet88fcd292015-11-25 14:42:45 +0100801 /* get frame header size */
802 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
803 ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
Yann Collete4fdad52015-11-25 21:09:17 +0100804 if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
Yann Collet88fcd292015-11-25 14:42:45 +0100805 memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
806 if (ctx->headerSize > ZSTD_frameHeaderSize_min)
807 {
808 ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
809 ctx->stage = ZSTDds_decodeFrameHeader;
810 return 0;
811 }
812 ctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +0100813 }
Yann Collet88fcd292015-11-25 14:42:45 +0100814 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100815 {
Yann Collet88fcd292015-11-25 14:42:45 +0100816 /* get frame header */
817 size_t result;
818 memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
819 result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
820 if (ZSTD_isError(result)) return result;
821 ctx->expected = ZSTD_blockHeaderSize;
822 ctx->stage = ZSTDds_decodeBlockHeader;
823 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +0100824 }
Yann Collet88fcd292015-11-25 14:42:45 +0100825 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +0100826 {
Yann Collet88fcd292015-11-25 14:42:45 +0100827 /* Decode block header */
828 blockProperties_t bp;
829 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
830 if (ZSTD_isError(blockSize)) return blockSize;
831 if (bp.blockType == bt_end)
832 {
833 ctx->expected = 0;
834 ctx->stage = ZSTDds_getFrameHeaderSize;
835 }
836 else
837 {
838 ctx->expected = blockSize;
839 ctx->bType = bp.blockType;
840 ctx->stage = ZSTDds_decompressBlock;
841 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100842
Yann Collet88fcd292015-11-25 14:42:45 +0100843 return 0;
844 }
845 case 3:
846 {
847 /* Decompress : block content */
848 size_t rSize;
849 switch(ctx->bType)
850 {
851 case bt_compressed:
852 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
853 break;
854 case bt_raw :
855 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
856 break;
857 case bt_rle :
858 return ERROR(GENERIC); /* not yet handled */
859 break;
860 case bt_end : /* should never happen (filtered at phase 1) */
861 rSize = 0;
862 break;
863 default:
864 return ERROR(GENERIC);
865 }
866 ctx->stage = ZSTDds_decodeBlockHeader;
867 ctx->expected = ZSTD_blockHeaderSize;
868 ctx->previousDstEnd = (char*)dst + rSize;
869 return rSize;
870 }
871 default:
872 return ERROR(GENERIC); /* impossible */
873 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100874}
875
876