blob: cdc7d3074ace01f00310390300fb0093d7e49d0c [file] [log] [blame]
Yann Collet5be2dd22015-11-11 13:43:58 +01001/*
2 zstd - standard compression library
3 Copyright (C) 2014-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 You can contact the author at :
29 - zstd source repository : https://github.com/Cyan4973/zstd
30 - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
31*/
32
33/* ***************************************************************
34* Tuning parameters
35*****************************************************************/
/*!
 * ZSTD_HEAPMODE :
 * Selects where the default decompression function, ZSTD_decompress(),
 * allocates its working memory :
 *   0 : on the stack
 *   1 : on the heap (requires malloc())
 * The value below is only a default; it applies when the macro is not already defined.
 */
#if !defined(ZSTD_HEAPMODE)
#  define ZSTD_HEAPMODE 1
#endif
Yann Collet5be2dd22015-11-11 13:43:58 +010044
/*!
 * ZSTD_LEGACY_SUPPORT :
 * When set to 1, ZSTD_decompress() can also decode older formats (v0.1+).
 * The value below is only a default; it applies when the macro is not already defined.
 */
#if !defined(ZSTD_LEGACY_SUPPORT)
#  define ZSTD_LEGACY_SUPPORT 0
#endif
52
53
Yann Colletfb810d62016-01-28 00:18:06 +010054/*-*******************************************************
Yann Collet5be2dd22015-11-11 13:43:58 +010055* Includes
56*********************************************************/
57#include <stdlib.h> /* calloc */
58#include <string.h> /* memcpy, memmove */
59#include <stdio.h> /* debug : printf */
60#include "mem.h" /* low level memory routines */
61#include "zstd_static.h"
62#include "zstd_internal.h"
63#include "fse_static.h"
Yann Colletafe07092016-01-25 04:10:46 +010064#include "huff0_static.h"
Yann Collet5be2dd22015-11-11 13:43:58 +010065
66#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
67# include "zstd_legacy.h"
68#endif
69
Yann Colletfb810d62016-01-28 00:18:06 +010070
71/*-*******************************************************
Yann Collet5be2dd22015-11-11 13:43:58 +010072* Compiler specifics
73*********************************************************/
/* FORCE_INLINE : strongest "always inline" qualifier available for the current compiler.
 * GCC_VERSION  : major*100 + minor, defined on every non-MSVC toolchain
 *                (only meaningful when __GNUC__ is defined). */
#if defined(_MSC_VER)    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#else
#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#  if defined(__GNUC__)
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif
87
88
Yann Colletfb810d62016-01-28 00:18:06 +010089/*-*************************************
Yann Collet14983e72015-11-11 21:38:21 +010090* Local types
91***************************************/
92typedef struct
93{
94 blockType_t blockType;
95 U32 origSize;
96} blockProperties_t;
Yann Collet5be2dd22015-11-11 13:43:58 +010097
98
99/* *******************************************************
100* Memory operations
101**********************************************************/
/* Copies exactly 4 bytes from src to dst (regions must not overlap). */
static void ZSTD_copy4(void* dst, const void* src)
{
    memcpy(dst, src, 4);
}
103
104
Yann Collet5be2dd22015-11-11 13:43:58 +0100105/* *************************************
106* Error Management
107***************************************/
Yann Collet14983e72015-11-11 21:38:21 +0100108unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
109
/*! ZSTD_isError
*   Tells whether a size_t function result is an error code.
*   @return : the result of ERR_isError(code) */
unsigned ZSTD_isError(size_t code)
{
    return ERR_isError(code);
}
113
/*! ZSTD_getErrorName
*   Provides the string associated with an error code (useful for debugging).
*   @return : the result of ERR_getErrorName(code) */
const char* ZSTD_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
117
118
Yann Collet5be2dd22015-11-11 13:43:58 +0100119/* *************************************************************
Yann Collet5b78d2f2015-11-12 15:36:05 +0100120* Context management
Yann Collet5be2dd22015-11-11 13:43:58 +0100121***************************************************************/
/* Decoding stage stored in ZSTD_DCtx::stage.
 * ZSTD_decompressBegin() resets it to ZSTDds_getFrameHeaderSize. */
typedef enum {
    ZSTDds_getFrameHeaderSize,
    ZSTDds_decodeFrameHeader,
    ZSTDds_decodeBlockHeader,
    ZSTDds_decompressBlock
} ZSTD_dStage;
124
Yann Collet5be2dd22015-11-11 13:43:58 +0100125struct ZSTD_DCtx_s
126{
Yann Colletfb810d62016-01-28 00:18:06 +0100127 FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
128 FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
129 FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
130 unsigned hufTableX4[HUF_DTABLE_SIZE(HufLog)];
Yann Collet417890c2015-12-04 17:16:37 +0100131 const void* previousDstEnd;
132 const void* base;
133 const void* vBase;
134 const void* dictEnd;
Yann Collet5be2dd22015-11-11 13:43:58 +0100135 size_t expected;
Yann Collet88fcd292015-11-25 14:42:45 +0100136 size_t headerSize;
137 ZSTD_parameters params;
Yann Colletfb810d62016-01-28 00:18:06 +0100138 blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
Yann Collet88fcd292015-11-25 14:42:45 +0100139 ZSTD_dStage stage;
Yann Colletfb810d62016-01-28 00:18:06 +0100140 U32 flagStaticTables;
Yann Collet5be2dd22015-11-11 13:43:58 +0100141 const BYTE* litPtr;
142 size_t litBufSize;
143 size_t litSize;
Yann Colletb923f652016-01-26 03:14:20 +0100144 BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
Yann Collet88fcd292015-11-25 14:42:45 +0100145 BYTE headerBuffer[ZSTD_frameHeaderSize_max];
Yann Collet417890c2015-12-04 17:16:37 +0100146}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
Yann Collet5be2dd22015-11-11 13:43:58 +0100147
Yann Colletfb810d62016-01-28 00:18:06 +0100148size_t sizeofDCtx (void) { return sizeof(ZSTD_DCtx); }
149
Yann Collet7b51a292016-01-26 15:58:49 +0100150size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
Yann Collet5b78d2f2015-11-12 15:36:05 +0100151{
Yann Collet88fcd292015-11-25 14:42:45 +0100152 dctx->expected = ZSTD_frameHeaderSize_min;
153 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100154 dctx->previousDstEnd = NULL;
155 dctx->base = NULL;
156 dctx->vBase = NULL;
157 dctx->dictEnd = NULL;
Yann Colletb923f652016-01-26 03:14:20 +0100158 dctx->hufTableX4[0] = HufLog;
Yann Colletfb810d62016-01-28 00:18:06 +0100159 dctx->flagStaticTables = 0;
Yann Collet5b78d2f2015-11-12 15:36:05 +0100160 return 0;
161}
162
163ZSTD_DCtx* ZSTD_createDCtx(void)
164{
165 ZSTD_DCtx* dctx = (ZSTD_DCtx*)malloc(sizeof(ZSTD_DCtx));
166 if (dctx==NULL) return NULL;
Yann Collet7b51a292016-01-26 15:58:49 +0100167 ZSTD_decompressBegin(dctx);
Yann Collet5b78d2f2015-11-12 15:36:05 +0100168 return dctx;
169}
170
171size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
172{
173 free(dctx);
174 return 0;
175}
176
Yann Collet7b51a292016-01-26 15:58:49 +0100177void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
178{
179 memcpy(dstDCtx, srcDCtx,
180 sizeof(ZSTD_DCtx) - (BLOCKSIZE+WILDCOPY_OVERLENGTH + ZSTD_frameHeaderSize_max)); /* no need to copy workspace */
181}
182
Yann Collet5b78d2f2015-11-12 15:36:05 +0100183
184/* *************************************************************
185* Decompression section
186***************************************************************/
Yann Collet59d1f792016-01-23 19:28:41 +0100187
188/* Frame format description
189 Frame Header - [ Block Header - Block ] - Frame End
190 1) Frame Header
191 - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_internal.h)
192 - 1 byte - Window Descriptor
193 2) Block Header
194 - 3 bytes, starting with a 2-bits descriptor
195 Uncompressed, Compressed, Frame End, unused
196 3) Block
197 See Block Format Description
198 4) Frame End
199 - 3 bytes, compatible with Block Header
200*/
201
202/* Block format description
Yann Colletfb810d62016-01-28 00:18:06 +0100203
204 Block = Literal Section - Sequences Section
205 Prerequisite : size of (compressed) block, maximum size of regenerated data
206
Yann Collet59d1f792016-01-23 19:28:41 +0100207 1) Literal Section
Yann Colletfb810d62016-01-28 00:18:06 +0100208
209 1.1) Header : 1-5 bytes
210 flags: 2 bits
Yann Collet59d1f792016-01-23 19:28:41 +0100211 00 compressed by Huff0
Yann Colletfb810d62016-01-28 00:18:06 +0100212 01 unused
213 10 is Raw (uncompressed)
214 11 is Rle
215 Note : using 01 => Huff0 with precomputed table ?
Yann Collet59d1f792016-01-23 19:28:41 +0100216 Note : delta map ? => compressed ?
Yann Colletfb810d62016-01-28 00:18:06 +0100217
218 1.1.1) Huff0-compressed literal block : 3-5 bytes
219 srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
220 srcSize < 1 KB => 3 bytes (2-2-10-10)
221 srcSize < 16KB => 4 bytes (2-2-14-14)
222 else => 5 bytes (2-2-18-18)
223 big endian convention
224
225 1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
226 size : 5 bits: (IS_RAW<<6) + (0<<4) + size
227 12 bits: (IS_RAW<<6) + (2<<4) + (size>>8)
228 size&255
229 20 bits: (IS_RAW<<6) + (3<<4) + (size>>16)
230 size>>8&255
231 size&255
232
233 1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
234 size : 5 bits: (IS_RLE<<6) + (0<<4) + size
235 12 bits: (IS_RLE<<6) + (2<<4) + (size>>8)
236 size&255
237 20 bits: (IS_RLE<<6) + (3<<4) + (size>>16)
238 size>>8&255
239 size&255
240
241 1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
242 srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
243 srcSize < 1 KB => 3 bytes (2-2-10-10)
244 srcSize < 16KB => 4 bytes (2-2-14-14)
245 else => 5 bytes (2-2-18-18)
246 big endian convention
247
248 1- CTable available (stored into workspace ?)
249 2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
250
251
252 1.2) Literal block content
Yann Collet59d1f792016-01-23 19:28:41 +0100253
254 1.2.1) Huff0 block, using sizes from header
255 See Huff0 format
256
Yann Colletfb810d62016-01-28 00:18:06 +0100257 1.2.2) Huff0 block, using prepared table
Yann Collet59d1f792016-01-23 19:28:41 +0100258
Yann Colletfb810d62016-01-28 00:18:06 +0100259 1.2.3) Raw content
260
261 1.2.4) single byte
262
Yann Collet59d1f792016-01-23 19:28:41 +0100263
264 2) Sequences section
265 TO DO
266*/
267
268
Yann Collet88fcd292015-11-25 14:42:45 +0100269/** ZSTD_decodeFrameHeader_Part1
270* decode the 1st part of the Frame Header, which tells Frame Header size.
271* srcSize must be == ZSTD_frameHeaderSize_min
272* @return : the full size of the Frame Header */
273static size_t ZSTD_decodeFrameHeader_Part1(ZSTD_DCtx* zc, const void* src, size_t srcSize)
274{
275 U32 magicNumber;
276 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
277 magicNumber = MEM_readLE32(src);
278 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
279 zc->headerSize = ZSTD_frameHeaderSize_min;
280 return zc->headerSize;
281}
282
Yann Collet88fcd292015-11-25 14:42:45 +0100283
284size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
285{
286 U32 magicNumber;
287 if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_max;
288 magicNumber = MEM_readLE32(src);
289 if (magicNumber != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
290 memset(params, 0, sizeof(*params));
Yann Collet26415d32015-11-26 12:43:28 +0100291 params->windowLog = (((const BYTE*)src)[4] & 15) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
Yann Colletbf7aa3c2015-11-28 18:19:44 +0100292 if ((((const BYTE*)src)[4] >> 4) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
Yann Collet88fcd292015-11-25 14:42:45 +0100293 return 0;
294}
295
Yann Collet26415d32015-11-26 12:43:28 +0100296/** ZSTD_decodeFrameHeader_Part2
297* decode the full Frame Header
298* srcSize must be the size provided by ZSTD_decodeFrameHeader_Part1
299* @return : 0, or an error code, which can be tested using ZSTD_isError() */
300static size_t ZSTD_decodeFrameHeader_Part2(ZSTD_DCtx* zc, const void* src, size_t srcSize)
301{
Yann Collet00fd7a22015-11-28 16:03:22 +0100302 size_t result;
Yann Collet26415d32015-11-26 12:43:28 +0100303 if (srcSize != zc->headerSize) return ERROR(srcSize_wrong);
Yann Collet00fd7a22015-11-28 16:03:22 +0100304 result = ZSTD_getFrameParams(&(zc->params), src, srcSize);
305 if ((MEM_32bits()) && (zc->params.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bitsImplementation);
306 return result;
Yann Collet26415d32015-11-26 12:43:28 +0100307}
308
Yann Collet5be2dd22015-11-11 13:43:58 +0100309
310size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
311{
312 const BYTE* const in = (const BYTE* const)src;
313 BYTE headerFlags;
314 U32 cSize;
315
316 if (srcSize < 3) return ERROR(srcSize_wrong);
317
318 headerFlags = *in;
319 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
320
321 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
322 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
323
324 if (bpPtr->blockType == bt_end) return 0;
325 if (bpPtr->blockType == bt_rle) return 1;
326 return cSize;
327}
328
Yann Collet59d1f792016-01-23 19:28:41 +0100329
Yann Collet0f366c62015-11-12 16:19:30 +0100330static size_t ZSTD_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100331{
332 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
333 memcpy(dst, src, srcSize);
334 return srcSize;
335}
336
337
Yann Collet5be2dd22015-11-11 13:43:58 +0100338/** ZSTD_decodeLiteralsBlock
Yann Collet14983e72015-11-11 21:38:21 +0100339 @return : nb of bytes read from src (< srcSize ) */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100340size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100341 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
342{
Yann Collet5be2dd22015-11-11 13:43:58 +0100343 const BYTE* const istart = (const BYTE*) src;
344
345 /* any compressed block with literals segment must be at least this size */
346 if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
347
Yann Collet59d1f792016-01-23 19:28:41 +0100348 switch(istart[0]>> 6)
Yann Collet5be2dd22015-11-11 13:43:58 +0100349 {
Yann Collet59d1f792016-01-23 19:28:41 +0100350 case IS_HUF:
Yann Collet5be2dd22015-11-11 13:43:58 +0100351 {
Yann Colletafe07092016-01-25 04:10:46 +0100352 size_t litSize, litCSize, singleStream=0;
Yann Collet59d1f792016-01-23 19:28:41 +0100353 U32 lhSize = ((istart[0]) >> 4) & 3;
354 switch(lhSize)
355 {
356 case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
357 /* 2 - 2 - 10 - 10 */
358 lhSize=3;
Yann Colletafe07092016-01-25 04:10:46 +0100359 singleStream = istart[0] & 16;
Yann Collet59d1f792016-01-23 19:28:41 +0100360 litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
361 litCSize = ((istart[1] & 3) << 8) + istart[2];
362 break;
363 case 2:
364 /* 2 - 2 - 14 - 14 */
365 lhSize=4;
366 litSize = ((istart[0] & 15) << 10) + (istart[1] << 2) + (istart[2] >> 6);
367 litCSize = ((istart[2] & 63) << 8) + istart[3];
368 break;
369 case 3:
370 /* 2 - 2 - 18 - 18 */
371 lhSize=5;
372 litSize = ((istart[0] & 15) << 14) + (istart[1] << 6) + (istart[2] >> 2);
373 litCSize = ((istart[2] & 3) << 16) + (istart[3] << 8) + istart[4];
374 break;
375 }
Yann Colletfb810d62016-01-28 00:18:06 +0100376 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
Yann Collet59d1f792016-01-23 19:28:41 +0100377
Yann Colletafe07092016-01-25 04:10:46 +0100378 if (HUF_isError(singleStream ?
379 HUF_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) :
380 HUF_decompress (dctx->litBuffer, litSize, istart+lhSize, litCSize) ))
Yann Collet59d1f792016-01-23 19:28:41 +0100381 return ERROR(corruption_detected);
382
Yann Collet5be2dd22015-11-11 13:43:58 +0100383 dctx->litPtr = dctx->litBuffer;
384 dctx->litBufSize = BLOCKSIZE+8;
385 dctx->litSize = litSize;
Yann Collet59d1f792016-01-23 19:28:41 +0100386 return litCSize + lhSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100387 }
Yann Colletb923f652016-01-26 03:14:20 +0100388 case IS_PCH:
389 {
390 size_t errorCode;
391 size_t litSize, litCSize;
392 U32 lhSize = ((istart[0]) >> 4) & 3;
393 if (lhSize != 1) /* only case supported for now : small litSize, single stream */
394 return ERROR(corruption_detected);
Yann Colletfb810d62016-01-28 00:18:06 +0100395 if (!dctx->flagStaticTables)
Yann Collet7b51a292016-01-26 15:58:49 +0100396 return ERROR(dictionary_corrupted);
Yann Colletb923f652016-01-26 03:14:20 +0100397
398 /* 2 - 2 - 10 - 10 */
399 lhSize=3;
400 litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
401 litCSize = ((istart[1] & 3) << 8) + istart[2];
402
403 errorCode = HUF_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
404 if (HUF_isError(errorCode)) return ERROR(corruption_detected);
405
406 dctx->litPtr = dctx->litBuffer;
407 dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
408 dctx->litSize = litSize;
409 return litCSize + lhSize;
410 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100411 case IS_RAW:
412 {
Yann Collet59d1f792016-01-23 19:28:41 +0100413 size_t litSize;
414 U32 lhSize = ((istart[0]) >> 4) & 3;
415 switch(lhSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100416 {
Yann Collet59d1f792016-01-23 19:28:41 +0100417 case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
418 lhSize=1;
419 litSize = istart[0] & 31;
420 break;
421 case 2:
422 litSize = ((istart[0] & 15) << 8) + istart[1];
423 break;
424 case 3:
425 litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
426 break;
427 }
428
Yann Collete93d6ce2016-01-31 00:58:06 +0100429 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) /* risk reading beyond src buffer with wildcopy */
Yann Collet59d1f792016-01-23 19:28:41 +0100430 {
Yann Collet37422192016-01-25 16:54:05 +0100431 if (litSize > srcSize-lhSize) return ERROR(corruption_detected);
Yann Collet59d1f792016-01-23 19:28:41 +0100432 memcpy(dctx->litBuffer, istart+lhSize, litSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100433 dctx->litPtr = dctx->litBuffer;
434 dctx->litBufSize = BLOCKSIZE+8;
435 dctx->litSize = litSize;
Yann Colletbc4c8aa2016-01-25 17:26:01 +0100436 return lhSize+litSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100437 }
438 /* direct reference into compressed stream */
Yann Collet59d1f792016-01-23 19:28:41 +0100439 dctx->litPtr = istart+lhSize;
440 dctx->litBufSize = srcSize-lhSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100441 dctx->litSize = litSize;
Yann Collet59d1f792016-01-23 19:28:41 +0100442 return lhSize+litSize;
443 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100444 case IS_RLE:
445 {
Yann Collet59d1f792016-01-23 19:28:41 +0100446 size_t litSize;
447 U32 lhSize = ((istart[0]) >> 4) & 3;
448 switch(lhSize)
449 {
450 case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */
451 lhSize = 1;
452 litSize = istart[0] & 31;
453 break;
454 case 2:
455 litSize = ((istart[0] & 15) << 8) + istart[1];
456 break;
457 case 3:
458 litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2];
459 break;
460 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100461 if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
Yann Collet59d1f792016-01-23 19:28:41 +0100462 memset(dctx->litBuffer, istart[lhSize], litSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100463 dctx->litPtr = dctx->litBuffer;
Yann Colletb923f652016-01-26 03:14:20 +0100464 dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
Yann Collet5be2dd22015-11-11 13:43:58 +0100465 dctx->litSize = litSize;
Yann Collet59d1f792016-01-23 19:28:41 +0100466 return lhSize+1;
Yann Collet5be2dd22015-11-11 13:43:58 +0100467 }
Yann Colletb923f652016-01-26 03:14:20 +0100468 default:
469 return ERROR(corruption_detected); /* impossible */
Yann Collet5be2dd22015-11-11 13:43:58 +0100470 }
471}
472
473
474size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
475 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
476 const void* src, size_t srcSize)
477{
478 const BYTE* const istart = (const BYTE* const)src;
479 const BYTE* ip = istart;
480 const BYTE* const iend = istart + srcSize;
481 U32 LLtype, Offtype, MLtype;
482 U32 LLlog, Offlog, MLlog;
483 size_t dumpsLength;
484
485 /* check */
Yann Collete93d6ce2016-01-31 00:58:06 +0100486 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100487
488 /* SeqHead */
489 *nbSeq = MEM_readLE16(ip); ip+=2;
Yann Collete93d6ce2016-01-31 00:58:06 +0100490 if (*nbSeq==0) return 2;
491
Yann Collet5be2dd22015-11-11 13:43:58 +0100492 LLtype = *ip >> 6;
493 Offtype = (*ip >> 4) & 3;
494 MLtype = (*ip >> 2) & 3;
Yann Colletfb810d62016-01-28 00:18:06 +0100495 if (*ip & 2) {
Yann Collet5be2dd22015-11-11 13:43:58 +0100496 dumpsLength = ip[2];
497 dumpsLength += ip[1] << 8;
498 ip += 3;
Yann Colletfb810d62016-01-28 00:18:06 +0100499 } else {
Yann Collet5be2dd22015-11-11 13:43:58 +0100500 dumpsLength = ip[1];
501 dumpsLength += (ip[0] & 1) << 8;
502 ip += 2;
503 }
504 *dumpsPtr = ip;
505 ip += dumpsLength;
506 *dumpsLengthPtr = dumpsLength;
507
508 /* check */
509 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
510
511 /* sequences */
512 {
Yann Collet82368cf2015-11-16 19:10:56 +0100513 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL >= MaxOff */
Yann Collet5be2dd22015-11-11 13:43:58 +0100514 size_t headerSize;
515
516 /* Build DTables */
517 switch(LLtype)
518 {
519 U32 max;
Yann Colletfb810d62016-01-28 00:18:06 +0100520 case FSE_ENCODING_RLE :
Yann Collet5be2dd22015-11-11 13:43:58 +0100521 LLlog = 0;
Yann Colletfb810d62016-01-28 00:18:06 +0100522 FSE_buildDTable_rle(DTableLL, *ip++);
523 break;
524 case FSE_ENCODING_RAW :
Yann Collet5be2dd22015-11-11 13:43:58 +0100525 LLlog = LLbits;
Yann Colletfb810d62016-01-28 00:18:06 +0100526 FSE_buildDTable_raw(DTableLL, LLbits);
527 break;
528 case FSE_ENCODING_STATIC:
529 break;
530 case FSE_ENCODING_DYNAMIC :
531 default : /* impossible */
Yann Collet5be2dd22015-11-11 13:43:58 +0100532 max = MaxLL;
533 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
534 if (FSE_isError(headerSize)) return ERROR(GENERIC);
535 if (LLlog > LLFSELog) return ERROR(corruption_detected);
536 ip += headerSize;
537 FSE_buildDTable(DTableLL, norm, max, LLlog);
538 }
539
540 switch(Offtype)
541 {
542 U32 max;
Yann Colletfb810d62016-01-28 00:18:06 +0100543 case FSE_ENCODING_RLE :
Yann Collet5be2dd22015-11-11 13:43:58 +0100544 Offlog = 0;
545 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
546 FSE_buildDTable_rle(DTableOffb, *ip++ & MaxOff); /* if *ip > MaxOff, data is corrupted */
547 break;
Yann Colletfb810d62016-01-28 00:18:06 +0100548 case FSE_ENCODING_RAW :
Yann Collet5be2dd22015-11-11 13:43:58 +0100549 Offlog = Offbits;
Yann Colletfb810d62016-01-28 00:18:06 +0100550 FSE_buildDTable_raw(DTableOffb, Offbits);
551 break;
552 case FSE_ENCODING_STATIC:
553 break;
554 case FSE_ENCODING_DYNAMIC :
555 default : /* impossible */
Yann Collet5be2dd22015-11-11 13:43:58 +0100556 max = MaxOff;
557 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
558 if (FSE_isError(headerSize)) return ERROR(GENERIC);
559 if (Offlog > OffFSELog) return ERROR(corruption_detected);
560 ip += headerSize;
561 FSE_buildDTable(DTableOffb, norm, max, Offlog);
562 }
563
564 switch(MLtype)
565 {
566 U32 max;
Yann Colletfb810d62016-01-28 00:18:06 +0100567 case FSE_ENCODING_RLE :
Yann Collet5be2dd22015-11-11 13:43:58 +0100568 MLlog = 0;
569 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
Yann Colletfb810d62016-01-28 00:18:06 +0100570 FSE_buildDTable_rle(DTableML, *ip++);
571 break;
572 case FSE_ENCODING_RAW :
Yann Collet5be2dd22015-11-11 13:43:58 +0100573 MLlog = MLbits;
Yann Colletfb810d62016-01-28 00:18:06 +0100574 FSE_buildDTable_raw(DTableML, MLbits);
575 break;
576 case FSE_ENCODING_STATIC:
577 break;
578 case FSE_ENCODING_DYNAMIC :
579 default : /* impossible */
Yann Collet5be2dd22015-11-11 13:43:58 +0100580 max = MaxML;
581 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
582 if (FSE_isError(headerSize)) return ERROR(GENERIC);
583 if (MLlog > MLFSELog) return ERROR(corruption_detected);
584 ip += headerSize;
585 FSE_buildDTable(DTableML, norm, max, MLlog);
Yann Colletfb810d62016-01-28 00:18:06 +0100586 } }
Yann Collet5be2dd22015-11-11 13:43:58 +0100587
588 return ip-istart;
589}
590
591
/* One decoded sequence, as produced by ZSTD_decodeSequence(). */
typedef struct {
    size_t litLength;     /* nb of literals to copy */
    size_t matchLength;   /* match length (MINMATCH already added) */
    size_t offset;        /* match distance */
} seq_t;
597
598typedef struct {
599 BIT_DStream_t DStream;
600 FSE_DState_t stateLL;
601 FSE_DState_t stateOffb;
602 FSE_DState_t stateML;
603 size_t prevOffset;
604 const BYTE* dumps;
605 const BYTE* dumpsEnd;
606} seqState_t;
607
Yann Collet5be2dd22015-11-11 13:43:58 +0100608static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
609{
610 size_t litLength;
611 size_t prevOffset;
612 size_t offset;
613 size_t matchLength;
614 const BYTE* dumps = seqState->dumps;
615 const BYTE* const de = seqState->dumpsEnd;
616
617 /* Literal length */
Yann Collete93d6ce2016-01-31 00:58:06 +0100618 litLength = FSE_peakSymbol(&(seqState->stateLL));
Yann Collete4fdad52015-11-25 21:09:17 +0100619 prevOffset = litLength ? seq->offset : seqState->prevOffset;
Yann Colletfb810d62016-01-28 00:18:06 +0100620 if (litLength == MaxLL) {
Yann Collet5be2dd22015-11-11 13:43:58 +0100621 U32 add = *dumps++;
622 if (add < 255) litLength += add;
Yann Colletfb810d62016-01-28 00:18:06 +0100623 else {
Yann Collet5be2dd22015-11-11 13:43:58 +0100624 litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
625 dumps += 3;
626 }
627 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
628 }
629
630 /* Offset */
631 {
632 static const U32 offsetPrefix[MaxOff+1] = {
633 1 /*fake*/, 1, 2, 4, 8, 16, 32, 64, 128, 256,
634 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
635 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, /*fake*/ 1, 1, 1, 1, 1 };
Yann Collete93d6ce2016-01-31 00:58:06 +0100636 U32 offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
637 U32 nbBits = offsetCode - 1;
Yann Collet5be2dd22015-11-11 13:43:58 +0100638 if (offsetCode==0) nbBits = 0; /* cmove */
639 offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits);
640 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
Yann Collete93d6ce2016-01-31 00:58:06 +0100641 if (offsetCode==0) offset = prevOffset; /* repcode, cmove */
Yann Collet55aa7f92015-11-20 12:04:52 +0100642 if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */
Yann Collete93d6ce2016-01-31 00:58:06 +0100643 FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
Yann Collet5be2dd22015-11-11 13:43:58 +0100644 }
645
Yann Collete93d6ce2016-01-31 00:58:06 +0100646 /* Literal length update */
647 FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
648 if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
649
Yann Collet5be2dd22015-11-11 13:43:58 +0100650 /* MatchLength */
651 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
Yann Colletfb810d62016-01-28 00:18:06 +0100652 if (matchLength == MaxML) {
Yann Collet5be2dd22015-11-11 13:43:58 +0100653 U32 add = *dumps++;
654 if (add < 255) matchLength += add;
Yann Colletfb810d62016-01-28 00:18:06 +0100655 else {
Yann Collet5be2dd22015-11-11 13:43:58 +0100656 matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
657 dumps += 3;
658 }
659 if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
660 }
661 matchLength += MINMATCH;
662
663 /* save result */
664 seq->litLength = litLength;
665 seq->offset = offset;
666 seq->matchLength = matchLength;
667 seqState->dumps = dumps;
Yann Colletfb810d62016-01-28 00:18:06 +0100668
669#if 0
670 {
671 static U64 totalDecoded = 0;
672 printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
673 (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
674 totalDecoded += litLength + matchLength;
675 }
676#endif
Yann Collet5be2dd22015-11-11 13:43:58 +0100677}
678
679
Yann Collet5b78d2f2015-11-12 15:36:05 +0100680FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
Yann Colletb3a2af92015-11-19 17:13:19 +0100681 BYTE* const oend, seq_t sequence,
Yann Collet5be2dd22015-11-11 13:43:58 +0100682 const BYTE** litPtr, const BYTE* const litLimit_8,
Yann Collet417890c2015-12-04 17:16:37 +0100683 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
Yann Collet5be2dd22015-11-11 13:43:58 +0100684{
Yann Colletb3a2af92015-11-19 17:13:19 +0100685 static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
686 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
Yann Collet5be2dd22015-11-11 13:43:58 +0100687 BYTE* const oLitEnd = op + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100688 const size_t sequenceLength = sequence.litLength + sequence.matchLength;
689 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
Yann Collet5be2dd22015-11-11 13:43:58 +0100690 BYTE* const oend_8 = oend-8;
691 const BYTE* const litEnd = *litPtr + sequence.litLength;
Yann Colletb3a2af92015-11-19 17:13:19 +0100692 const BYTE* match = oLitEnd - sequence.offset;
Yann Collet5be2dd22015-11-11 13:43:58 +0100693
694 /* check */
695 if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
696 if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
697 if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
698
699 /* copy Literals */
700 ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
701 op = oLitEnd;
702 *litPtr = litEnd; /* update for next sequence */
703
704 /* copy Match */
Yann Collet7b51a292016-01-26 15:58:49 +0100705 if (sequence.offset > (size_t)(oLitEnd - base)) {
Yann Collet44287a32015-11-30 23:13:56 +0100706 /* offset beyond prefix */
Yann Collet9f5ab1a2015-12-11 00:27:41 +0100707 if (sequence.offset > (size_t)(oLitEnd - vBase))
708 return ERROR(corruption_detected);
Yann Collet44287a32015-11-30 23:13:56 +0100709 match = dictEnd - (base-match);
Yann Collet7b51a292016-01-26 15:58:49 +0100710 if (match + sequence.matchLength <= dictEnd) {
Yann Collet4bfe4152015-12-06 13:18:37 +0100711 memmove(oLitEnd, match, sequence.matchLength);
Yann Collet44287a32015-11-30 23:13:56 +0100712 return sequenceLength;
713 }
714 /* span extDict & currentPrefixSegment */
715 {
716 size_t length1 = dictEnd - match;
Yann Collet4bfe4152015-12-06 13:18:37 +0100717 memmove(oLitEnd, match, length1);
Yann Collet44287a32015-11-30 23:13:56 +0100718 op = oLitEnd + length1;
719 sequence.matchLength -= length1;
720 match = base;
Yann Colletfb810d62016-01-28 00:18:06 +0100721 } }
Yann Collet0f366c62015-11-12 16:19:30 +0100722
Yann Collet44287a32015-11-30 23:13:56 +0100723 /* match within prefix */
Yann Collet7b51a292016-01-26 15:58:49 +0100724 if (sequence.offset < 8) {
Yann Collet44287a32015-11-30 23:13:56 +0100725 /* close range match, overlap */
726 const int sub2 = dec64table[sequence.offset];
727 op[0] = match[0];
728 op[1] = match[1];
729 op[2] = match[2];
730 op[3] = match[3];
731 match += dec32table[sequence.offset];
732 ZSTD_copy4(op+4, match);
733 match -= sub2;
Yann Colletfb810d62016-01-28 00:18:06 +0100734 } else {
Yann Collet44287a32015-11-30 23:13:56 +0100735 ZSTD_copy8(op, match);
736 }
737 op += 8; match += 8;
Yann Collet5be2dd22015-11-11 13:43:58 +0100738
Yann Collet7b51a292016-01-26 15:58:49 +0100739 if (oMatchEnd > oend-12) {
740 if (op < oend_8) {
Yann Collet44287a32015-11-30 23:13:56 +0100741 ZSTD_wildcopy(op, match, oend_8 - op);
742 match += oend_8 - op;
743 op = oend_8;
744 }
Yann Colletfb810d62016-01-28 00:18:06 +0100745 while (op < oMatchEnd)
746 *op++ = *match++;
747 } else {
Yann Collet44287a32015-11-30 23:13:56 +0100748 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
749 }
750 return sequenceLength;
Yann Collet5be2dd22015-11-11 13:43:58 +0100751}
752
Yann Colletb3a2af92015-11-19 17:13:19 +0100753
Yann Collet5be2dd22015-11-11 13:43:58 +0100754static size_t ZSTD_decompressSequences(
Yann Collet5b78d2f2015-11-12 15:36:05 +0100755 ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100756 void* dst, size_t maxDstSize,
757 const void* seqStart, size_t seqSize)
758{
Yann Collet5be2dd22015-11-11 13:43:58 +0100759 const BYTE* ip = (const BYTE*)seqStart;
760 const BYTE* const iend = ip + seqSize;
761 BYTE* const ostart = (BYTE* const)dst;
762 BYTE* op = ostart;
763 BYTE* const oend = ostart + maxDstSize;
764 size_t errorCode, dumpsLength;
765 const BYTE* litPtr = dctx->litPtr;
766 const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
767 const BYTE* const litEnd = litPtr + dctx->litSize;
768 int nbSeq;
769 const BYTE* dumps;
770 U32* DTableLL = dctx->LLTable;
771 U32* DTableML = dctx->MLTable;
772 U32* DTableOffb = dctx->OffTable;
Yann Collet417890c2015-12-04 17:16:37 +0100773 const BYTE* const base = (const BYTE*) (dctx->base);
774 const BYTE* const vBase = (const BYTE*) (dctx->vBase);
775 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100776
777 /* Build Decoding Tables */
778 errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
779 DTableLL, DTableML, DTableOffb,
Yann Colletfb810d62016-01-28 00:18:06 +0100780 ip, seqSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100781 if (ZSTD_isError(errorCode)) return errorCode;
782 ip += errorCode;
783
784 /* Regen sequences */
Yann Collete93d6ce2016-01-31 00:58:06 +0100785 if (nbSeq) {
Yann Collet5be2dd22015-11-11 13:43:58 +0100786 seq_t sequence;
787 seqState_t seqState;
788
789 memset(&sequence, 0, sizeof(sequence));
790 sequence.offset = 4;
791 seqState.dumps = dumps;
792 seqState.dumpsEnd = dumps + dumpsLength;
793 seqState.prevOffset = 4;
794 errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip);
795 if (ERR_isError(errorCode)) return ERROR(corruption_detected);
796 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
797 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
798 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
799
Yann Collet7b51a292016-01-26 15:58:49 +0100800 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
Yann Colletb3a2af92015-11-19 17:13:19 +0100801 size_t oneSeqSize;
802 nbSeq--;
803 ZSTD_decodeSequence(&sequence, &seqState);
804 oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
Yann Collet5be2dd22015-11-11 13:43:58 +0100805 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
806 op += oneSeqSize;
807 }
808
809 /* check if reached exact end */
Yann Collete93d6ce2016-01-31 00:58:06 +0100810 if (nbSeq)
811 return ERROR(corruption_detected); /* DStream should be entirely and exactly consumed; otherwise data is corrupted */
812 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100813
Yann Collete93d6ce2016-01-31 00:58:06 +0100814 /* last literal segment */
815 {
816 size_t lastLLSize = litEnd - litPtr;
817 if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
818 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
819 memcpy(op, litPtr, lastLLSize);
820 op += lastLLSize;
821 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100822
823 return op-ostart;
824}
825
826
Yann Colletb0125102016-01-09 02:00:10 +0100827static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
828{
Yann Collet7b51a292016-01-26 15:58:49 +0100829 if (dst != dctx->previousDstEnd) { /* not contiguous */
Yann Colletb0125102016-01-09 02:00:10 +0100830 dctx->dictEnd = dctx->previousDstEnd;
831 dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
832 dctx->base = dst;
833 dctx->previousDstEnd = dst;
834 }
835}
836
837
838static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
Yann Collet5be2dd22015-11-11 13:43:58 +0100839 void* dst, size_t maxDstSize,
840 const void* src, size_t srcSize)
841{
842 /* blockType == blockCompressed */
843 const BYTE* ip = (const BYTE*)src;
844
845 /* Decode literals sub-block */
Yann Collet5b78d2f2015-11-12 15:36:05 +0100846 size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100847 if (ZSTD_isError(litCSize)) return litCSize;
848 ip += litCSize;
849 srcSize -= litCSize;
850
Yann Collet5b78d2f2015-11-12 15:36:05 +0100851 return ZSTD_decompressSequences(dctx, dst, maxDstSize, ip, srcSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100852}
853
854
Yann Colletb0125102016-01-09 02:00:10 +0100855size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
856 void* dst, size_t maxDstSize,
857 const void* src, size_t srcSize)
858{
859 ZSTD_checkContinuity(dctx, dst);
860 return ZSTD_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
861}
862
863
Yann Collet7b51a292016-01-26 15:58:49 +0100864/*! ZSTD_decompress_continueDCtx
865* dctx must have been properly initialized */
866static size_t ZSTD_decompress_continueDCtx(ZSTD_DCtx* dctx,
Yann Collet31683c02015-12-18 01:26:48 +0100867 void* dst, size_t maxDstSize,
Yann Collet7b51a292016-01-26 15:58:49 +0100868 const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100869{
870 const BYTE* ip = (const BYTE*)src;
871 const BYTE* iend = ip + srcSize;
872 BYTE* const ostart = (BYTE* const)dst;
873 BYTE* op = ostart;
874 BYTE* const oend = ostart + maxDstSize;
875 size_t remainingSize = srcSize;
Yann Collet5be2dd22015-11-11 13:43:58 +0100876 blockProperties_t blockProperties;
877
878 /* Frame Header */
Yann Collet88fcd292015-11-25 14:42:45 +0100879 {
880 size_t frameHeaderSize;
881 if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Collet5be2dd22015-11-11 13:43:58 +0100882#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
Yann Collet88fcd292015-11-25 14:42:45 +0100883 {
884 const U32 magicNumber = MEM_readLE32(src);
885 if (ZSTD_isLegacy(magicNumber))
886 return ZSTD_decompressLegacy(dst, maxDstSize, src, srcSize, magicNumber);
887 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100888#endif
Yann Collet37422192016-01-25 16:54:05 +0100889 frameHeaderSize = ZSTD_decodeFrameHeader_Part1(dctx, src, ZSTD_frameHeaderSize_min);
Yann Collet88fcd292015-11-25 14:42:45 +0100890 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
891 if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
892 ip += frameHeaderSize; remainingSize -= frameHeaderSize;
Yann Collet37422192016-01-25 16:54:05 +0100893 frameHeaderSize = ZSTD_decodeFrameHeader_Part2(dctx, src, frameHeaderSize);
Yann Collet88fcd292015-11-25 14:42:45 +0100894 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
895 }
Yann Collet5be2dd22015-11-11 13:43:58 +0100896
897 /* Loop on each block */
898 while (1)
899 {
900 size_t decodedSize=0;
901 size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
902 if (ZSTD_isError(cBlockSize)) return cBlockSize;
903
904 ip += ZSTD_blockHeaderSize;
905 remainingSize -= ZSTD_blockHeaderSize;
906 if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
907
908 switch(blockProperties.blockType)
909 {
910 case bt_compressed:
Yann Collet37422192016-01-25 16:54:05 +0100911 decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100912 break;
913 case bt_raw :
Yann Collet0f366c62015-11-12 16:19:30 +0100914 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
Yann Collet5be2dd22015-11-11 13:43:58 +0100915 break;
916 case bt_rle :
917 return ERROR(GENERIC); /* not yet supported */
918 break;
919 case bt_end :
920 /* end of frame */
921 if (remainingSize) return ERROR(srcSize_wrong);
922 break;
923 default:
924 return ERROR(GENERIC); /* impossible */
925 }
926 if (cBlockSize == 0) break; /* bt_end */
927
928 if (ZSTD_isError(decodedSize)) return decodedSize;
929 op += decodedSize;
930 ip += cBlockSize;
931 remainingSize -= cBlockSize;
932 }
933
934 return op-ostart;
935}
936
Yann Collet31683c02015-12-18 01:26:48 +0100937
Yann Collet7b51a292016-01-26 15:58:49 +0100938size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx,
939 void* dst, size_t maxDstSize,
940 const void* src, size_t srcSize)
941{
942 ZSTD_copyDCtx(dctx, refDCtx);
943 ZSTD_checkContinuity(dctx, dst);
944 return ZSTD_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
945}
946
947
948size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
949 void* dst, size_t maxDstSize,
950 const void* src, size_t srcSize,
951 const void* dict, size_t dictSize)
952{
953 ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
954 ZSTD_checkContinuity(dctx, dst);
955 return ZSTD_decompress_continueDCtx(dctx, dst, maxDstSize, src, srcSize);
956}
957
958
Yann Collet31683c02015-12-18 01:26:48 +0100959size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
960{
961 return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
962}
963
Yann Collet5be2dd22015-11-11 13:43:58 +0100964size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
965{
Yann Collet3a3b72f2016-01-11 12:56:11 +0100966#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1)
967 size_t regenSize;
968 ZSTD_DCtx* dctx = ZSTD_createDCtx();
969 if (dctx==NULL) return ERROR(memory_allocation);
970 regenSize = ZSTD_decompressDCtx(dctx, dst, maxDstSize, src, srcSize);
971 ZSTD_freeDCtx(dctx);
972 return regenSize;
973#else
Yann Collet31683c02015-12-18 01:26:48 +0100974 ZSTD_DCtx dctx;
975 return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
Yann Colleta768a302016-01-21 16:04:35 +0100976#endif
Yann Collet5be2dd22015-11-11 13:43:58 +0100977}
978
979
980/* ******************************
981* Streaming Decompression API
982********************************/
Yann Collet5be2dd22015-11-11 13:43:58 +0100983size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
984{
985 return dctx->expected;
986}
987
Yann Collet37422192016-01-25 16:54:05 +0100988size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
Yann Collet5be2dd22015-11-11 13:43:58 +0100989{
990 /* Sanity check */
Yann Collet37422192016-01-25 16:54:05 +0100991 if (srcSize != dctx->expected) return ERROR(srcSize_wrong);
992 ZSTD_checkContinuity(dctx, dst);
Yann Collet5be2dd22015-11-11 13:43:58 +0100993
Yann Collet88fcd292015-11-25 14:42:45 +0100994 /* Decompress : frame header; part 1 */
Yann Collet37422192016-01-25 16:54:05 +0100995 switch (dctx->stage)
Yann Collet5be2dd22015-11-11 13:43:58 +0100996 {
Yann Collet88fcd292015-11-25 14:42:45 +0100997 case ZSTDds_getFrameHeaderSize :
Yann Collet5be2dd22015-11-11 13:43:58 +0100998 {
Yann Collet88fcd292015-11-25 14:42:45 +0100999 /* get frame header size */
1000 if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */
Yann Collet37422192016-01-25 16:54:05 +01001001 dctx->headerSize = ZSTD_decodeFrameHeader_Part1(dctx, src, ZSTD_frameHeaderSize_min);
1002 if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
1003 memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
Yann Collet7b51a292016-01-26 15:58:49 +01001004 if (dctx->headerSize > ZSTD_frameHeaderSize_min) {
Yann Collet37422192016-01-25 16:54:05 +01001005 dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_min;
1006 dctx->stage = ZSTDds_decodeFrameHeader;
Yann Collet88fcd292015-11-25 14:42:45 +01001007 return 0;
1008 }
Yann Collet37422192016-01-25 16:54:05 +01001009 dctx->expected = 0; /* not necessary to copy more */
Yann Collet5be2dd22015-11-11 13:43:58 +01001010 }
Yann Collet88fcd292015-11-25 14:42:45 +01001011 case ZSTDds_decodeFrameHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +01001012 {
Yann Collet88fcd292015-11-25 14:42:45 +01001013 /* get frame header */
1014 size_t result;
Yann Collet37422192016-01-25 16:54:05 +01001015 memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
1016 result = ZSTD_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
Yann Collet88fcd292015-11-25 14:42:45 +01001017 if (ZSTD_isError(result)) return result;
Yann Collet37422192016-01-25 16:54:05 +01001018 dctx->expected = ZSTD_blockHeaderSize;
1019 dctx->stage = ZSTDds_decodeBlockHeader;
Yann Collet88fcd292015-11-25 14:42:45 +01001020 return 0;
Yann Collet5be2dd22015-11-11 13:43:58 +01001021 }
Yann Collet88fcd292015-11-25 14:42:45 +01001022 case ZSTDds_decodeBlockHeader:
Yann Collet5be2dd22015-11-11 13:43:58 +01001023 {
Yann Collet88fcd292015-11-25 14:42:45 +01001024 /* Decode block header */
1025 blockProperties_t bp;
1026 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
1027 if (ZSTD_isError(blockSize)) return blockSize;
Yann Collet7b51a292016-01-26 15:58:49 +01001028 if (bp.blockType == bt_end) {
Yann Collet37422192016-01-25 16:54:05 +01001029 dctx->expected = 0;
1030 dctx->stage = ZSTDds_getFrameHeaderSize;
Yann Collet88fcd292015-11-25 14:42:45 +01001031 }
Yann Collet7b51a292016-01-26 15:58:49 +01001032 else {
Yann Collet37422192016-01-25 16:54:05 +01001033 dctx->expected = blockSize;
1034 dctx->bType = bp.blockType;
1035 dctx->stage = ZSTDds_decompressBlock;
Yann Collet88fcd292015-11-25 14:42:45 +01001036 }
Yann Collet88fcd292015-11-25 14:42:45 +01001037 return 0;
1038 }
Yann Collet417890c2015-12-04 17:16:37 +01001039 case ZSTDds_decompressBlock:
Yann Collet88fcd292015-11-25 14:42:45 +01001040 {
1041 /* Decompress : block content */
1042 size_t rSize;
Yann Collet37422192016-01-25 16:54:05 +01001043 switch(dctx->bType)
Yann Collet88fcd292015-11-25 14:42:45 +01001044 {
1045 case bt_compressed:
Yann Collet37422192016-01-25 16:54:05 +01001046 rSize = ZSTD_decompressBlock_internal(dctx, dst, maxDstSize, src, srcSize);
Yann Collet88fcd292015-11-25 14:42:45 +01001047 break;
1048 case bt_raw :
1049 rSize = ZSTD_copyRawBlock(dst, maxDstSize, src, srcSize);
1050 break;
1051 case bt_rle :
1052 return ERROR(GENERIC); /* not yet handled */
1053 break;
1054 case bt_end : /* should never happen (filtered at phase 1) */
1055 rSize = 0;
1056 break;
1057 default:
Yann Collet7b51a292016-01-26 15:58:49 +01001058 return ERROR(GENERIC); /* impossible */
Yann Collet88fcd292015-11-25 14:42:45 +01001059 }
Yann Collet37422192016-01-25 16:54:05 +01001060 dctx->stage = ZSTDds_decodeBlockHeader;
1061 dctx->expected = ZSTD_blockHeaderSize;
1062 dctx->previousDstEnd = (char*)dst + rSize;
Yann Collet88fcd292015-11-25 14:42:45 +01001063 return rSize;
1064 }
1065 default:
1066 return ERROR(GENERIC); /* impossible */
1067 }
Yann Collet5be2dd22015-11-11 13:43:58 +01001068}
1069
1070
Yann Colletb923f652016-01-26 03:14:20 +01001071static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
Yann Collet417890c2015-12-04 17:16:37 +01001072{
Yann Collet37422192016-01-25 16:54:05 +01001073 dctx->dictEnd = dctx->previousDstEnd;
1074 dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
1075 dctx->base = dict;
1076 dctx->previousDstEnd = (const char*)dict + dictSize;
Yann Collet417890c2015-12-04 17:16:37 +01001077}
Yann Colletb923f652016-01-26 03:14:20 +01001078
Yann Colletb923f652016-01-26 03:14:20 +01001079static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
1080{
Yann Colletfb810d62016-01-28 00:18:06 +01001081 size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
1082 short offcodeNCount[MaxOff+1];
1083 U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
1084 short matchlengthNCount[MaxML+1];
1085 unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
1086 short litlengthNCount[MaxLL+1];
1087 unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
1088
1089 hSize = HUF_readDTableX4(dctx->hufTableX4, dict, dictSize);
Yann Colletb923f652016-01-26 03:14:20 +01001090 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
Yann Colletfb810d62016-01-28 00:18:06 +01001091 dict = (const char*)dict + hSize;
1092 dictSize -= hSize;
1093
1094 offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
1095 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
1096 errorCode = FSE_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
1097 if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
1098 dict = (const char*)dict + offcodeHeaderSize;
1099 dictSize -= offcodeHeaderSize;
1100
1101 matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
1102 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
1103 errorCode = FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
1104 if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
1105 dict = (const char*)dict + matchlengthHeaderSize;
1106 dictSize -= matchlengthHeaderSize;
1107
1108 litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
1109 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
1110 errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
1111 if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
1112
1113 dctx->flagStaticTables = 1;
1114 return hSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
Yann Colletb923f652016-01-26 03:14:20 +01001115}
1116
Yann Collet7b51a292016-01-26 15:58:49 +01001117static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
Yann Colletb923f652016-01-26 03:14:20 +01001118{
1119 size_t eSize;
1120 U32 magic = MEM_readLE32(dict);
1121 if (magic != ZSTD_DICT_MAGIC) {
1122 /* pure content mode */
1123 ZSTD_refDictContent(dctx, dict, dictSize);
1124 return 0;
1125 }
1126 /* load entropy tables */
1127 dict = (const char*)dict + 4;
1128 dictSize -= 4;
1129 eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
1130 if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
1131
1132 /* reference dictionary content */
1133 dict = (const char*)dict + eSize;
1134 dictSize -= eSize;
1135 ZSTD_refDictContent(dctx, dict, dictSize);
1136
1137 return 0;
1138}
1139
Yann Collet7b51a292016-01-26 15:58:49 +01001140
1141size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
1142{
1143 size_t errorCode;
1144 errorCode = ZSTD_decompressBegin(dctx);
1145 if (ZSTD_isError(errorCode)) return errorCode;
1146
1147 if (dict && dictSize) {
1148 errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
1149 if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted);
1150 }
1151
1152 return 0;
1153}
1154