blob: fee3804885eb9de52e7a38c807401794aef473d6 [file] [log] [blame]
Yann Collet4ded9e52016-08-30 10:04:33 -07001/**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01009
Yann Colletb1f3f4b2015-10-18 22:18:32 +010010
11/******************************************
12* Includes
13******************************************/
14#include <stddef.h> /* size_t, ptrdiff_t */
15#include "zstd_v01.h"
inikep8161e732016-09-05 12:29:51 +020016#include "error_private.h"
Yann Colletb1f3f4b2015-10-18 22:18:32 +010017
18
19/******************************************
20* Static allocation
21******************************************/
22/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
23#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
24
25/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
26#define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
27#define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
28 unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
29
30
31/******************************************
32* Error Management
33******************************************/
34#define FSE_LIST_ERRORS(ITEM) \
35 ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
36 ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
37 ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
38 ITEM(FSE_ERROR_corruptionDetected) \
39 ITEM(FSE_ERROR_maxCode)
40
41#define FSE_GENERATE_ENUM(ENUM) ENUM,
42typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
43
44
45/******************************************
46* FSE symbol compression API
47******************************************/
48/*
49 This API consists of small unitary functions, which highly benefit from being inlined.
50 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
51 Visual seems to do it automatically.
52 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
53 If none of these solutions is applicable, include "fse.c" directly.
54*/
55
56typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
57typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
58
59typedef struct
60{
61 size_t bitContainer;
62 int bitPos;
63 char* startPtr;
64 char* ptr;
65 char* endPtr;
66} FSE_CStream_t;
67
68typedef struct
69{
70 ptrdiff_t value;
71 const void* stateTable;
72 const void* symbolTT;
73 unsigned stateLog;
74} FSE_CState_t;
75
76typedef struct
77{
78 size_t bitContainer;
79 unsigned bitsConsumed;
80 const char* ptr;
81 const char* start;
82} FSE_DStream_t;
83
84typedef struct
85{
86 size_t state;
87 const void* table; /* precise table may vary, depending on U16 */
88} FSE_DState_t;
89
90typedef enum { FSE_DStream_unfinished = 0,
91 FSE_DStream_endOfBuffer = 1,
92 FSE_DStream_completed = 2,
93 FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
94 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
95
96
97/****************************************************************
98* Tuning parameters
99****************************************************************/
100/* MEMORY_USAGE :
101* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
102* Increasing memory usage improves compression ratio
103* Reduced memory usage can improve speed, due to cache effect
104* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
105#define FSE_MAX_MEMORY_USAGE 14
106#define FSE_DEFAULT_MEMORY_USAGE 13
107
108/* FSE_MAX_SYMBOL_VALUE :
109* Maximum symbol value authorized.
110* Required for proper stack allocation */
111#define FSE_MAX_SYMBOL_VALUE 255
112
113
114/****************************************************************
115* template functions type & suffix
116****************************************************************/
117#define FSE_FUNCTION_TYPE BYTE
118#define FSE_FUNCTION_EXTENSION
119
120
121/****************************************************************
122* Byte symbol type
123****************************************************************/
124typedef struct
125{
126 unsigned short newState;
127 unsigned char symbol;
128 unsigned char nbBits;
129} FSE_decode_t; /* size == U32 */
130
131
132
133/****************************************************************
134* Compiler specifics
135****************************************************************/
136#ifdef _MSC_VER /* Visual Studio */
137# define FORCE_INLINE static __forceinline
138# include <intrin.h> /* For Visual 2005 */
139# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
140# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
141#else
142# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
Yann Collet1563bfe2016-09-02 11:44:21 -0700143# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
144# ifdef __GNUC__
145# define FORCE_INLINE static inline __attribute__((always_inline))
146# else
147# define FORCE_INLINE static inline
148# endif
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100149# else
Yann Collet1563bfe2016-09-02 11:44:21 -0700150# define FORCE_INLINE static
151# endif /* __STDC_VERSION__ */
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100152#endif
153
154
155/****************************************************************
156* Includes
157****************************************************************/
158#include <stdlib.h> /* malloc, free, qsort */
159#include <string.h> /* memcpy, memset */
160#include <stdio.h> /* printf (debug) */
161
162
163#ifndef MEM_ACCESS_MODULE
164#define MEM_ACCESS_MODULE
165/****************************************************************
166* Basic Types
167*****************************************************************/
168#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
169# include <stdint.h>
170typedef uint8_t BYTE;
171typedef uint16_t U16;
172typedef int16_t S16;
173typedef uint32_t U32;
174typedef int32_t S32;
175typedef uint64_t U64;
176typedef int64_t S64;
177#else
178typedef unsigned char BYTE;
179typedef unsigned short U16;
180typedef signed short S16;
181typedef unsigned int U32;
182typedef signed int S32;
183typedef unsigned long long U64;
184typedef signed long long S64;
185#endif
186
187#endif /* MEM_ACCESS_MODULE */
188
189/****************************************************************
190* Memory I/O
191*****************************************************************/
192/* FSE_FORCE_MEMORY_ACCESS
193 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
194 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
195 * The below switch allow to select different access method for improved performance.
196 * Method 0 (default) : use `memcpy()`. Safe and portable.
197 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
198 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
199 * Method 2 : direct access. This method is portable but violate C standard.
200 * It can generate buggy code on targets generating assembly depending on alignment.
201 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
202 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
203 * Prefer these methods in priority order (0 > 1 > 2)
204 */
205#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
206# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
207# define FSE_FORCE_MEMORY_ACCESS 2
inikep48849f82016-08-10 14:26:35 +0200208# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100209 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
210# define FSE_FORCE_MEMORY_ACCESS 1
211# endif
212#endif
213
214
215static unsigned FSE_32bits(void)
216{
217 return sizeof(void*)==4;
218}
219
220static unsigned FSE_isLittleEndian(void)
221{
222 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
223 return one.c[0];
224}
225
226#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
227
228static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
229static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
230static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
231
232#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
233
234/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
235/* currently only defined for gcc and icc */
236typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
237
238static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
239static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
240static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
241
242#else
243
244static U16 FSE_read16(const void* memPtr)
245{
246 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
247}
248
249static U32 FSE_read32(const void* memPtr)
250{
251 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
252}
253
254static U64 FSE_read64(const void* memPtr)
255{
256 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
257}
258
259#endif // FSE_FORCE_MEMORY_ACCESS
260
261static U16 FSE_readLE16(const void* memPtr)
262{
263 if (FSE_isLittleEndian())
264 return FSE_read16(memPtr);
265 else
266 {
267 const BYTE* p = (const BYTE*)memPtr;
268 return (U16)(p[0] + (p[1]<<8));
269 }
270}
271
272static U32 FSE_readLE32(const void* memPtr)
273{
274 if (FSE_isLittleEndian())
275 return FSE_read32(memPtr);
276 else
277 {
278 const BYTE* p = (const BYTE*)memPtr;
279 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
280 }
281}
282
283
284static U64 FSE_readLE64(const void* memPtr)
285{
286 if (FSE_isLittleEndian())
287 return FSE_read64(memPtr);
288 else
289 {
290 const BYTE* p = (const BYTE*)memPtr;
291 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
292 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
293 }
294}
295
296static size_t FSE_readLEST(const void* memPtr)
297{
298 if (FSE_32bits())
299 return (size_t)FSE_readLE32(memPtr);
300 else
301 return (size_t)FSE_readLE64(memPtr);
302}
303
304
305
306/****************************************************************
307* Constants
308*****************************************************************/
309#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
310#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
311#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
312#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
313#define FSE_MIN_TABLELOG 5
314
315#define FSE_TABLELOG_ABSOLUTE_MAX 15
316#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
317#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
318#endif
319
320
321/****************************************************************
322* Error Management
323****************************************************************/
324#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
325
326
327/****************************************************************
328* Complex types
329****************************************************************/
330typedef struct
331{
332 int deltaFindState;
333 U32 deltaNbBits;
334} FSE_symbolCompressionTransform; /* total 8 bytes */
335
336typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
337
338/****************************************************************
339* Internal functions
340****************************************************************/
341FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
342{
343# if defined(_MSC_VER) /* Visual */
344 unsigned long r;
345 _BitScanReverse ( &r, val );
346 return (unsigned) r;
347# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
348 return 31 - __builtin_clz (val);
349# else /* Software version */
350 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
351 U32 v = val;
352 unsigned r;
353 v |= v >> 1;
354 v |= v >> 2;
355 v |= v >> 4;
356 v |= v >> 8;
357 v |= v >> 16;
358 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
359 return r;
360# endif
361}
362
363
364/****************************************************************
365* Templates
366****************************************************************/
367/*
368 designed to be included
369 for type-specific functions (template emulation in C)
370 Objective is to write these functions only once, for improved maintenance
371*/
372
373/* safety checks */
374#ifndef FSE_FUNCTION_EXTENSION
375# error "FSE_FUNCTION_EXTENSION must be defined"
376#endif
377#ifndef FSE_FUNCTION_TYPE
378# error "FSE_FUNCTION_TYPE must be defined"
379#endif
380
381/* Function names */
382#define FSE_CAT(X,Y) X##Y
383#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
384#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
385
386
387
388static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
389
Yann Colletddb8ebd2016-05-05 04:59:53 +0200390#define FSE_DECODE_TYPE FSE_decode_t
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100391
392
393typedef struct {
394 U16 tableLog;
395 U16 fastMode;
396} FSE_DTableHeader; /* sizeof U32 */
397
Yann Colletddb8ebd2016-05-05 04:59:53 +0200398static size_t FSE_buildDTable
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100399(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
400{
Yann Collet1fdd8232016-01-06 12:35:42 +0100401 void* ptr = dt;
402 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
403 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100404 const U32 tableSize = 1 << tableLog;
405 const U32 tableMask = tableSize-1;
406 const U32 step = FSE_tableStep(tableSize);
407 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
408 U32 position = 0;
409 U32 highThreshold = tableSize-1;
410 const S16 largeLimit= (S16)(1 << (tableLog-1));
411 U32 noLarge = 1;
412 U32 s;
413
414 /* Sanity Checks */
415 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
416 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
417
418 /* Init, lay down lowprob symbols */
419 DTableH[0].tableLog = (U16)tableLog;
420 for (s=0; s<=maxSymbolValue; s++)
421 {
422 if (normalizedCounter[s]==-1)
423 {
424 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
425 symbolNext[s] = 1;
426 }
427 else
428 {
429 if (normalizedCounter[s] >= largeLimit) noLarge=0;
430 symbolNext[s] = normalizedCounter[s];
431 }
432 }
433
434 /* Spread symbols */
435 for (s=0; s<=maxSymbolValue; s++)
436 {
437 int i;
438 for (i=0; i<normalizedCounter[s]; i++)
439 {
440 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
441 position = (position + step) & tableMask;
442 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
443 }
444 }
445
446 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
447
448 /* Build Decoding table */
449 {
450 U32 i;
451 for (i=0; i<tableSize; i++)
452 {
453 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
454 U16 nextState = symbolNext[symbol]++;
455 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
456 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
457 }
458 }
459
460 DTableH->fastMode = (U16)noLarge;
461 return 0;
462}
463
464
465/******************************************
466* FSE byte symbol
467******************************************/
468#ifndef FSE_COMMONDEFS_ONLY
469
470static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
471
472static short FSE_abs(short a)
473{
474 return a<0? -a : a;
475}
476
477
478/****************************************************************
479* Header bitstream management
480****************************************************************/
481static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
482 const void* headerBuffer, size_t hbSize)
483{
484 const BYTE* const istart = (const BYTE*) headerBuffer;
485 const BYTE* const iend = istart + hbSize;
486 const BYTE* ip = istart;
487 int nbBits;
488 int remaining;
489 int threshold;
490 U32 bitStream;
491 int bitCount;
492 unsigned charnum = 0;
493 int previous0 = 0;
494
495 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
496 bitStream = FSE_readLE32(ip);
497 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
498 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
499 bitStream >>= 4;
500 bitCount = 4;
501 *tableLogPtr = nbBits;
502 remaining = (1<<nbBits)+1;
503 threshold = 1<<nbBits;
504 nbBits++;
505
506 while ((remaining>1) && (charnum<=*maxSVPtr))
507 {
508 if (previous0)
509 {
510 unsigned n0 = charnum;
511 while ((bitStream & 0xFFFF) == 0xFFFF)
512 {
513 n0+=24;
514 if (ip < iend-5)
515 {
516 ip+=2;
517 bitStream = FSE_readLE32(ip) >> bitCount;
518 }
519 else
520 {
521 bitStream >>= 16;
522 bitCount+=16;
523 }
524 }
525 while ((bitStream & 3) == 3)
526 {
527 n0+=3;
528 bitStream>>=2;
529 bitCount+=2;
530 }
531 n0 += bitStream & 3;
532 bitCount += 2;
533 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
534 while (charnum < n0) normalizedCounter[charnum++] = 0;
535 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
536 {
537 ip += bitCount>>3;
538 bitCount &= 7;
539 bitStream = FSE_readLE32(ip) >> bitCount;
540 }
541 else
542 bitStream >>= 2;
543 }
544 {
545 const short max = (short)((2*threshold-1)-remaining);
546 short count;
547
548 if ((bitStream & (threshold-1)) < (U32)max)
549 {
550 count = (short)(bitStream & (threshold-1));
551 bitCount += nbBits-1;
552 }
553 else
554 {
555 count = (short)(bitStream & (2*threshold-1));
556 if (count >= threshold) count -= max;
557 bitCount += nbBits;
558 }
559
560 count--; /* extra accuracy */
561 remaining -= FSE_abs(count);
562 normalizedCounter[charnum++] = count;
563 previous0 = !count;
564 while (remaining < threshold)
565 {
566 nbBits--;
567 threshold >>= 1;
568 }
569
570 {
571 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
572 {
573 ip += bitCount>>3;
574 bitCount &= 7;
575 }
576 else
577 {
578 bitCount -= (int)(8 * (iend - 4 - ip));
579 ip = iend - 4;
580 }
581 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
582 }
583 }
584 }
585 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
586 *maxSVPtr = charnum-1;
587
588 ip += (bitCount+7)>>3;
589 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
590 return ip-istart;
591}
592
593
594/*********************************************************
595* Decompression (Byte symbols)
596*********************************************************/
597static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
598{
Yann Collet1fdd8232016-01-06 12:35:42 +0100599 void* ptr = dt;
600 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
601 FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100602
603 DTableH->tableLog = 0;
604 DTableH->fastMode = 0;
605
606 cell->newState = 0;
607 cell->symbol = symbolValue;
608 cell->nbBits = 0;
609
610 return 0;
611}
612
613
614static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
615{
Yann Collet1fdd8232016-01-06 12:35:42 +0100616 void* ptr = dt;
617 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
618 FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100619 const unsigned tableSize = 1 << nbBits;
620 const unsigned tableMask = tableSize - 1;
621 const unsigned maxSymbolValue = tableMask;
622 unsigned s;
623
624 /* Sanity checks */
625 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
626
627 /* Build Decoding Table */
628 DTableH->tableLog = (U16)nbBits;
629 DTableH->fastMode = 1;
630 for (s=0; s<=maxSymbolValue; s++)
631 {
632 dinfo[s].newState = 0;
633 dinfo[s].symbol = (BYTE)s;
634 dinfo[s].nbBits = (BYTE)nbBits;
635 }
636
637 return 0;
638}
639
640
641/* FSE_initDStream
642 * Initialize a FSE_DStream_t.
643 * srcBuffer must point at the beginning of an FSE block.
644 * The function result is the size of the FSE_block (== srcSize).
645 * If srcSize is too small, the function will return an errorCode;
646 */
647static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
648{
649 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
650
651 if (srcSize >= sizeof(size_t))
652 {
653 U32 contain32;
654 bitD->start = (const char*)srcBuffer;
655 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
656 bitD->bitContainer = FSE_readLEST(bitD->ptr);
657 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
658 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
659 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
660 }
661 else
662 {
663 U32 contain32;
664 bitD->start = (const char*)srcBuffer;
665 bitD->ptr = bitD->start;
666 bitD->bitContainer = *(const BYTE*)(bitD->start);
667 switch(srcSize)
668 {
669 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
670 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
671 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
672 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
673 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
674 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
675 default:;
676 }
677 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
678 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
679 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
680 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
681 }
682
683 return srcSize;
684}
685
686
Yann Collet1fdd8232016-01-06 12:35:42 +0100687/*!FSE_lookBits
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100688 * Provides next n bits from the bitContainer.
689 * bitContainer is not modified (bits are still present for next read/look)
690 * On 32-bits, maxNbBits==25
691 * On 64-bits, maxNbBits==57
692 * return : value extracted.
693 */
694static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
695{
696 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
697 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
698}
699
700static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
701{
702 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
703 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
704}
705
706static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
707{
708 bitD->bitsConsumed += nbBits;
709}
710
711
Yann Collet1fdd8232016-01-06 12:35:42 +0100712/*!FSE_readBits
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100713 * Read next n bits from the bitContainer.
714 * On 32-bits, don't read more than maxNbBits==25
715 * On 64-bits, don't read more than maxNbBits==57
716 * Use the fast variant *only* if n >= 1.
717 * return : value extracted.
718 */
719static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
720{
721 size_t value = FSE_lookBits(bitD, nbBits);
722 FSE_skipBits(bitD, nbBits);
723 return value;
724}
725
726static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
727{
728 size_t value = FSE_lookBitsFast(bitD, nbBits);
729 FSE_skipBits(bitD, nbBits);
730 return value;
731}
732
733static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
734{
735 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
736 return FSE_DStream_tooFar;
737
738 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
739 {
740 bitD->ptr -= bitD->bitsConsumed >> 3;
741 bitD->bitsConsumed &= 7;
742 bitD->bitContainer = FSE_readLEST(bitD->ptr);
743 return FSE_DStream_unfinished;
744 }
745 if (bitD->ptr == bitD->start)
746 {
747 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
748 return FSE_DStream_completed;
749 }
750 {
751 U32 nbBytes = bitD->bitsConsumed >> 3;
752 U32 result = FSE_DStream_unfinished;
753 if (bitD->ptr - nbBytes < bitD->start)
754 {
755 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
756 result = FSE_DStream_endOfBuffer;
757 }
758 bitD->ptr -= nbBytes;
759 bitD->bitsConsumed -= nbBytes*8;
760 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
761 return result;
762 }
763}
764
765
766static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
767{
Yann Collet1fdd8232016-01-06 12:35:42 +0100768 const void* ptr = dt;
769 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100770 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
771 FSE_reloadDStream(bitD);
772 DStatePtr->table = dt + 1;
773}
774
775static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
776{
777 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
778 const U32 nbBits = DInfo.nbBits;
779 BYTE symbol = DInfo.symbol;
780 size_t lowBits = FSE_readBits(bitD, nbBits);
781
782 DStatePtr->state = DInfo.newState + lowBits;
783 return symbol;
784}
785
786static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
787{
788 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
789 const U32 nbBits = DInfo.nbBits;
790 BYTE symbol = DInfo.symbol;
791 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
792
793 DStatePtr->state = DInfo.newState + lowBits;
794 return symbol;
795}
796
797/* FSE_endOfDStream
798 Tells if bitD has reached end of bitStream or not */
799
800static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
801{
802 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
803}
804
805static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
806{
807 return DStatePtr->state == 0;
808}
809
810
811FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
812 void* dst, size_t maxDstSize,
813 const void* cSrc, size_t cSrcSize,
814 const FSE_DTable* dt, const unsigned fast)
815{
816 BYTE* const ostart = (BYTE*) dst;
817 BYTE* op = ostart;
818 BYTE* const omax = op + maxDstSize;
819 BYTE* const olimit = omax-3;
820
821 FSE_DStream_t bitD;
822 FSE_DState_t state1;
823 FSE_DState_t state2;
824 size_t errorCode;
825
826 /* Init */
827 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
828 if (FSE_isError(errorCode)) return errorCode;
829
830 FSE_initDState(&state1, &bitD, dt);
831 FSE_initDState(&state2, &bitD, dt);
832
833#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
834
835 /* 4 symbols per loop */
836 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
837 {
838 op[0] = FSE_GETSYMBOL(&state1);
839
840 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
841 FSE_reloadDStream(&bitD);
842
843 op[1] = FSE_GETSYMBOL(&state2);
844
845 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
846 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
847
848 op[2] = FSE_GETSYMBOL(&state1);
849
850 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
851 FSE_reloadDStream(&bitD);
852
853 op[3] = FSE_GETSYMBOL(&state2);
854 }
855
856 /* tail */
857 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
858 while (1)
859 {
860 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
861 break;
862
863 *op++ = FSE_GETSYMBOL(&state1);
864
865 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
866 break;
867
868 *op++ = FSE_GETSYMBOL(&state2);
869 }
870
871 /* end ? */
872 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
873 return op-ostart;
874
875 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
876
877 return (size_t)-FSE_ERROR_corruptionDetected;
878}
879
880
881static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
882 const void* cSrc, size_t cSrcSize,
883 const FSE_DTable* dt)
884{
Yann Collet494c7862016-01-06 12:54:02 +0100885 FSE_DTableHeader DTableH;
886 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100887
888 /* select fast mode (static) */
Yann Collet494c7862016-01-06 12:54:02 +0100889 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100890 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
891}
892
893
894static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
895{
896 const BYTE* const istart = (const BYTE*)cSrc;
897 const BYTE* ip = istart;
898 short counting[FSE_MAX_SYMBOL_VALUE+1];
899 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
900 unsigned tableLog;
901 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
902 size_t errorCode;
903
904 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
905
906 /* normal FSE decoding mode */
907 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
908 if (FSE_isError(errorCode)) return errorCode;
909 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
910 ip += errorCode;
911 cSrcSize -= errorCode;
912
913 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
914 if (FSE_isError(errorCode)) return errorCode;
915
916 /* always return, even if it is an error code */
917 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
918}
919
920
921
Yann Collet1fdd8232016-01-06 12:35:42 +0100922/* *******************************************************
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100923* Huff0 : Huffman block compression
924*********************************************************/
925#define HUF_MAX_SYMBOL_VALUE 255
926#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
927#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
928#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
929#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
930# error "HUF_MAX_TABLELOG is too large !"
931#endif
932
933typedef struct HUF_CElt_s {
934 U16 val;
935 BYTE nbBits;
936} HUF_CElt ;
937
938typedef struct nodeElt_s {
939 U32 count;
940 U16 parent;
941 BYTE byte;
942 BYTE nbBits;
943} nodeElt;
944
945
Yann Collet1fdd8232016-01-06 12:35:42 +0100946/* *******************************************************
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100947* Huff0 : Huffman block decompression
948*********************************************************/
949typedef struct {
950 BYTE byte;
951 BYTE nbBits;
952} HUF_DElt;
953
954static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
955{
956 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
957 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
958 U32 weightTotal;
959 U32 maxBits;
960 const BYTE* ip = (const BYTE*) src;
961 size_t iSize = ip[0];
962 size_t oSize;
963 U32 n;
964 U32 nextRankStart;
Yann Collet1fdd8232016-01-06 12:35:42 +0100965 void* ptr = DTable+1;
966 HUF_DElt* const dt = (HUF_DElt*)ptr;
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100967
968 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
969 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
970 if (iSize >= 128) /* special header */
971 {
972 if (iSize >= (242)) /* RLE */
973 {
974 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
975 oSize = l[iSize-242];
Yann Colletaa074052015-10-30 11:21:50 +0100976 memset(huffWeight, 1, sizeof(huffWeight));
Yann Colletb1f3f4b2015-10-18 22:18:32 +0100977 iSize = 0;
978 }
979 else /* Incompressible */
980 {
981 oSize = iSize - 127;
982 iSize = ((oSize+1)/2);
983 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
984 ip += 1;
985 for (n=0; n<oSize; n+=2)
986 {
987 huffWeight[n] = ip[n/2] >> 4;
988 huffWeight[n+1] = ip[n/2] & 15;
989 }
990 }
991 }
992 else /* header compressed with FSE (normal case) */
993 {
994 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
995 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
996 if (FSE_isError(oSize)) return oSize;
997 }
998
999 /* collect weight stats */
1000 memset(rankVal, 0, sizeof(rankVal));
1001 weightTotal = 0;
1002 for (n=0; n<oSize; n++)
1003 {
1004 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1005 rankVal[huffWeight[n]]++;
1006 weightTotal += (1 << huffWeight[n]) >> 1;
1007 }
1008
1009 /* get last non-null symbol weight (implied, total must be 2^n) */
1010 maxBits = FSE_highbit32(weightTotal) + 1;
1011 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
1012 DTable[0] = (U16)maxBits;
1013 {
1014 U32 total = 1 << maxBits;
1015 U32 rest = total - weightTotal;
1016 U32 verif = 1 << FSE_highbit32(rest);
1017 U32 lastWeight = FSE_highbit32(rest) + 1;
1018 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
1019 huffWeight[oSize] = (BYTE)lastWeight;
1020 rankVal[lastWeight]++;
1021 }
1022
1023 /* check tree construction validity */
1024 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
1025
1026 /* Prepare ranks */
1027 nextRankStart = 0;
1028 for (n=1; n<=maxBits; n++)
1029 {
1030 U32 current = nextRankStart;
1031 nextRankStart += (rankVal[n] << (n-1));
1032 rankVal[n] = current;
1033 }
1034
1035 /* fill DTable */
1036 for (n=0; n<=oSize; n++)
1037 {
1038 const U32 w = huffWeight[n];
1039 const U32 length = (1 << w) >> 1;
1040 U32 i;
1041 HUF_DElt D;
1042 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1043 for (i = rankVal[w]; i < rankVal[w] + length; i++)
1044 dt[i] = D;
1045 rankVal[w] += length;
1046 }
1047
1048 return iSize+1;
1049}
1050
1051
1052static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1053{
1054 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1055 const BYTE c = dt[val].byte;
1056 FSE_skipBits(Dstream, dt[val].nbBits);
1057 return c;
1058}
1059
1060static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
1061 void* dst, size_t maxDstSize,
1062 const void* cSrc, size_t cSrcSize,
1063 const U16* DTable)
1064{
1065 BYTE* const ostart = (BYTE*) dst;
1066 BYTE* op = ostart;
1067 BYTE* const omax = op + maxDstSize;
1068 BYTE* const olimit = omax-15;
1069
Yann Collet1fdd8232016-01-06 12:35:42 +01001070 const void* ptr = DTable;
1071 const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001072 const U32 dtLog = DTable[0];
1073 size_t errorCode;
1074 U32 reloadStatus;
1075
1076 /* Init */
1077
1078 const U16* jumpTable = (const U16*)cSrc;
1079 const size_t length1 = FSE_readLE16(jumpTable);
1080 const size_t length2 = FSE_readLE16(jumpTable+1);
1081 const size_t length3 = FSE_readLE16(jumpTable+2);
1082 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
1083 const char* const start1 = (const char*)(cSrc) + 6;
1084 const char* const start2 = start1 + length1;
1085 const char* const start3 = start2 + length2;
1086 const char* const start4 = start3 + length3;
1087 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1088
1089 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1090
1091 errorCode = FSE_initDStream(&bitD1, start1, length1);
1092 if (FSE_isError(errorCode)) return errorCode;
1093 errorCode = FSE_initDStream(&bitD2, start2, length2);
1094 if (FSE_isError(errorCode)) return errorCode;
1095 errorCode = FSE_initDStream(&bitD3, start3, length3);
1096 if (FSE_isError(errorCode)) return errorCode;
1097 errorCode = FSE_initDStream(&bitD4, start4, length4);
1098 if (FSE_isError(errorCode)) return errorCode;
1099
1100 reloadStatus=FSE_reloadDStream(&bitD2);
1101
1102 /* 16 symbols per loop */
1103 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
1104 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1105 {
1106#define HUF_DECODE_SYMBOL_0(n, Dstream) \
1107 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1108
1109#define HUF_DECODE_SYMBOL_1(n, Dstream) \
1110 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1111 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1112
1113#define HUF_DECODE_SYMBOL_2(n, Dstream) \
1114 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1115 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1116
1117 HUF_DECODE_SYMBOL_1( 0, bitD1);
1118 HUF_DECODE_SYMBOL_1( 1, bitD2);
1119 HUF_DECODE_SYMBOL_1( 2, bitD3);
1120 HUF_DECODE_SYMBOL_1( 3, bitD4);
1121 HUF_DECODE_SYMBOL_2( 4, bitD1);
1122 HUF_DECODE_SYMBOL_2( 5, bitD2);
1123 HUF_DECODE_SYMBOL_2( 6, bitD3);
1124 HUF_DECODE_SYMBOL_2( 7, bitD4);
1125 HUF_DECODE_SYMBOL_1( 8, bitD1);
1126 HUF_DECODE_SYMBOL_1( 9, bitD2);
1127 HUF_DECODE_SYMBOL_1(10, bitD3);
1128 HUF_DECODE_SYMBOL_1(11, bitD4);
1129 HUF_DECODE_SYMBOL_0(12, bitD1);
1130 HUF_DECODE_SYMBOL_0(13, bitD2);
1131 HUF_DECODE_SYMBOL_0(14, bitD3);
1132 HUF_DECODE_SYMBOL_0(15, bitD4);
1133 }
1134
1135 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
1136 return (size_t)-FSE_ERROR_corruptionDetected;
1137
1138 /* tail */
1139 {
1140 // bitTail = bitD1; // *much* slower : -20% !??!
1141 FSE_DStream_t bitTail;
1142 bitTail.ptr = bitD1.ptr;
1143 bitTail.bitsConsumed = bitD1.bitsConsumed;
1144 bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
1145 bitTail.start = start1;
1146 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1147 {
1148 HUF_DECODE_SYMBOL_0(0, bitTail);
1149 }
1150
1151 if (FSE_endOfDStream(&bitTail))
1152 return op-ostart;
1153 }
1154
1155 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1156
1157 return (size_t)-FSE_ERROR_corruptionDetected;
1158}
1159
1160
1161static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1162{
1163 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1164 const BYTE* ip = (const BYTE*) cSrc;
1165 size_t errorCode;
1166
1167 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1168 if (FSE_isError(errorCode)) return errorCode;
1169 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1170 ip += errorCode;
1171 cSrcSize -= errorCode;
1172
1173 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1174}
1175
1176
1177#endif /* FSE_COMMONDEFS_ONLY */
1178
1179/*
1180 zstd - standard compression library
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001181 Copyright (C) 2014-2015, Yann Collet.
1182
1183 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1184
1185 Redistribution and use in source and binary forms, with or without
1186 modification, are permitted provided that the following conditions are
1187 met:
1188 * Redistributions of source code must retain the above copyright
1189 notice, this list of conditions and the following disclaimer.
1190 * Redistributions in binary form must reproduce the above
1191 copyright notice, this list of conditions and the following disclaimer
1192 in the documentation and/or other materials provided with the
1193 distribution.
1194 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1195 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1196 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1197 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1198 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1199 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1200 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1201 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1202 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1203 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1204 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1205
1206 You can contact the author at :
1207 - zstd source repository : https://github.com/Cyan4973/zstd
1208 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1209*/
1210
1211/****************************************************************
1212* Tuning parameters
1213*****************************************************************/
1214/* MEMORY_USAGE :
1215* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1216* Increasing memory usage improves compression ratio
1217* Reduced memory usage can improve speed, due to cache effect */
1218#define ZSTD_MEMORY_USAGE 17
1219
1220
1221/**************************************
1222 CPU Feature Detection
1223**************************************/
1224/*
1225 * Automated efficient unaligned memory access detection
1226 * Based on known hardware architectures
1227 * This list will be updated thanks to feedbacks
1228 */
1229#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1230 || defined(__ARM_FEATURE_UNALIGNED) \
1231 || defined(__i386__) || defined(__x86_64__) \
1232 || defined(_M_IX86) || defined(_M_X64) \
1233 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1234 || (defined(_M_ARM) && (_M_ARM >= 7))
1235# define ZSTD_UNALIGNED_ACCESS 1
1236#else
1237# define ZSTD_UNALIGNED_ACCESS 0
1238#endif
1239
1240
1241/********************************************************
1242* Includes
1243*********************************************************/
1244#include <stdlib.h> /* calloc */
1245#include <string.h> /* memcpy, memmove */
1246#include <stdio.h> /* debug : printf */
1247
1248
1249/********************************************************
1250* Compiler specifics
1251*********************************************************/
1252#ifdef __AVX2__
1253# include <immintrin.h> /* AVX2 intrinsics */
1254#endif
1255
1256#ifdef _MSC_VER /* Visual Studio */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001257# include <intrin.h> /* For Visual 2005 */
1258# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1259# pragma warning(disable : 4324) /* disable: C4324: padded structure */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001260#endif
1261
1262
1263#ifndef MEM_ACCESS_MODULE
1264#define MEM_ACCESS_MODULE
1265/********************************************************
1266* Basic Types
1267*********************************************************/
1268#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
1269# include <stdint.h>
1270typedef uint8_t BYTE;
1271typedef uint16_t U16;
1272typedef int16_t S16;
1273typedef uint32_t U32;
1274typedef int32_t S32;
1275typedef uint64_t U64;
1276#else
1277typedef unsigned char BYTE;
1278typedef unsigned short U16;
1279typedef signed short S16;
1280typedef unsigned int U32;
1281typedef signed int S32;
1282typedef unsigned long long U64;
1283#endif
1284
1285#endif /* MEM_ACCESS_MODULE */
1286
1287
1288/********************************************************
1289* Constants
1290*********************************************************/
1291static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
1292
1293#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1294#define HASH_TABLESIZE (1 << HASH_LOG)
1295#define HASH_MASK (HASH_TABLESIZE - 1)
1296
1297#define KNUTH 2654435761
1298
1299#define BIT7 128
1300#define BIT6 64
1301#define BIT5 32
1302#define BIT4 16
1303
1304#define KB *(1 <<10)
1305#define MB *(1 <<20)
1306#define GB *(1U<<30)
1307
1308#define BLOCKSIZE (128 KB) /* define, for static allocation */
1309
1310#define WORKPLACESIZE (BLOCKSIZE*3)
1311#define MINMATCH 4
1312#define MLbits 7
1313#define LLbits 6
1314#define Offbits 5
1315#define MaxML ((1<<MLbits )-1)
1316#define MaxLL ((1<<LLbits )-1)
1317#define MaxOff ((1<<Offbits)-1)
1318#define LitFSELog 11
1319#define MLFSELog 10
1320#define LLFSELog 10
1321#define OffFSELog 9
1322#define MAX(a,b) ((a)<(b)?(b):(a))
1323#define MaxSeq MAX(MaxLL, MaxML)
1324
1325#define LITERAL_NOENTROPY 63
1326#define COMMAND_NOENTROPY 7 /* to remove */
1327
1328static const size_t ZSTD_blockHeaderSize = 3;
1329static const size_t ZSTD_frameHeaderSize = 4;
1330
1331
1332/********************************************************
1333* Memory operations
1334*********************************************************/
1335static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1336
1337static unsigned ZSTD_isLittleEndian(void)
1338{
1339 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
1340 return one.c[0];
1341}
1342
1343static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1344
1345static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
1346
1347static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1348
1349static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1350
1351#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
1352
1353static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
1354{
1355 const BYTE* ip = (const BYTE*)src;
1356 BYTE* op = (BYTE*)dst;
1357 BYTE* const oend = op + length;
1358 while (op < oend) COPY8(op, ip);
1359}
1360
1361static U16 ZSTD_readLE16(const void* memPtr)
1362{
1363 if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1364 else
1365 {
1366 const BYTE* p = (const BYTE*)memPtr;
1367 return (U16)((U16)p[0] + ((U16)p[1]<<8));
1368 }
1369}
1370
1371
1372static U32 ZSTD_readLE32(const void* memPtr)
1373{
1374 if (ZSTD_isLittleEndian())
1375 return ZSTD_read32(memPtr);
1376 else
1377 {
1378 const BYTE* p = (const BYTE*)memPtr;
1379 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
1380 }
1381}
1382
1383static U32 ZSTD_readBE32(const void* memPtr)
1384{
1385 const BYTE* p = (const BYTE*)memPtr;
1386 return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1387}
1388
1389
1390/**************************************
1391* Local structures
1392***************************************/
1393typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1394
1395typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1396
1397typedef struct
1398{
1399 blockType_t blockType;
1400 U32 origSize;
1401} blockProperties_t;
1402
1403typedef struct {
1404 void* buffer;
1405 U32* offsetStart;
1406 U32* offset;
1407 BYTE* offCodeStart;
1408 BYTE* offCode;
1409 BYTE* litStart;
1410 BYTE* lit;
1411 BYTE* litLengthStart;
1412 BYTE* litLength;
1413 BYTE* matchLengthStart;
1414 BYTE* matchLength;
1415 BYTE* dumpsStart;
1416 BYTE* dumps;
1417} seqStore_t;
1418
1419
1420typedef struct ZSTD_Cctx_s
1421{
1422 const BYTE* base;
1423 U32 current;
1424 U32 nextUpdate;
1425 seqStore_t seqStore;
1426#ifdef __AVX2__
1427 __m256i hashTable[HASH_TABLESIZE>>3];
1428#else
1429 U32 hashTable[HASH_TABLESIZE];
1430#endif
1431 BYTE buffer[WORKPLACESIZE];
1432} cctxi_t;
1433
1434
1435
1436
1437/**************************************
1438* Error Management
1439**************************************/
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001440/* published entry point */
inikep8161e732016-09-05 12:29:51 +02001441unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001442
1443
1444/**************************************
1445* Tool functions
1446**************************************/
1447#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
1448#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
1449#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
1450#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1451
1452/**************************************************************
1453* Decompression code
1454**************************************************************/
1455
1456static size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1457{
1458 const BYTE* const in = (const BYTE* const)src;
1459 BYTE headerFlags;
1460 U32 cSize;
1461
inikep8161e732016-09-05 12:29:51 +02001462 if (srcSize < 3) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001463
1464 headerFlags = *in;
1465 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1466
1467 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1468 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1469
1470 if (bpPtr->blockType == bt_end) return 0;
1471 if (bpPtr->blockType == bt_rle) return 1;
1472 return cSize;
1473}
1474
1475
1476static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1477{
inikep8161e732016-09-05 12:29:51 +02001478 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001479 memcpy(dst, src, srcSize);
1480 return srcSize;
1481}
1482
1483
1484static size_t ZSTD_decompressLiterals(void* ctx,
1485 void* dst, size_t maxDstSize,
1486 const void* src, size_t srcSize)
1487{
1488 BYTE* op = (BYTE*)dst;
1489 BYTE* const oend = op + maxDstSize;
1490 const BYTE* ip = (const BYTE*)src;
1491 size_t errorCode;
1492 size_t litSize;
1493
1494 /* check : minimum 2, for litSize, +1, for content */
inikep8161e732016-09-05 12:29:51 +02001495 if (srcSize <= 3) return ERROR(corruption_detected);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001496
1497 litSize = ip[1] + (ip[0]<<8);
1498 litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh....
1499 op = oend - litSize;
1500
1501 (void)ctx;
inikep8161e732016-09-05 12:29:51 +02001502 if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001503 errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
inikep8161e732016-09-05 12:29:51 +02001504 if (FSE_isError(errorCode)) return ERROR(GENERIC);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001505 return litSize;
1506}
1507
1508
1509static size_t ZSTD_decodeLiteralsBlock(void* ctx,
1510 void* dst, size_t maxDstSize,
1511 const BYTE** litStart, size_t* litSize,
1512 const void* src, size_t srcSize)
1513{
1514 const BYTE* const istart = (const BYTE* const)src;
1515 const BYTE* ip = istart;
1516 BYTE* const ostart = (BYTE* const)dst;
1517 BYTE* const oend = ostart + maxDstSize;
1518 blockProperties_t litbp;
1519
1520 size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
inikep8161e732016-09-05 12:29:51 +02001521 if (ZSTDv01_isError(litcSize)) return litcSize;
1522 if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001523 ip += ZSTD_blockHeaderSize;
1524
1525 switch(litbp.blockType)
1526 {
1527 case bt_raw:
1528 *litStart = ip;
1529 ip += litcSize;
1530 *litSize = litcSize;
1531 break;
1532 case bt_rle:
1533 {
1534 size_t rleSize = litbp.origSize;
inikep8161e732016-09-05 12:29:51 +02001535 if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001536 memset(oend - rleSize, *ip, rleSize);
1537 *litStart = oend - rleSize;
1538 *litSize = rleSize;
1539 ip++;
1540 break;
1541 }
1542 case bt_compressed:
1543 {
1544 size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
inikep8161e732016-09-05 12:29:51 +02001545 if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001546 *litStart = oend - decodedLitSize;
1547 *litSize = decodedLitSize;
1548 ip += litcSize;
1549 break;
1550 }
Yann Colletffec7402016-01-21 15:50:11 +01001551 case bt_end:
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001552 default:
inikep8161e732016-09-05 12:29:51 +02001553 return ERROR(GENERIC);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001554 }
1555
1556 return ip-istart;
1557}
1558
1559
inikep476964f2016-09-05 13:34:57 +02001560size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001561 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1562 const void* src, size_t srcSize)
1563{
1564 const BYTE* const istart = (const BYTE* const)src;
1565 const BYTE* ip = istart;
1566 const BYTE* const iend = istart + srcSize;
1567 U32 LLtype, Offtype, MLtype;
1568 U32 LLlog, Offlog, MLlog;
1569 size_t dumpsLength;
1570
1571 /* check */
inikep8161e732016-09-05 12:29:51 +02001572 if (srcSize < 5) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001573
1574 /* SeqHead */
1575 *nbSeq = ZSTD_readLE16(ip); ip+=2;
1576 LLtype = *ip >> 6;
1577 Offtype = (*ip >> 4) & 3;
1578 MLtype = (*ip >> 2) & 3;
1579 if (*ip & 2)
1580 {
1581 dumpsLength = ip[2];
1582 dumpsLength += ip[1] << 8;
1583 ip += 3;
1584 }
1585 else
1586 {
1587 dumpsLength = ip[1];
1588 dumpsLength += (ip[0] & 1) << 8;
1589 ip += 2;
1590 }
1591 *dumpsPtr = ip;
1592 ip += dumpsLength;
1593 *dumpsLengthPtr = dumpsLength;
1594
1595 /* check */
inikep8161e732016-09-05 12:29:51 +02001596 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001597
1598 /* sequences */
1599 {
1600 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
1601 size_t headerSize;
1602
1603 /* Build DTables */
1604 switch(LLtype)
1605 {
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001606 case bt_rle :
1607 LLlog = 0;
1608 FSE_buildDTable_rle(DTableLL, *ip++); break;
1609 case bt_raw :
1610 LLlog = LLbits;
1611 FSE_buildDTable_raw(DTableLL, LLbits); break;
1612 default :
Yann Collet87c18b22016-08-26 01:43:47 +02001613 { U32 max = MaxLL;
1614 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
inikep8161e732016-09-05 12:29:51 +02001615 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1616 if (LLlog > LLFSELog) return ERROR(corruption_detected);
Yann Collet87c18b22016-08-26 01:43:47 +02001617 ip += headerSize;
1618 FSE_buildDTable(DTableLL, norm, max, LLlog);
1619 } }
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001620
1621 switch(Offtype)
1622 {
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001623 case bt_rle :
1624 Offlog = 0;
inikep8161e732016-09-05 12:29:51 +02001625 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001626 FSE_buildDTable_rle(DTableOffb, *ip++); break;
1627 case bt_raw :
1628 Offlog = Offbits;
1629 FSE_buildDTable_raw(DTableOffb, Offbits); break;
1630 default :
Yann Collet87c18b22016-08-26 01:43:47 +02001631 { U32 max = MaxOff;
1632 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
inikep8161e732016-09-05 12:29:51 +02001633 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1634 if (Offlog > OffFSELog) return ERROR(corruption_detected);
Yann Collet87c18b22016-08-26 01:43:47 +02001635 ip += headerSize;
1636 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1637 } }
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001638
1639 switch(MLtype)
1640 {
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001641 case bt_rle :
1642 MLlog = 0;
inikep8161e732016-09-05 12:29:51 +02001643 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001644 FSE_buildDTable_rle(DTableML, *ip++); break;
1645 case bt_raw :
1646 MLlog = MLbits;
1647 FSE_buildDTable_raw(DTableML, MLbits); break;
1648 default :
Yann Collet87c18b22016-08-26 01:43:47 +02001649 { U32 max = MaxML;
1650 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
inikep8161e732016-09-05 12:29:51 +02001651 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1652 if (MLlog > MLFSELog) return ERROR(corruption_detected);
Yann Collet87c18b22016-08-26 01:43:47 +02001653 ip += headerSize;
1654 FSE_buildDTable(DTableML, norm, max, MLlog);
1655 } } }
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001656
1657 return ip-istart;
1658}
1659
1660
1661typedef struct {
1662 size_t litLength;
1663 size_t offset;
1664 size_t matchLength;
1665} seq_t;
1666
1667typedef struct {
1668 FSE_DStream_t DStream;
1669 FSE_DState_t stateLL;
1670 FSE_DState_t stateOffb;
1671 FSE_DState_t stateML;
1672 size_t prevOffset;
1673 const BYTE* dumps;
1674 const BYTE* dumpsEnd;
1675} seqState_t;
1676
1677
1678static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1679{
1680 size_t litLength;
1681 size_t prevOffset;
1682 size_t offset;
1683 size_t matchLength;
1684 const BYTE* dumps = seqState->dumps;
1685 const BYTE* const de = seqState->dumpsEnd;
1686
1687 /* Literal length */
1688 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1689 prevOffset = litLength ? seq->offset : seqState->prevOffset;
1690 seqState->prevOffset = seq->offset;
1691 if (litLength == MaxLL)
1692 {
1693 U32 add = dumps<de ? *dumps++ : 0;
1694 if (add < 255) litLength += add;
1695 else
1696 {
1697 if (dumps<=(de-3))
1698 {
1699 litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
1700 dumps += 3;
1701 }
1702 }
1703 }
1704
1705 /* Offset */
1706 {
1707 U32 offsetCode, nbBits;
1708 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1709 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1710 nbBits = offsetCode - 1;
1711 if (offsetCode==0) nbBits = 0; /* cmove */
1712 offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1713 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1714 if (offsetCode==0) offset = prevOffset;
1715 }
1716
1717 /* MatchLength */
1718 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1719 if (matchLength == MaxML)
1720 {
1721 U32 add = dumps<de ? *dumps++ : 0;
1722 if (add < 255) matchLength += add;
1723 else
1724 {
1725 if (dumps<=(de-3))
1726 {
1727 matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
1728 dumps += 3;
1729 }
1730 }
1731 }
1732 matchLength += MINMATCH;
1733
1734 /* save result */
1735 seq->litLength = litLength;
1736 seq->offset = offset;
1737 seq->matchLength = matchLength;
1738 seqState->dumps = dumps;
1739}
1740
1741
1742static size_t ZSTD_execSequence(BYTE* op,
1743 seq_t sequence,
1744 const BYTE** litPtr, const BYTE* const litLimit,
1745 BYTE* const base, BYTE* const oend)
1746{
1747 static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1748 static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1749 const BYTE* const ostart = op;
1750 const size_t litLength = sequence.litLength;
1751 BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1752 const BYTE* const litEnd = *litPtr + litLength;
1753
1754 /* check */
inikep8161e732016-09-05 12:29:51 +02001755 if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
1756 if (litEnd > litLimit) return ERROR(corruption_detected);
1757 if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001758
1759 /* copy Literals */
1760 if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1761 memmove(op, *litPtr, litLength); /* overwrite risk */
1762 else
1763 ZSTD_wildcopy(op, *litPtr, litLength);
1764 op += litLength;
1765 *litPtr = litEnd; /* update for next sequence */
1766
1767 /* check : last match must be at a minimum distance of 8 from end of dest buffer */
inikep8161e732016-09-05 12:29:51 +02001768 if (oend-op < 8) return ERROR(dstSize_tooSmall);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001769
1770 /* copy Match */
1771 {
1772 const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1773 const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1774 size_t qutt = 12;
1775 U64 saved[2];
1776
1777 /* check */
inikep8161e732016-09-05 12:29:51 +02001778 if (match < base) return ERROR(corruption_detected);
1779 if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001780
1781 /* save beginning of literal sequence, in case of write overlap */
1782 if (overlapRisk)
1783 {
1784 if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1785 memcpy(saved, endMatch, qutt);
1786 }
1787
1788 if (sequence.offset < 8)
1789 {
1790 const int dec64 = dec64table[sequence.offset];
1791 op[0] = match[0];
1792 op[1] = match[1];
1793 op[2] = match[2];
1794 op[3] = match[3];
1795 match += dec32table[sequence.offset];
1796 ZSTD_copy4(op+4, match);
1797 match -= dec64;
1798 } else { ZSTD_copy8(op, match); }
1799 op += 8; match += 8;
1800
1801 if (endMatch > oend-12)
1802 {
1803 if (op < oend-8)
1804 {
1805 ZSTD_wildcopy(op, match, (oend-8) - op);
1806 match += (oend-8) - op;
1807 op = oend-8;
1808 }
1809 while (op<endMatch) *op++ = *match++;
1810 }
1811 else
1812 ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
1813
1814 /* restore, in case of overlap */
1815 if (overlapRisk) memcpy(endMatch, saved, qutt);
1816 }
1817
1818 return endMatch-ostart;
1819}
1820
1821typedef struct ZSTDv01_Dctx_s
1822{
1823 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1824 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1825 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1826 void* previousDstEnd;
1827 void* base;
1828 size_t expected;
1829 blockType_t bType;
1830 U32 phase;
1831} dctx_t;
1832
1833
1834static size_t ZSTD_decompressSequences(
1835 void* ctx,
1836 void* dst, size_t maxDstSize,
1837 const void* seqStart, size_t seqSize,
1838 const BYTE* litStart, size_t litSize)
1839{
1840 dctx_t* dctx = (dctx_t*)ctx;
1841 const BYTE* ip = (const BYTE*)seqStart;
1842 const BYTE* const iend = ip + seqSize;
1843 BYTE* const ostart = (BYTE* const)dst;
1844 BYTE* op = ostart;
1845 BYTE* const oend = ostart + maxDstSize;
1846 size_t errorCode, dumpsLength;
1847 const BYTE* litPtr = litStart;
1848 const BYTE* const litEnd = litStart + litSize;
1849 int nbSeq;
1850 const BYTE* dumps;
1851 U32* DTableLL = dctx->LLTable;
1852 U32* DTableML = dctx->MLTable;
1853 U32* DTableOffb = dctx->OffTable;
1854 BYTE* const base = (BYTE*) (dctx->base);
1855
1856 /* Build Decoding Tables */
inikep476964f2016-09-05 13:34:57 +02001857 errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001858 DTableLL, DTableML, DTableOffb,
1859 ip, iend-ip);
inikep8161e732016-09-05 12:29:51 +02001860 if (ZSTDv01_isError(errorCode)) return errorCode;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001861 ip += errorCode;
1862
1863 /* Regen sequences */
1864 {
1865 seq_t sequence;
1866 seqState_t seqState;
1867
1868 memset(&sequence, 0, sizeof(sequence));
1869 seqState.dumps = dumps;
1870 seqState.dumpsEnd = dumps + dumpsLength;
1871 seqState.prevOffset = 1;
1872 errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
inikep8161e732016-09-05 12:29:51 +02001873 if (FSE_isError(errorCode)) return ERROR(corruption_detected);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001874 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1875 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1876 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1877
1878 for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1879 {
1880 size_t oneSeqSize;
1881 nbSeq--;
1882 ZSTD_decodeSequence(&sequence, &seqState);
1883 oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
inikep8161e732016-09-05 12:29:51 +02001884 if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001885 op += oneSeqSize;
1886 }
1887
1888 /* check if reached exact end */
inikep8161e732016-09-05 12:29:51 +02001889 if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
1890 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001891
1892 /* last literal segment */
1893 {
1894 size_t lastLLSize = litEnd - litPtr;
inikep8161e732016-09-05 12:29:51 +02001895 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001896 if (op != litPtr) memmove(op, litPtr, lastLLSize);
1897 op += lastLLSize;
1898 }
1899 }
1900
1901 return op-ostart;
1902}
1903
1904
1905static size_t ZSTD_decompressBlock(
1906 void* ctx,
1907 void* dst, size_t maxDstSize,
1908 const void* src, size_t srcSize)
1909{
1910 /* blockType == blockCompressed, srcSize is trusted */
1911 const BYTE* ip = (const BYTE*)src;
Yann Collet1fdd8232016-01-06 12:35:42 +01001912 const BYTE* litPtr = NULL;
1913 size_t litSize = 0;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001914 size_t errorCode;
1915
1916 /* Decode literals sub-block */
1917 errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
inikep8161e732016-09-05 12:29:51 +02001918 if (ZSTDv01_isError(errorCode)) return errorCode;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001919 ip += errorCode;
1920 srcSize -= errorCode;
1921
1922 return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1923}
1924
1925
1926size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1927{
1928 const BYTE* ip = (const BYTE*)src;
1929 const BYTE* iend = ip + srcSize;
1930 BYTE* const ostart = (BYTE* const)dst;
1931 BYTE* op = ostart;
1932 BYTE* const oend = ostart + maxDstSize;
1933 size_t remainingSize = srcSize;
1934 U32 magicNumber;
1935 size_t errorCode=0;
1936 blockProperties_t blockProperties;
1937
1938 /* Frame Header */
inikep8161e732016-09-05 12:29:51 +02001939 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001940 magicNumber = ZSTD_readBE32(src);
inikep8161e732016-09-05 12:29:51 +02001941 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001942 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1943
1944 /* Loop on each block */
1945 while (1)
1946 {
1947 size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
inikep8161e732016-09-05 12:29:51 +02001948 if (ZSTDv01_isError(blockSize)) return blockSize;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001949
1950 ip += ZSTD_blockHeaderSize;
1951 remainingSize -= ZSTD_blockHeaderSize;
inikep8161e732016-09-05 12:29:51 +02001952 if (blockSize > remainingSize) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001953
1954 switch(blockProperties.blockType)
1955 {
1956 case bt_compressed:
1957 errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1958 break;
1959 case bt_raw :
1960 errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1961 break;
1962 case bt_rle :
inikep8161e732016-09-05 12:29:51 +02001963 return ERROR(GENERIC); /* not yet supported */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001964 break;
1965 case bt_end :
1966 /* end of frame */
inikep8161e732016-09-05 12:29:51 +02001967 if (remainingSize) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001968 break;
1969 default:
inikep8161e732016-09-05 12:29:51 +02001970 return ERROR(GENERIC);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001971 }
1972 if (blockSize == 0) break; /* bt_end */
1973
inikep8161e732016-09-05 12:29:51 +02001974 if (ZSTDv01_isError(errorCode)) return errorCode;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01001975 op += errorCode;
1976 ip += blockSize;
1977 remainingSize -= blockSize;
1978 }
1979
1980 return op-ostart;
1981}
1982
1983size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1984{
1985 dctx_t ctx;
1986 ctx.base = dst;
1987 return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1988}
1989
1990
1991/*******************************
1992* Streaming Decompression API
1993*******************************/
1994
1995size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
1996{
1997 dctx->expected = ZSTD_frameHeaderSize;
1998 dctx->phase = 0;
1999 dctx->previousDstEnd = NULL;
2000 dctx->base = NULL;
2001 return 0;
2002}
2003
2004ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2005{
2006 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2007 if (dctx==NULL) return NULL;
2008 ZSTDv01_resetDCtx(dctx);
2009 return dctx;
2010}
2011
2012size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2013{
2014 free(dctx);
2015 return 0;
2016}
2017
2018size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2019{
2020 return ((dctx_t*)dctx)->expected;
2021}
2022
2023size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2024{
2025 dctx_t* ctx = (dctx_t*)dctx;
2026
2027 /* Sanity check */
inikep8161e732016-09-05 12:29:51 +02002028 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002029 if (dst != ctx->previousDstEnd) /* not contiguous */
2030 ctx->base = dst;
2031
2032 /* Decompress : frame header */
2033 if (ctx->phase == 0)
2034 {
2035 /* Check frame magic header */
2036 U32 magicNumber = ZSTD_readBE32(src);
inikep8161e732016-09-05 12:29:51 +02002037 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002038 ctx->phase = 1;
2039 ctx->expected = ZSTD_blockHeaderSize;
2040 return 0;
2041 }
2042
2043 /* Decompress : block header */
2044 if (ctx->phase == 1)
2045 {
2046 blockProperties_t bp;
2047 size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
inikep8161e732016-09-05 12:29:51 +02002048 if (ZSTDv01_isError(blockSize)) return blockSize;
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002049 if (bp.blockType == bt_end)
2050 {
2051 ctx->expected = 0;
2052 ctx->phase = 0;
2053 }
2054 else
2055 {
2056 ctx->expected = blockSize;
2057 ctx->bType = bp.blockType;
2058 ctx->phase = 2;
2059 }
2060
2061 return 0;
2062 }
2063
2064 /* Decompress : block content */
2065 {
2066 size_t rSize;
2067 switch(ctx->bType)
2068 {
2069 case bt_compressed:
2070 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2071 break;
2072 case bt_raw :
2073 rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2074 break;
2075 case bt_rle :
inikep8161e732016-09-05 12:29:51 +02002076 return ERROR(GENERIC); /* not yet handled */
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002077 break;
2078 case bt_end : /* should never happen (filtered at phase 1) */
2079 rSize = 0;
2080 break;
2081 default:
inikep8161e732016-09-05 12:29:51 +02002082 return ERROR(GENERIC);
Yann Colletb1f3f4b2015-10-18 22:18:32 +01002083 }
2084 ctx->phase = 1;
2085 ctx->expected = ZSTD_blockHeaderSize;
2086 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
2087 return rSize;
2088 }
2089
2090}