blob: 63684fc04bdd9a2b77179a013ebc361ed2c20ea3 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35#ifndef FSE_COMMONDEFS_ONLY
36
37/****************************************************************
38* Tuning parameters
39****************************************************************/
40/* MEMORY_USAGE :
41* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
42* Increasing memory usage improves compression ratio
43* Reduced memory usage can improve speed, due to cache effect
44* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
45#define FSE_MAX_MEMORY_USAGE 14
46#define FSE_DEFAULT_MEMORY_USAGE 13
47
48/* FSE_MAX_SYMBOL_VALUE :
49* Maximum symbol value authorized.
50* Required for proper stack allocation */
51#define FSE_MAX_SYMBOL_VALUE 255
52
53
54/****************************************************************
Yann Collete8c6bb12015-07-26 00:23:57 +010055* template functions type & suffix
Yann Collet4856a002015-01-24 01:58:16 +010056****************************************************************/
57#define FSE_FUNCTION_TYPE BYTE
58#define FSE_FUNCTION_EXTENSION
59
Yann Collete8c6bb12015-07-26 00:23:57 +010060
61/****************************************************************
62* Byte symbol type
63****************************************************************/
64typedef struct
65{
66 unsigned short newState;
67 unsigned char symbol;
68 unsigned char nbBits;
69} FSE_decode_t; /* size == U32 */
70
Yann Collet4856a002015-01-24 01:58:16 +010071#endif /* !FSE_COMMONDEFS_ONLY */
72
73
74/****************************************************************
75* Compiler specifics
76****************************************************************/
77#ifdef _MSC_VER /* Visual Studio */
78# define FORCE_INLINE static __forceinline
79# include <intrin.h> /* For Visual 2005 */
80# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
81# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
82#else
83# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
84# ifdef __GNUC__
85# define FORCE_INLINE static inline __attribute__((always_inline))
86# else
87# define FORCE_INLINE static inline
88# endif
89#endif
90
91
92/****************************************************************
93* Includes
94****************************************************************/
95#include <stdlib.h> /* malloc, free, qsort */
96#include <string.h> /* memcpy, memset */
97#include <stdio.h> /* printf (debug) */
98#include "fse_static.h"
99
100
Yann Colletad68a0e2015-02-26 00:29:15 +0100101#ifndef MEM_ACCESS_MODULE
102#define MEM_ACCESS_MODULE
Yann Collet4856a002015-01-24 01:58:16 +0100103/****************************************************************
104* Basic Types
105*****************************************************************/
106#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
107# include <stdint.h>
108typedef uint8_t BYTE;
109typedef uint16_t U16;
110typedef int16_t S16;
111typedef uint32_t U32;
112typedef int32_t S32;
113typedef uint64_t U64;
114typedef int64_t S64;
115#else
116typedef unsigned char BYTE;
117typedef unsigned short U16;
118typedef signed short S16;
119typedef unsigned int U32;
120typedef signed int S32;
121typedef unsigned long long U64;
122typedef signed long long S64;
123#endif
124
Yann Colletad68a0e2015-02-26 00:29:15 +0100125#endif /* MEM_ACCESS_MODULE */
Yann Collet4856a002015-01-24 01:58:16 +0100126
127/****************************************************************
128* Memory I/O
129*****************************************************************/
Yann Collete8c6bb12015-07-26 00:23:57 +0100130static unsigned FSE_32bits(void)
131{
132 return sizeof(void*)==4;
133}
134
Yann Collet4856a002015-01-24 01:58:16 +0100135static unsigned FSE_isLittleEndian(void)
136{
137 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
138 return one.c[0];
139}
140
Yann Collet138db212015-07-27 20:19:21 +0100141static U16 FSE_read16(const void* memPtr)
142{
143 U16 val;
144 memcpy(&val, memPtr, sizeof(val));
145 return val;
146}
147
148static U16 FSE_readLE16(const void* memPtr)
149{
150 if (FSE_isLittleEndian())
151 return FSE_read16(memPtr);
152 else
153 {
154 const BYTE* p = (const BYTE*)memPtr;
155 return (U16)(p[0] + (p[1]<<8));
156 }
157}
158
159static void FSE_writeLE16(void* memPtr, U16 val)
160{
161 if (FSE_isLittleEndian())
162 {
163 memcpy(memPtr, &val, sizeof(val));
164 }
165 else
166 {
167 BYTE* p = (BYTE*)memPtr;
168 p[0] = (BYTE)val;
169 p[1] = (BYTE)(val>>8);
170 }
171}
172
Yann Collet4856a002015-01-24 01:58:16 +0100173static U32 FSE_read32(const void* memPtr)
174{
175 U32 val32;
176 memcpy(&val32, memPtr, 4);
177 return val32;
178}
179
180static U32 FSE_readLE32(const void* memPtr)
181{
182 if (FSE_isLittleEndian())
183 return FSE_read32(memPtr);
184 else
185 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100186 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100187 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
188 }
189}
190
191static void FSE_writeLE32(void* memPtr, U32 val32)
192{
193 if (FSE_isLittleEndian())
194 {
195 memcpy(memPtr, &val32, 4);
196 }
197 else
198 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100199 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100200 p[0] = (BYTE)val32;
201 p[1] = (BYTE)(val32>>8);
202 p[2] = (BYTE)(val32>>16);
203 p[3] = (BYTE)(val32>>24);
204 }
205}
206
207static U64 FSE_read64(const void* memPtr)
208{
209 U64 val64;
210 memcpy(&val64, memPtr, 8);
211 return val64;
212}
213
214static U64 FSE_readLE64(const void* memPtr)
215{
216 if (FSE_isLittleEndian())
217 return FSE_read64(memPtr);
218 else
219 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100220 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100221 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
222 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
223 }
224}
225
226static void FSE_writeLE64(void* memPtr, U64 val64)
227{
228 if (FSE_isLittleEndian())
229 {
230 memcpy(memPtr, &val64, 8);
231 }
232 else
233 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100234 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100235 p[0] = (BYTE)val64;
236 p[1] = (BYTE)(val64>>8);
237 p[2] = (BYTE)(val64>>16);
238 p[3] = (BYTE)(val64>>24);
239 p[4] = (BYTE)(val64>>32);
240 p[5] = (BYTE)(val64>>40);
241 p[6] = (BYTE)(val64>>48);
242 p[7] = (BYTE)(val64>>56);
243 }
244}
245
246static size_t FSE_readLEST(const void* memPtr)
247{
Yann Collete8c6bb12015-07-26 00:23:57 +0100248 if (FSE_32bits())
Yann Colletfb814172015-01-25 13:19:12 +0100249 return (size_t)FSE_readLE32(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100250 else
Yann Colletfb814172015-01-25 13:19:12 +0100251 return (size_t)FSE_readLE64(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100252}
253
254static void FSE_writeLEST(void* memPtr, size_t val)
255{
Yann Collete8c6bb12015-07-26 00:23:57 +0100256 if (FSE_32bits())
Yann Collet4856a002015-01-24 01:58:16 +0100257 FSE_writeLE32(memPtr, (U32)val);
258 else
259 FSE_writeLE64(memPtr, (U64)val);
260}
261
262
263/****************************************************************
264* Constants
265*****************************************************************/
266#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
267#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
268#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
269#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
270#define FSE_MIN_TABLELOG 5
271
272#define FSE_TABLELOG_ABSOLUTE_MAX 15
273#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
274#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
275#endif
276
277
278/****************************************************************
279* Error Management
280****************************************************************/
281#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
282
283
284/****************************************************************
285* Complex types
286****************************************************************/
287typedef struct
288{
Yann Collet77c82b62015-08-02 01:19:09 +0100289 int deltaFindState;
290 U32 deltaNbBits;
291} FSE_symbolCompressionTransform; /* total 8 bytes */
Yann Collet4856a002015-01-24 01:58:16 +0100292
Yann Collet1efa31f2015-07-04 15:56:41 -0800293typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
294typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
Yann Collet4856a002015-01-24 01:58:16 +0100295
296/****************************************************************
297* Internal functions
298****************************************************************/
299FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
300{
301# if defined(_MSC_VER) /* Visual */
302 unsigned long r;
303 _BitScanReverse ( &r, val );
304 return (unsigned) r;
305# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
306 return 31 - __builtin_clz (val);
307# else /* Software version */
308 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
309 U32 v = val;
310 unsigned r;
311 v |= v >> 1;
312 v |= v >> 2;
313 v |= v >> 4;
314 v |= v >> 8;
315 v |= v >> 16;
316 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
317 return r;
318# endif
319}
320
321
Yann Collete8c6bb12015-07-26 00:23:57 +0100322/****************************************************************
323* Templates
324****************************************************************/
325/*
326 designed to be included
327 for type-specific functions (template emulation in C)
328 Objective is to write these functions only once, for improved maintenance
329*/
330
331/* safety checks */
332#ifndef FSE_FUNCTION_EXTENSION
333# error "FSE_FUNCTION_EXTENSION must be defined"
334#endif
335#ifndef FSE_FUNCTION_TYPE
336# error "FSE_FUNCTION_TYPE must be defined"
337#endif
338
339/* Function names */
340#define FSE_CAT(X,Y) X##Y
341#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
342#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
343
344
345/* Function templates */
346size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
347(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
348{
349 const FSE_FUNCTION_TYPE* ip = source;
350 const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
351 unsigned maxSymbolValue = *maxSymbolValuePtr;
352 unsigned max=0;
353 int s;
354
355 U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
356 U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
357 U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
358 U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
359
360 /* safety checks */
361 if (!sourceSize)
362 {
363 memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
364 *maxSymbolValuePtr = 0;
365 return 0;
366 }
367 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
368 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
369
370 if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
371 {
372 /* check input values, to avoid count table overflow */
373 while (ip < iend-3)
374 {
375 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
376 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
377 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
378 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
379 }
380 }
381 else
382 {
383 U32 cached = FSE_read32(ip); ip += 4;
384 while (ip < iend-15)
385 {
386 U32 c = cached; cached = FSE_read32(ip); ip += 4;
387 Counting1[(BYTE) c ]++;
388 Counting2[(BYTE)(c>>8) ]++;
389 Counting3[(BYTE)(c>>16)]++;
390 Counting4[ c>>24 ]++;
391 c = cached; cached = FSE_read32(ip); ip += 4;
392 Counting1[(BYTE) c ]++;
393 Counting2[(BYTE)(c>>8) ]++;
394 Counting3[(BYTE)(c>>16)]++;
395 Counting4[ c>>24 ]++;
396 c = cached; cached = FSE_read32(ip); ip += 4;
397 Counting1[(BYTE) c ]++;
398 Counting2[(BYTE)(c>>8) ]++;
399 Counting3[(BYTE)(c>>16)]++;
400 Counting4[ c>>24 ]++;
401 c = cached; cached = FSE_read32(ip); ip += 4;
402 Counting1[(BYTE) c ]++;
403 Counting2[(BYTE)(c>>8) ]++;
404 Counting3[(BYTE)(c>>16)]++;
405 Counting4[ c>>24 ]++;
406 }
407 ip-=4;
408 }
409
410 /* finish last symbols */
411 while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
412
413 for (s=0; s<=(int)maxSymbolValue; s++)
414 {
415 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
416 if (count[s] > max) max = count[s];
417 }
418
419 while (!count[maxSymbolValue]) maxSymbolValue--;
420 *maxSymbolValuePtr = maxSymbolValue;
421 return (size_t)max;
422}
423
424/* hidden fast variant (unsafe) */
425size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
426(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
427{
428 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
429}
430
431size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
432(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
433{
434 if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
435 {
436 *maxSymbolValuePtr = 255;
437 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
438 }
439 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
440}
441
442
443static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
444
445size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
446(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
447{
448 const unsigned tableSize = 1 << tableLog;
449 const unsigned tableMask = tableSize - 1;
450 U16* tableU16 = ( (U16*) ct) + 2;
451 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
452 const unsigned step = FSE_tableStep(tableSize);
453 unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
454 U32 position = 0;
455 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */
456 U32 highThreshold = tableSize-1;
457 unsigned symbol;
458 unsigned i;
459
460 /* header */
461 tableU16[-2] = (U16) tableLog;
462 tableU16[-1] = (U16) maxSymbolValue;
463
464 /* For explanations on how to distribute symbol values over the table :
465 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
466
467 /* symbol start positions */
468 cumul[0] = 0;
469 for (i=1; i<=maxSymbolValue+1; i++)
470 {
471 if (normalizedCounter[i-1]==-1) /* Low prob symbol */
472 {
473 cumul[i] = cumul[i-1] + 1;
474 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
475 }
476 else
477 cumul[i] = cumul[i-1] + normalizedCounter[i-1];
478 }
479 cumul[maxSymbolValue+1] = tableSize+1;
480
481 /* Spread symbols */
482 for (symbol=0; symbol<=maxSymbolValue; symbol++)
483 {
484 int nbOccurences;
485 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
486 {
487 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
488 position = (position + step) & tableMask;
489 while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
490 }
491 }
492
493 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
494
495 /* Build table */
496 for (i=0; i<tableSize; i++)
497 {
498 FSE_FUNCTION_TYPE s = tableSymbol[i]; /* static analyzer doesn't understand tableSymbol is properly initialized */
Yann Colleta7875502015-08-07 15:21:00 +0100499 tableU16[cumul[s]++] = (U16) (tableSize+i); /* TableU16 : sorted by symbol order; gives next state value */
Yann Collete8c6bb12015-07-26 00:23:57 +0100500 }
501
502 /* Build Symbol Transformation Table */
503 {
504 unsigned s;
505 unsigned total = 0;
506 for (s=0; s<=maxSymbolValue; s++)
507 {
508 switch (normalizedCounter[s])
509 {
Yann Collet77c82b62015-08-02 01:19:09 +0100510 case 0:
Yann Collete8c6bb12015-07-26 00:23:57 +0100511 break;
512 case -1:
Yann Collet77c82b62015-08-02 01:19:09 +0100513 case 1:
Yann Colleta7875502015-08-07 15:21:00 +0100514 symbolTT[s].deltaNbBits = tableLog << 16;
Yann Collete8c6bb12015-07-26 00:23:57 +0100515 symbolTT[s].deltaFindState = total - 1;
516 total ++;
Yann Collete8c6bb12015-07-26 00:23:57 +0100517 break;
518 default :
Yann Collet77c82b62015-08-02 01:19:09 +0100519 {
520 U32 maxBitsOut = tableLog - FSE_highbit32 (normalizedCounter[s]-1);
521 U32 minStatePlus = normalizedCounter[s] << maxBitsOut;
522 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
523 symbolTT[s].deltaFindState = total - normalizedCounter[s];
524 total += normalizedCounter[s];
525 }
Yann Collete8c6bb12015-07-26 00:23:57 +0100526 }
527 }
528 }
529
530 return 0;
531}
532
533
534#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
535
536FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
537{
538 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
539 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
540}
541
542void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
543{
544 free(dt);
545}
546
Yann Colleta7875502015-08-07 15:21:00 +0100547typedef struct {
548 U16 tableLog;
549 U16 fastMode;
550} FSE_DTableHeader; /* sizeof U32 */
Yann Collete8c6bb12015-07-26 00:23:57 +0100551
552size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
553(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
554{
Yann Colleta7875502015-08-07 15:21:00 +0100555 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
556 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1); /* because dt is unsigned, 32-bits aligned on 32-bits */
Yann Collete8c6bb12015-07-26 00:23:57 +0100557 const U32 tableSize = 1 << tableLog;
558 const U32 tableMask = tableSize-1;
559 const U32 step = FSE_tableStep(tableSize);
560 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
561 U32 position = 0;
562 U32 highThreshold = tableSize-1;
563 const S16 largeLimit= (S16)(1 << (tableLog-1));
564 U32 noLarge = 1;
565 U32 s;
566
567 /* Sanity Checks */
568 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
569 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
570
571 /* Init, lay down lowprob symbols */
Yann Colleta7875502015-08-07 15:21:00 +0100572 DTableH[0].tableLog = (U16)tableLog;
Yann Collete8c6bb12015-07-26 00:23:57 +0100573 for (s=0; s<=maxSymbolValue; s++)
574 {
575 if (normalizedCounter[s]==-1)
576 {
577 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
578 symbolNext[s] = 1;
579 }
580 else
581 {
582 if (normalizedCounter[s] >= largeLimit) noLarge=0;
583 symbolNext[s] = normalizedCounter[s];
584 }
585 }
586
587 /* Spread symbols */
588 for (s=0; s<=maxSymbolValue; s++)
589 {
590 int i;
591 for (i=0; i<normalizedCounter[s]; i++)
592 {
593 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
594 position = (position + step) & tableMask;
595 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
596 }
597 }
598
599 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
600
601 /* Build Decoding table */
602 {
603 U32 i;
604 for (i=0; i<tableSize; i++)
605 {
606 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
607 U16 nextState = symbolNext[symbol]++;
608 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
609 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
610 }
611 }
612
Yann Colleta7875502015-08-07 15:21:00 +0100613 DTableH->fastMode = (U16)noLarge;
614 return 0;
Yann Collete8c6bb12015-07-26 00:23:57 +0100615}
616
617
618/******************************************
619* FSE byte symbol
620******************************************/
Yann Collet4856a002015-01-24 01:58:16 +0100621#ifndef FSE_COMMONDEFS_ONLY
622
623unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
624
625#define FSE_GENERATE_STRING(STRING) #STRING,
626static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
627
628const char* FSE_getErrorName(size_t code)
629{
630 static const char* codeError = "Unspecified error code";
631 if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
632 return codeError;
633}
634
635static short FSE_abs(short a)
636{
637 return a<0? -a : a;
638}
639
640
641/****************************************************************
642* Header bitstream management
643****************************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -0800644size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100645{
646 size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
Yann Colleta7875502015-08-07 15:21:00 +0100647 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;
Yann Collet4856a002015-01-24 01:58:16 +0100648}
649
Yann Collet1efa31f2015-07-04 15:56:41 -0800650static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
Yann Collet4856a002015-01-24 01:58:16 +0100651 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
652 unsigned safeWrite)
653{
654 BYTE* const ostart = (BYTE*) header;
655 BYTE* out = ostart;
656 BYTE* const oend = ostart + headerBufferSize;
657 int nbBits;
658 const int tableSize = 1 << tableLog;
659 int remaining;
660 int threshold;
661 U32 bitStream;
662 int bitCount;
663 unsigned charnum = 0;
664 int previous0 = 0;
665
666 bitStream = 0;
667 bitCount = 0;
668 /* Table Size */
669 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
670 bitCount += 4;
671
672 /* Init */
673 remaining = tableSize+1; /* +1 for extra accuracy */
674 threshold = tableSize;
675 nbBits = tableLog+1;
676
677 while (remaining>1) /* stops at 1 */
678 {
679 if (previous0)
680 {
681 unsigned start = charnum;
682 while (!normalizedCounter[charnum]) charnum++;
683 while (charnum >= start+24)
684 {
685 start+=24;
Yann Collet213089c2015-06-18 07:43:16 -0800686 bitStream += 0xFFFFU << bitCount;
Yann Colleta7875502015-08-07 15:21:00 +0100687 if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
Yann Collet213089c2015-06-18 07:43:16 -0800688 out[0] = (BYTE) bitStream;
Yann Collet4856a002015-01-24 01:58:16 +0100689 out[1] = (BYTE)(bitStream>>8);
690 out+=2;
691 bitStream>>=16;
692 }
693 while (charnum >= start+3)
694 {
695 start+=3;
696 bitStream += 3 << bitCount;
697 bitCount += 2;
698 }
699 bitStream += (charnum-start) << bitCount;
700 bitCount += 2;
701 if (bitCount>16)
702 {
Yann Colleta7875502015-08-07 15:21:00 +0100703 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
Yann Collet4856a002015-01-24 01:58:16 +0100704 out[0] = (BYTE)bitStream;
705 out[1] = (BYTE)(bitStream>>8);
706 out += 2;
707 bitStream >>= 16;
708 bitCount -= 16;
709 }
710 }
711 {
712 short count = normalizedCounter[charnum++];
713 const short max = (short)((2*threshold-1)-remaining);
714 remaining -= FSE_abs(count);
Yann Collet213089c2015-06-18 07:43:16 -0800715 if (remaining<1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100716 count++; /* +1 for extra accuracy */
717 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
718 bitStream += count << bitCount;
719 bitCount += nbBits;
720 bitCount -= (count<max);
721 previous0 = (count==1);
722 while (remaining<threshold) nbBits--, threshold>>=1;
723 }
724 if (bitCount>16)
725 {
Yann Colleta7875502015-08-07 15:21:00 +0100726 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
Yann Collet4856a002015-01-24 01:58:16 +0100727 out[0] = (BYTE)bitStream;
728 out[1] = (BYTE)(bitStream>>8);
729 out += 2;
730 bitStream >>= 16;
731 bitCount -= 16;
732 }
733 }
734
735 /* flush remaining bitStream */
Yann Colleta7875502015-08-07 15:21:00 +0100736 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
Yann Collet4856a002015-01-24 01:58:16 +0100737 out[0] = (BYTE)bitStream;
738 out[1] = (BYTE)(bitStream>>8);
739 out+= (bitCount+7) /8;
740
Yann Collete8c6bb12015-07-26 00:23:57 +0100741 if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100742
743 return (out-ostart);
744}
745
746
Yann Collet1efa31f2015-07-04 15:56:41 -0800747size_t FSE_writeNCount (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100748{
749 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
750 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
751
Yann Collet1efa31f2015-07-04 15:56:41 -0800752 if (headerBufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
753 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
Yann Collet4856a002015-01-24 01:58:16 +0100754
Yann Collet1efa31f2015-07-04 15:56:41 -0800755 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
Yann Collet4856a002015-01-24 01:58:16 +0100756}
757
758
Yann Collet1efa31f2015-07-04 15:56:41 -0800759size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
Yann Collet4856a002015-01-24 01:58:16 +0100760 const void* headerBuffer, size_t hbSize)
761{
762 const BYTE* const istart = (const BYTE*) headerBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -0800763 const BYTE* const iend = istart + hbSize;
Yann Collet4856a002015-01-24 01:58:16 +0100764 const BYTE* ip = istart;
765 int nbBits;
766 int remaining;
767 int threshold;
768 U32 bitStream;
769 int bitCount;
770 unsigned charnum = 0;
771 int previous0 = 0;
772
Yann Collet1efa31f2015-07-04 15:56:41 -0800773 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100774 bitStream = FSE_readLE32(ip);
775 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
776 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
777 bitStream >>= 4;
778 bitCount = 4;
779 *tableLogPtr = nbBits;
780 remaining = (1<<nbBits)+1;
781 threshold = 1<<nbBits;
782 nbBits++;
783
784 while ((remaining>1) && (charnum<=*maxSVPtr))
785 {
786 if (previous0)
787 {
788 unsigned n0 = charnum;
789 while ((bitStream & 0xFFFF) == 0xFFFF)
790 {
791 n0+=24;
792 ip+=2;
793 bitStream = FSE_readLE32(ip) >> bitCount;
794 }
795 while ((bitStream & 3) == 3)
796 {
797 n0+=3;
798 bitStream>>=2;
799 bitCount+=2;
800 }
801 n0 += bitStream & 3;
802 bitCount += 2;
Yann Collet1efa31f2015-07-04 15:56:41 -0800803 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
Yann Collet4856a002015-01-24 01:58:16 +0100804 while (charnum < n0) normalizedCounter[charnum++] = 0;
805 ip += bitCount>>3;
806 bitCount &= 7;
807 bitStream = FSE_readLE32(ip) >> bitCount;
808 }
809 {
810 const short max = (short)((2*threshold-1)-remaining);
811 short count;
812
813 if ((bitStream & (threshold-1)) < (U32)max)
814 {
815 count = (short)(bitStream & (threshold-1));
816 bitCount += nbBits-1;
817 }
818 else
819 {
820 count = (short)(bitStream & (2*threshold-1));
821 if (count >= threshold) count -= max;
822 bitCount += nbBits;
823 }
824
825 count--; /* extra accuracy */
826 remaining -= FSE_abs(count);
827 normalizedCounter[charnum++] = count;
828 previous0 = !count;
829 while (remaining < threshold)
830 {
831 nbBits--;
832 threshold >>= 1;
833 }
834
Yann Collet1efa31f2015-07-04 15:56:41 -0800835 {
836 const BYTE* itarget = ip + (bitCount>>3);
837 if (itarget > iend - 4)
838 {
839 ip = iend - 4;
840 bitCount -= (int)(8 * (iend - 4 - ip));
841 }
842 else
843 {
844 ip = itarget;
845 bitCount &= 7;
846 }
847 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
848 }
Yann Collet4856a002015-01-24 01:58:16 +0100849 }
850 }
851 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
852 *maxSVPtr = charnum-1;
853
Yann Collet1efa31f2015-07-04 15:56:41 -0800854 ip += (bitCount+7)>>3;
855 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100856 return ip-istart;
857}
858
859
860/****************************************************************
861* FSE Compression Code
862****************************************************************/
863/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800864FSE_CTable[0] is a variable size structure which contains :
Yann Collet4856a002015-01-24 01:58:16 +0100865 U16 tableLog;
866 U16 maxSymbolValue;
867 U16 nextStateNumber[1 << tableLog]; // This size is variable
868 FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
869Allocation is manual, since C standard does not support variable-size structures.
870*/
871
872size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
873{
874 size_t size;
875 FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
876 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
877 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
878 return size;
879}
880
Yann Collet1efa31f2015-07-04 15:56:41 -0800881FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100882{
883 size_t size;
884 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
885 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
Yann Collet1efa31f2015-07-04 15:56:41 -0800886 return (FSE_CTable*)malloc(size);
Yann Collet4856a002015-01-24 01:58:16 +0100887}
888
Yann Collet1efa31f2015-07-04 15:56:41 -0800889void FSE_freeCTable (FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +0100890{
Yann Collet1efa31f2015-07-04 15:56:41 -0800891 free(ct);
Yann Collet4856a002015-01-24 01:58:16 +0100892}
893
Yann Collet4856a002015-01-24 01:58:16 +0100894
895unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
896{
897 U32 tableLog = maxTableLog;
898 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
899 if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
Yann Colletbbfa7d72015-03-23 23:15:22 +0100900 if ((FSE_highbit32(maxSymbolValue)+2) > tableLog) tableLog = FSE_highbit32(maxSymbolValue)+2; /* Need a minimum to safely represent all symbol values */
Yann Collet4856a002015-01-24 01:58:16 +0100901 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
902 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
903 return tableLog;
904}
905
906
Yann Collet26aa1ec2015-02-24 09:05:58 +0100907/* Secondary normalization method.
908 To be used when primary method fails. */
909
910static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
Yann Colleta3c75ba2015-02-21 03:31:59 +0100911{
912 U32 s;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100913 U32 distributed = 0;
914 U32 ToDistribute;
915
916 /* Init */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100917 U32 lowThreshold = (U32)(total >> tableLog);
918 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
919
920 for (s=0; s<=maxSymbolValue; s++)
921 {
922 if (count[s] == 0)
923 {
924 norm[s]=0;
925 continue;
926 }
927 if (count[s] <= lowThreshold)
928 {
929 norm[s] = -1;
930 distributed++;
931 total -= count[s];
932 continue;
933 }
934 if (count[s] <= lowOne)
935 {
936 norm[s] = 1;
937 distributed++;
938 total -= count[s];
939 continue;
940 }
941 norm[s]=-2;
942 }
943 ToDistribute = (1 << tableLog) - distributed;
944
945 if ((total / ToDistribute) > lowOne)
946 {
947 /* risk of rounding to zero */
948 lowOne = (U32)((total * 3) / (ToDistribute * 2));
949 for (s=0; s<=maxSymbolValue; s++)
950 {
951 if ((norm[s] == -2) && (count[s] <= lowOne))
952 {
953 norm[s] = 1;
954 distributed++;
955 total -= count[s];
956 continue;
957 }
958 }
959 ToDistribute = (1 << tableLog) - distributed;
960 }
961
962 if (distributed == maxSymbolValue+1)
963 {
Yann Collet26aa1ec2015-02-24 09:05:58 +0100964 /* all values are pretty poor;
965 probably incompressible data (should have already been detected);
966 find max, then give all remaining points to max */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100967 U32 maxV = 0, maxC =0;
968 for (s=0; s<=maxSymbolValue; s++)
969 if (count[s] > maxC) maxV=s, maxC=count[s];
Yann Collet1efa31f2015-07-04 15:56:41 -0800970 norm[maxV] += (short)ToDistribute;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100971 return 0;
972 }
973
974 {
975 U64 const vStepLog = 62 - tableLog;
976 U64 const mid = (1ULL << (vStepLog-1)) - 1;
977 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
978 U64 tmpTotal = mid;
979 for (s=0; s<=maxSymbolValue; s++)
980 {
981 if (norm[s]==-2)
982 {
983 U64 end = tmpTotal + (count[s] * rStep);
984 U32 sStart = (U32)(tmpTotal >> vStepLog);
985 U32 sEnd = (U32)(end >> vStepLog);
986 U32 weight = sEnd - sStart;
987 if (weight < 1)
988 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet213089c2015-06-18 07:43:16 -0800989 norm[s] = (short)weight;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100990 tmpTotal = end;
991 }
992 }
993 }
994
995 return 0;
996}
Yann Colleta3c75ba2015-02-21 03:31:59 +0100997
Yann Collet4856a002015-01-24 01:58:16 +0100998
999size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
1000 const unsigned* count, size_t total,
1001 unsigned maxSymbolValue)
1002{
1003 /* Sanity checks */
1004 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
1005 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1006 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1007 if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
1008
1009 {
1010 U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
1011 U64 const scale = 62 - tableLog;
1012 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
1013 U64 const vStep = 1ULL<<(scale-20);
1014 int stillToDistribute = 1<<tableLog;
1015 unsigned s;
1016 unsigned largest=0;
1017 short largestP=0;
1018 U32 lowThreshold = (U32)(total >> tableLog);
1019
1020 for (s=0; s<=maxSymbolValue; s++)
1021 {
1022 if (count[s] == total) return 0;
1023 if (count[s] == 0)
1024 {
1025 normalizedCounter[s]=0;
1026 continue;
1027 }
1028 if (count[s] <= lowThreshold)
1029 {
1030 normalizedCounter[s] = -1;
1031 stillToDistribute--;
1032 }
1033 else
1034 {
1035 short proba = (short)((count[s]*step) >> scale);
1036 if (proba<8)
1037 {
Yann Collet2ddf7e92015-02-08 20:26:47 +01001038 U64 restToBeat = vStep * rtbTable[proba];
Yann Collet4856a002015-01-24 01:58:16 +01001039 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
1040 }
1041 if (proba > largestP)
1042 {
1043 largestP=proba;
1044 largest=s;
1045 }
1046 normalizedCounter[s] = proba;
1047 stillToDistribute -= proba;
1048 }
1049 }
Yann Collet4856a002015-01-24 01:58:16 +01001050 if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
1051 {
Yann Collet26aa1ec2015-02-24 09:05:58 +01001052 /* corner case, need another normalization method */
1053 size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
Yann Collet565b81d2015-01-29 06:51:30 +01001054 if (FSE_isError(errorCode)) return errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001055 }
1056 else normalizedCounter[largest] += (short)stillToDistribute;
1057 }
1058
1059#if 0
1060 { /* Print Table (debug) */
Yann Collet565b81d2015-01-29 06:51:30 +01001061 U32 s;
1062 U32 nTotal = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001063 for (s=0; s<=maxSymbolValue; s++)
1064 printf("%3i: %4i \n", s, normalizedCounter[s]);
Yann Collet565b81d2015-01-29 06:51:30 +01001065 for (s=0; s<=maxSymbolValue; s++)
1066 nTotal += abs(normalizedCounter[s]);
1067 if (nTotal != (1U<<tableLog))
1068 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001069 getchar();
1070 }
1071#endif
1072
1073 return tableLog;
1074}
1075
1076
Yann Collet1efa31f2015-07-04 15:56:41 -08001077/* fake FSE_CTable, for raw (uncompressed) input */
1078size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001079{
1080 const unsigned tableSize = 1 << nbBits;
1081 const unsigned tableMask = tableSize - 1;
1082 const unsigned maxSymbolValue = tableMask;
Yann Collet1efa31f2015-07-04 15:56:41 -08001083 U16* tableU16 = ( (U16*) ct) + 2;
1084 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
Yann Collet4856a002015-01-24 01:58:16 +01001085 unsigned s;
1086
1087 /* Sanity checks */
1088 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001089
1090 /* header */
1091 tableU16[-2] = (U16) nbBits;
1092 tableU16[-1] = (U16) maxSymbolValue;
1093
1094 /* Build table */
1095 for (s=0; s<tableSize; s++)
1096 tableU16[s] = (U16)(tableSize + s);
1097
1098 /* Build Symbol Transformation Table */
1099 for (s=0; s<=maxSymbolValue; s++)
1100 {
Yann Collet77c82b62015-08-02 01:19:09 +01001101 symbolTT[s].deltaNbBits = nbBits << 16;
Yann Collet4856a002015-01-24 01:58:16 +01001102 symbolTT[s].deltaFindState = s-1;
Yann Collet4856a002015-01-24 01:58:16 +01001103 }
1104
1105 return 0;
1106}
1107
1108
Yann Collet1efa31f2015-07-04 15:56:41 -08001109/* fake FSE_CTable, for rle (100% always same symbol) input */
1110size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001111{
Yann Collet1efa31f2015-07-04 15:56:41 -08001112 U16* tableU16 = ( (U16*) ct) + 2;
1113 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
Yann Collet4856a002015-01-24 01:58:16 +01001114
1115 /* header */
1116 tableU16[-2] = (U16) 0;
1117 tableU16[-1] = (U16) symbolValue;
1118
1119 /* Build table */
1120 tableU16[0] = 0;
1121 tableU16[1] = 0; /* just in case */
1122
1123 /* Build Symbol Transformation Table */
1124 {
Yann Collet77c82b62015-08-02 01:19:09 +01001125 symbolTT[symbolValue].deltaNbBits = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001126 symbolTT[symbolValue].deltaFindState = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001127 }
1128
1129 return 0;
1130}
1131
1132
Yann Colleta7875502015-08-07 15:21:00 +01001133size_t FSE_initCStream(FSE_CStream_t* bitC, void* start, size_t maxSize)
Yann Collet4856a002015-01-24 01:58:16 +01001134{
Yann Colleta7875502015-08-07 15:21:00 +01001135 if (maxSize < 8) return (size_t)-FSE_ERROR_dstSize_tooSmall;
Yann Collet4856a002015-01-24 01:58:16 +01001136 bitC->bitContainer = 0;
Yann Colleta7875502015-08-07 15:21:00 +01001137 bitC->bitPos = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001138 bitC->startPtr = (char*)start;
1139 bitC->ptr = bitC->startPtr;
Yann Colleta7875502015-08-07 15:21:00 +01001140 bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr);
1141 return 0;
Yann Collet4856a002015-01-24 01:58:16 +01001142}
1143
Yann Collet1efa31f2015-07-04 15:56:41 -08001144void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +01001145{
Yann Collet1efa31f2015-07-04 15:56:41 -08001146 const U32 tableLog = ( (const U16*) ct) [0];
Yann Collet4856a002015-01-24 01:58:16 +01001147 statePtr->value = (ptrdiff_t)1<<tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001148 statePtr->stateTable = ((const U16*) ct) + 2;
1149 statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
Yann Collet4856a002015-01-24 01:58:16 +01001150 statePtr->stateLog = tableLog;
1151}
1152
Yann Collete8c6bb12015-07-26 00:23:57 +01001153void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */
1154{
1155 bitC->bitContainer |= value << bitC->bitPos;
1156 bitC->bitPos += nbBits;
1157}
1158
Yann Collet4856a002015-01-24 01:58:16 +01001159void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
1160{
1161 static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
1162 bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
1163 bitC->bitPos += nbBits;
1164}
1165
Yann Collet77c82b62015-08-02 01:19:09 +01001166void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
Yann Collet4856a002015-01-24 01:58:16 +01001167{
Yann Collet1efa31f2015-07-04 15:56:41 -08001168 const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1169 const U16* const stateTable = (const U16*)(statePtr->stateTable);
Yann Collet77c82b62015-08-02 01:19:09 +01001170 U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
Yann Collet4856a002015-01-24 01:58:16 +01001171 FSE_addBits(bitC, statePtr->value, nbBitsOut);
Yann Collet1efa31f2015-07-04 15:56:41 -08001172 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
Yann Collet4856a002015-01-24 01:58:16 +01001173}
1174
Yann Colleta7875502015-08-07 15:21:00 +01001175void FSE_flushBitsFast(FSE_CStream_t* bitC) /* only if dst buffer is large enough ( >= FSE_compressBound()) */
1176{
1177 size_t nbBytes = bitC->bitPos >> 3;
1178 FSE_writeLEST(bitC->ptr, bitC->bitContainer);
1179 bitC->ptr += nbBytes;
1180 bitC->bitPos &= 7;
1181 bitC->bitContainer >>= nbBytes*8;
1182}
1183
Yann Collet4856a002015-01-24 01:58:16 +01001184void FSE_flushBits(FSE_CStream_t* bitC)
1185{
1186 size_t nbBytes = bitC->bitPos >> 3;
1187 FSE_writeLEST(bitC->ptr, bitC->bitContainer);
Yann Collet4856a002015-01-24 01:58:16 +01001188 bitC->ptr += nbBytes;
Yann Colleta7875502015-08-07 15:21:00 +01001189 if (bitC->ptr <= bitC->endPtr)
1190 {
1191 bitC->bitPos &= 7;
1192 bitC->bitContainer >>= nbBytes*8;
1193 return;
1194 }
Yann Collet4856a002015-01-24 01:58:16 +01001195}
1196
1197void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
1198{
1199 FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
1200 FSE_flushBits(bitC);
1201}
1202
1203
1204size_t FSE_closeCStream(FSE_CStream_t* bitC)
1205{
1206 char* endPtr;
1207
Yann Colleta7875502015-08-07 15:21:00 +01001208 FSE_addBitsFast(bitC, 1, 1);
Yann Collet4856a002015-01-24 01:58:16 +01001209 FSE_flushBits(bitC);
1210
Yann Colleta7875502015-08-07 15:21:00 +01001211 if (bitC->bitPos > 7) /* still some data to flush => too close to buffer's end */
1212 return 0; /* not compressible */
1213
Yann Collet4856a002015-01-24 01:58:16 +01001214 endPtr = bitC->ptr;
1215 endPtr += bitC->bitPos > 0;
1216
1217 return (endPtr - bitC->startPtr);
1218}
1219
1220
Yann Colleta7875502015-08-07 15:21:00 +01001221static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
Yann Collet4856a002015-01-24 01:58:16 +01001222 const void* src, size_t srcSize,
Yann Colleta7875502015-08-07 15:21:00 +01001223 const FSE_CTable* ct, const unsigned fast)
Yann Collet4856a002015-01-24 01:58:16 +01001224{
1225 const BYTE* const istart = (const BYTE*) src;
1226 const BYTE* ip;
1227 const BYTE* const iend = istart + srcSize;
1228
Yann Colleta7875502015-08-07 15:21:00 +01001229 size_t errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001230 FSE_CStream_t bitC;
1231 FSE_CState_t CState1, CState2;
1232
1233
1234 /* init */
Yann Colleta7875502015-08-07 15:21:00 +01001235 errorCode = FSE_initCStream(&bitC, dst, dstSize);
1236 if (FSE_isError(errorCode)) return 0;
Yann Collet1efa31f2015-07-04 15:56:41 -08001237 FSE_initCState(&CState1, ct);
Yann Collet4856a002015-01-24 01:58:16 +01001238 CState2 = CState1;
1239
1240 ip=iend;
1241
Yann Colleta7875502015-08-07 15:21:00 +01001242#define FSE_FLUSHBITS(s) (fast ? FSE_flushBitsFast(s) : FSE_flushBits(s))
1243
Yann Collet4856a002015-01-24 01:58:16 +01001244 /* join to even */
1245 if (srcSize & 1)
1246 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001247 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Colleta7875502015-08-07 15:21:00 +01001248 FSE_FLUSHBITS(&bitC);
Yann Collet4856a002015-01-24 01:58:16 +01001249 }
1250
1251 /* join to mod 4 */
Yann Collet77c82b62015-08-02 01:19:09 +01001252 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
Yann Collet4856a002015-01-24 01:58:16 +01001253 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001254 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1255 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Colleta7875502015-08-07 15:21:00 +01001256 FSE_FLUSHBITS(&bitC);
Yann Collet4856a002015-01-24 01:58:16 +01001257 }
1258
1259 /* 2 or 4 encoding per loop */
Yann Colleta7875502015-08-07 15:21:00 +01001260 for ( ; ip>istart ; )
Yann Collet4856a002015-01-24 01:58:16 +01001261 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001262 FSE_encodeSymbol(&bitC, &CState2, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001263
Yann Collet77c82b62015-08-02 01:19:09 +01001264 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
Yann Colleta7875502015-08-07 15:21:00 +01001265 FSE_FLUSHBITS(&bitC);
Yann Collet4856a002015-01-24 01:58:16 +01001266
Yann Collet1efa31f2015-07-04 15:56:41 -08001267 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001268
Yann Collet77c82b62015-08-02 01:19:09 +01001269 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001270 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001271 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1272 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001273 }
1274
Yann Colleta7875502015-08-07 15:21:00 +01001275 FSE_FLUSHBITS(&bitC);
Yann Collet4856a002015-01-24 01:58:16 +01001276 }
1277
1278 FSE_flushCState(&bitC, &CState2);
1279 FSE_flushCState(&bitC, &CState1);
1280 return FSE_closeCStream(&bitC);
1281}
1282
Yann Colleta7875502015-08-07 15:21:00 +01001283size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1284 const void* src, size_t srcSize,
1285 const FSE_CTable* ct)
1286{
1287 const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
1288
1289 if (fast)
1290 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
1291 else
1292 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
1293}
1294
Yann Collet4856a002015-01-24 01:58:16 +01001295
Yann Collet4856a002015-01-24 01:58:16 +01001296size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
1297
Yann Collet4856a002015-01-24 01:58:16 +01001298size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
1299{
1300 const BYTE* const istart = (const BYTE*) src;
1301 const BYTE* ip = istart;
1302
1303 BYTE* const ostart = (BYTE*) dst;
1304 BYTE* op = ostart;
1305 BYTE* const oend = ostart + dstSize;
1306
1307 U32 count[FSE_MAX_SYMBOL_VALUE+1];
1308 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001309 CTable_max_t ct;
Yann Collet4856a002015-01-24 01:58:16 +01001310 size_t errorCode;
1311
Yann Colleta7875502015-08-07 15:21:00 +01001312 /* init conditions */
1313 if (srcSize <= 1) return 0; /* Uncompressible */
Yann Collet4856a002015-01-24 01:58:16 +01001314 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1315 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
1316
1317 /* Scan input and build symbol stats */
Yann Collet1efa31f2015-07-04 15:56:41 -08001318 errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001319 if (FSE_isError(errorCode)) return errorCode;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001320 if (errorCode == srcSize) return 1;
1321 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
Yann Collet4856a002015-01-24 01:58:16 +01001322
1323 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
1324 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
1325 if (FSE_isError(errorCode)) return errorCode;
1326
1327 /* Write table description header */
Yann Colleta7875502015-08-07 15:21:00 +01001328 errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001329 if (FSE_isError(errorCode)) return errorCode;
1330 op += errorCode;
1331
1332 /* Compress */
Yann Collet1efa31f2015-07-04 15:56:41 -08001333 errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001334 if (FSE_isError(errorCode)) return errorCode;
Yann Colleta7875502015-08-07 15:21:00 +01001335 errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
1336 if (errorCode == 0) return 0; /* not enough space for compressed data */
1337 op += errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001338
1339 /* check compressibility */
1340 if ( (size_t)(op-ostart) >= srcSize-1 )
1341 return 0;
1342
1343 return op-ostart;
1344}
1345
Yann Collet4856a002015-01-24 01:58:16 +01001346size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
1347{
1348 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
1349}
1350
1351
1352/*********************************************************
1353* Decompression (Byte symbols)
1354*********************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -08001355size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001356{
Yann Colleta7875502015-08-07 15:21:00 +01001357 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1358 FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
Yann Collet4856a002015-01-24 01:58:16 +01001359
Yann Colleta7875502015-08-07 15:21:00 +01001360 DTableH->tableLog = 0;
1361 DTableH->fastMode = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001362
1363 cell->newState = 0;
1364 cell->symbol = symbolValue;
1365 cell->nbBits = 0;
1366
1367 return 0;
1368}
1369
1370
Yann Collet1efa31f2015-07-04 15:56:41 -08001371size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001372{
Yann Colleta7875502015-08-07 15:21:00 +01001373 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1374 FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
Yann Collet4856a002015-01-24 01:58:16 +01001375 const unsigned tableSize = 1 << nbBits;
1376 const unsigned tableMask = tableSize - 1;
1377 const unsigned maxSymbolValue = tableMask;
1378 unsigned s;
1379
1380 /* Sanity checks */
1381 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001382
1383 /* Build Decoding Table */
Yann Colleta7875502015-08-07 15:21:00 +01001384 DTableH->tableLog = (U16)nbBits;
1385 DTableH->fastMode = 1;
Yann Collet4856a002015-01-24 01:58:16 +01001386 for (s=0; s<=maxSymbolValue; s++)
1387 {
1388 dinfo[s].newState = 0;
1389 dinfo[s].symbol = (BYTE)s;
1390 dinfo[s].nbBits = (BYTE)nbBits;
1391 }
1392
1393 return 0;
1394}
1395
1396
1397/* FSE_initDStream
1398 * Initialize a FSE_DStream_t.
1399 * srcBuffer must point at the beginning of an FSE block.
1400 * The function result is the size of the FSE_block (== srcSize).
1401 * If srcSize is too small, the function will return an errorCode;
1402 */
1403size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1404{
1405 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1406
Yann Collet1efa31f2015-07-04 15:56:41 -08001407 if (srcSize >= sizeof(size_t))
Yann Collet4856a002015-01-24 01:58:16 +01001408 {
1409 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001410 bitD->start = (const char*)srcBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -08001411 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
Yann Collet4856a002015-01-24 01:58:16 +01001412 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collet213089c2015-06-18 07:43:16 -08001413 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001414 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1415 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1416 }
1417 else
1418 {
1419 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001420 bitD->start = (const char*)srcBuffer;
Yann Collet4856a002015-01-24 01:58:16 +01001421 bitD->ptr = bitD->start;
Yann Collet213089c2015-06-18 07:43:16 -08001422 bitD->bitContainer = *(const BYTE*)(bitD->start);
Yann Collet4856a002015-01-24 01:58:16 +01001423 switch(srcSize)
1424 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001425 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
1426 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
1427 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
1428 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
1429 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
1430 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
Yann Collet4856a002015-01-24 01:58:16 +01001431 default:;
1432 }
Yann Collet213089c2015-06-18 07:43:16 -08001433 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001434 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1435 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
Yann Collet1efa31f2015-07-04 15:56:41 -08001436 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
Yann Collet4856a002015-01-24 01:58:16 +01001437 }
1438
1439 return srcSize;
1440}
1441
1442
Yann Collete8c6bb12015-07-26 00:23:57 +01001443/* FSE_lookBits
1444 * Provides next n bits from the bitContainer.
1445 * bitContainer is not modified (bits are still present for next read/look)
1446 * On 32-bits, maxNbBits==25
1447 * On 64-bits, maxNbBits==57
1448 * return : value extracted.
1449 */
1450static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
1451{
1452 return ((bitD->bitContainer << (bitD->bitsConsumed & ((sizeof(bitD->bitContainer)*8)-1))) >> 1) >> (((sizeof(bitD->bitContainer)*8)-1)-nbBits);
1453}
1454
1455static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1456{
1457 return (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD->bitContainer)*8)-nbBits);
1458}
1459
1460static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
1461{
1462 bitD->bitsConsumed += nbBits;
1463}
1464
1465
Yann Collet4856a002015-01-24 01:58:16 +01001466/* FSE_readBits
1467 * Read next n bits from the bitContainer.
Yann Collet213089c2015-06-18 07:43:16 -08001468 * On 32-bits, don't read more than maxNbBits==25
1469 * On 64-bits, don't read more than maxNbBits==57
1470 * Use the fast variant *only* if n >= 1.
Yann Collet4856a002015-01-24 01:58:16 +01001471 * return : value extracted.
1472 */
Yann Collet1efa31f2015-07-04 15:56:41 -08001473size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001474{
Yann Collete8c6bb12015-07-26 00:23:57 +01001475 size_t value = FSE_lookBits(bitD, nbBits);
1476 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001477 return value;
1478}
1479
Yann Collet1efa31f2015-07-04 15:56:41 -08001480size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
Yann Collet4856a002015-01-24 01:58:16 +01001481{
Yann Collete8c6bb12015-07-26 00:23:57 +01001482 size_t value = FSE_lookBitsFast(bitD, nbBits);
1483 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001484 return value;
1485}
1486
1487unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1488{
Yann Collete8c6bb12015-07-26 00:23:57 +01001489 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
Yann Collet4856a002015-01-24 01:58:16 +01001490 {
1491 bitD->ptr -= bitD->bitsConsumed >> 3;
1492 bitD->bitsConsumed &= 7;
1493 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collete8c6bb12015-07-26 00:23:57 +01001494 return FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001495 }
1496 if (bitD->ptr == bitD->start)
1497 {
Yann Colleta7875502015-08-07 15:21:00 +01001498 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
Yann Collete8c6bb12015-07-26 00:23:57 +01001499 if (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8) return FSE_DStream_completed;
1500 return FSE_DStream_tooFar;
Yann Collet4856a002015-01-24 01:58:16 +01001501 }
1502 {
1503 U32 nbBytes = bitD->bitsConsumed >> 3;
Yann Collete8c6bb12015-07-26 00:23:57 +01001504 U32 result = FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001505 if (bitD->ptr - nbBytes < bitD->start)
Yann Collete8c6bb12015-07-26 00:23:57 +01001506 {
Yann Collet4856a002015-01-24 01:58:16 +01001507 nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
Yann Colleta7875502015-08-07 15:21:00 +01001508 result = FSE_DStream_endOfBuffer;
Yann Collete8c6bb12015-07-26 00:23:57 +01001509 }
Yann Collet4856a002015-01-24 01:58:16 +01001510 bitD->ptr -= nbBytes;
1511 bitD->bitsConsumed -= nbBytes*8;
1512 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
Yann Collete8c6bb12015-07-26 00:23:57 +01001513 return result;
Yann Collet4856a002015-01-24 01:58:16 +01001514 }
1515}
1516
1517
Yann Collet1efa31f2015-07-04 15:56:41 -08001518void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
Yann Collet4856a002015-01-24 01:58:16 +01001519{
Yann Colleta7875502015-08-07 15:21:00 +01001520 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
1521 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001522 FSE_reloadDStream(bitD);
Yann Colleta7875502015-08-07 15:21:00 +01001523 DStatePtr->table = dt + 1;
Yann Collet4856a002015-01-24 01:58:16 +01001524}
1525
1526BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1527{
1528 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1529 const U32 nbBits = DInfo.nbBits;
1530 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001531 size_t lowBits = FSE_readBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001532
1533 DStatePtr->state = DInfo.newState + lowBits;
1534 return symbol;
1535}
1536
1537BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1538{
1539 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1540 const U32 nbBits = DInfo.nbBits;
1541 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001542 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001543
1544 DStatePtr->state = DInfo.newState + lowBits;
1545 return symbol;
1546}
1547
1548/* FSE_endOfDStream
1549 Tells if bitD has reached end of bitStream or not */
1550
1551unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1552{
Yann Collete8c6bb12015-07-26 00:23:57 +01001553 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
Yann Collet4856a002015-01-24 01:58:16 +01001554}
1555
Yann Collet1efa31f2015-07-04 15:56:41 -08001556unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
Yann Collet4856a002015-01-24 01:58:16 +01001557{
Yann Collet1efa31f2015-07-04 15:56:41 -08001558 return DStatePtr->state == 0;
Yann Collet4856a002015-01-24 01:58:16 +01001559}
1560
1561
1562FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1563 void* dst, size_t maxDstSize,
1564 const void* cSrc, size_t cSrcSize,
Yann Colleta7875502015-08-07 15:21:00 +01001565 const FSE_DTable* dt, const unsigned fast)
Yann Collet4856a002015-01-24 01:58:16 +01001566{
1567 BYTE* const ostart = (BYTE*) dst;
1568 BYTE* op = ostart;
1569 BYTE* const omax = op + maxDstSize;
1570 BYTE* const olimit = omax-3;
1571
1572 FSE_DStream_t bitD;
Yann Collet1efa31f2015-07-04 15:56:41 -08001573 FSE_DState_t state1;
1574 FSE_DState_t state2;
Yann Collet4856a002015-01-24 01:58:16 +01001575 size_t errorCode;
1576
1577 /* Init */
1578 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1579 if (FSE_isError(errorCode)) return errorCode;
1580
Yann Collet1efa31f2015-07-04 15:56:41 -08001581 FSE_initDState(&state1, &bitD, dt);
1582 FSE_initDState(&state2, &bitD, dt);
Yann Collet4856a002015-01-24 01:58:16 +01001583
Yann Collet77c82b62015-08-02 01:19:09 +01001584#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
Yann Collet4856a002015-01-24 01:58:16 +01001585
Yann Collet77c82b62015-08-02 01:19:09 +01001586 /* 4 symbols per loop */
1587 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
Yann Collet4856a002015-01-24 01:58:16 +01001588 {
Yann Collet77c82b62015-08-02 01:19:09 +01001589 op[0] = FSE_GETSYMBOL(&state1);
Yann Collet4856a002015-01-24 01:58:16 +01001590
Yann Collet77c82b62015-08-02 01:19:09 +01001591 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001592 FSE_reloadDStream(&bitD);
1593
Yann Collet77c82b62015-08-02 01:19:09 +01001594 op[1] = FSE_GETSYMBOL(&state2);
Yann Collet4856a002015-01-24 01:58:16 +01001595
Yann Collet77c82b62015-08-02 01:19:09 +01001596 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1597 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
1598
1599 op[2] = FSE_GETSYMBOL(&state1);
1600
1601 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1602 FSE_reloadDStream(&bitD);
1603
1604 op[3] = FSE_GETSYMBOL(&state2);
Yann Collet4856a002015-01-24 01:58:16 +01001605 }
1606
1607 /* tail */
Yann Collete8c6bb12015-07-26 00:23:57 +01001608 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
Yann Collet4856a002015-01-24 01:58:16 +01001609 while (1)
1610 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001611 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
Yann Collet4856a002015-01-24 01:58:16 +01001612 break;
1613
Yann Collet77c82b62015-08-02 01:19:09 +01001614 *op++ = FSE_GETSYMBOL(&state1);
Yann Collet4856a002015-01-24 01:58:16 +01001615
Yann Collete8c6bb12015-07-26 00:23:57 +01001616 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
Yann Collet4856a002015-01-24 01:58:16 +01001617 break;
1618
Yann Collet77c82b62015-08-02 01:19:09 +01001619 *op++ = FSE_GETSYMBOL(&state2);
Yann Collet4856a002015-01-24 01:58:16 +01001620 }
1621
1622 /* end ? */
Yann Collet77c82b62015-08-02 01:19:09 +01001623 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
Yann Collet4856a002015-01-24 01:58:16 +01001624 return op-ostart;
1625
1626 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1627
1628 return (size_t)-FSE_ERROR_corruptionDetected;
1629}
1630
1631
1632size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1633 const void* cSrc, size_t cSrcSize,
Yann Colleta7875502015-08-07 15:21:00 +01001634 const FSE_DTable* dt)
Yann Collet4856a002015-01-24 01:58:16 +01001635{
Yann Colleta7875502015-08-07 15:21:00 +01001636 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
1637 const U32 fastMode = DTableH->fastMode;
1638
Yann Collet4856a002015-01-24 01:58:16 +01001639 /* select fast mode (static) */
Yann Collet1efa31f2015-07-04 15:56:41 -08001640 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
1641 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
Yann Collet4856a002015-01-24 01:58:16 +01001642}
1643
1644
1645size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1646{
1647 const BYTE* const istart = (const BYTE*)cSrc;
1648 const BYTE* ip = istart;
1649 short counting[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001650 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
Yann Collet4856a002015-01-24 01:58:16 +01001651 unsigned tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001652 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
Yann Colleta7875502015-08-07 15:21:00 +01001653 size_t errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001654
1655 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1656
1657 /* normal FSE decoding mode */
Yann Collet1efa31f2015-07-04 15:56:41 -08001658 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001659 if (FSE_isError(errorCode)) return errorCode;
1660 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1661 ip += errorCode;
1662 cSrcSize -= errorCode;
1663
Yann Colleta7875502015-08-07 15:21:00 +01001664 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
1665 if (FSE_isError(errorCode)) return errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001666
1667 /* always return, even if it is an error code */
Yann Colleta7875502015-08-07 15:21:00 +01001668 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
Yann Collet4856a002015-01-24 01:58:16 +01001669}
1670
1671
Yann Collet4856a002015-01-24 01:58:16 +01001672
Yann Collete8c6bb12015-07-26 00:23:57 +01001673/*********************************************************
1674* Huff0 : Huffman block compression
1675*********************************************************/
Yann Collete8c6bb12015-07-26 00:23:57 +01001676#define HUF_MAX_SYMBOL_VALUE 255
Yann Colletfb8296f2015-07-27 19:34:27 +01001677#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
1678#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
Yann Collet77c82b62015-08-02 01:19:09 +01001679#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
Yann Colletfb8296f2015-07-27 19:34:27 +01001680#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
1681# error "HUF_MAX_TABLELOG is too large !"
1682#endif
Yann Collet4856a002015-01-24 01:58:16 +01001683
Yann Collete8c6bb12015-07-26 00:23:57 +01001684typedef struct HUF_CElt_s {
1685 U16 val;
1686 BYTE nbBits;
1687} HUF_CElt ;
Yann Collet4856a002015-01-24 01:58:16 +01001688
Yann Collete8c6bb12015-07-26 00:23:57 +01001689typedef struct nodeElt_s {
1690 U32 count;
1691 U16 parent;
1692 BYTE byte;
1693 BYTE nbBits;
1694} nodeElt;
Yann Collet4856a002015-01-24 01:58:16 +01001695
Yann Colleta7875502015-08-07 15:21:00 +01001696/* HUF_writeCTable() :
1697 return : size of saved CTable */
Yann Collete8c6bb12015-07-26 00:23:57 +01001698size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog)
Yann Collet4856a002015-01-24 01:58:16 +01001699{
Yann Colleta7875502015-08-07 15:21:00 +01001700 BYTE bitsToWeight[HUF_ABSOLUTEMAX_TABLELOG + 1];
Yann Collete8c6bb12015-07-26 00:23:57 +01001701 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
1702 U32 n;
1703 BYTE* op = (BYTE*)dst;
1704 size_t size;
Yann Collet4856a002015-01-24 01:58:16 +01001705
Yann Colleta7875502015-08-07 15:21:00 +01001706 /* check conditions */
Yann Collete8c6bb12015-07-26 00:23:57 +01001707 if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
1708 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +01001709
Yann Colleta7875502015-08-07 15:21:00 +01001710 /* convert to weight */
1711 bitsToWeight[0] = 0;
1712 for (n=1; n<=huffLog; n++)
1713 bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
Yann Collete8c6bb12015-07-26 00:23:57 +01001714 for (n=0; n<maxSymbolValue; n++)
Yann Colleta7875502015-08-07 15:21:00 +01001715 huffWeight[n] = bitsToWeight[tree[n].nbBits];
Yann Collet4856a002015-01-24 01:58:16 +01001716
Yann Colleta7875502015-08-07 15:21:00 +01001717 size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); /* don't need last symbol stat : implied */
Yann Collete8c6bb12015-07-26 00:23:57 +01001718 if (FSE_isError(size)) return size;
Yann Colleta7875502015-08-07 15:21:00 +01001719 if (size >= 128) return (size_t)-FSE_ERROR_GENERIC; /* should never happen, since maxSymbolValue <= 255 */
Yann Collet77c82b62015-08-02 01:19:09 +01001720 if ((size <= 1) || (size >= maxSymbolValue/2))
1721 {
Yann Colleta7875502015-08-07 15:21:00 +01001722 if (size==1) /* RLE */
Yann Collet77c82b62015-08-02 01:19:09 +01001723 {
Yann Colleta7875502015-08-07 15:21:00 +01001724 /* only possible case : serie of 1 (because there are at least 2) */
1725 /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
1726 BYTE code;
1727 switch(maxSymbolValue)
1728 {
1729 case 1: code = 0; break;
1730 case 2: code = 1; break;
1731 case 3: code = 2; break;
1732 case 4: code = 3; break;
1733 case 7: code = 4; break;
1734 case 8: code = 5; break;
1735 case 15: code = 6; break;
1736 case 16: code = 7; break;
1737 case 31: code = 8; break;
1738 case 32: code = 9; break;
1739 case 63: code = 10; break;
1740 case 64: code = 11; break;
1741 case 127: code = 12; break;
1742 case 128: code = 13; break;
1743 default : return (size_t)-FSE_ERROR_corruptionDetected;
1744 }
1745 op[0] = (BYTE)(255-13 + code);
1746 return 1;
Yann Collet77c82b62015-08-02 01:19:09 +01001747 }
Yann Colleta7875502015-08-07 15:21:00 +01001748 /* Not compressible */
1749 if (maxSymbolValue > (241-128)) return (size_t)-FSE_ERROR_GENERIC; /* not implemented (not possible with current format) */
1750 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* not enough space within dst buffer */
Yann Collet77c82b62015-08-02 01:19:09 +01001751 op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
Yann Colleta7875502015-08-07 15:21:00 +01001752 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */
Yann Collet77c82b62015-08-02 01:19:09 +01001753 for (n=0; n<maxSymbolValue; n+=2)
1754 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
1755 return ((maxSymbolValue+1)/2) + 1;
1756 }
Yann Collet4856a002015-01-24 01:58:16 +01001757
Yann Colleta7875502015-08-07 15:21:00 +01001758 /* normal header case */
Yann Collete8c6bb12015-07-26 00:23:57 +01001759 op[0] = (BYTE)size;
1760 return size+1;
Yann Collet4856a002015-01-24 01:58:16 +01001761}
1762
1763
Yann Collete8c6bb12015-07-26 00:23:57 +01001764static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001765{
Yann Collete8c6bb12015-07-26 00:23:57 +01001766 int totalCost = 0;
1767 const U32 largestBits = huffNode[lastNonNull].nbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001768
Yann Colleta7875502015-08-07 15:21:00 +01001769 /* early exit : all is fine */
Yann Collete8c6bb12015-07-26 00:23:57 +01001770 if (largestBits <= maxNbBits) return largestBits;
Yann Collet4856a002015-01-24 01:58:16 +01001771
Yann Collete8c6bb12015-07-26 00:23:57 +01001772 // now we have a few too large elements (at least >= 2)
Yann Collet4856a002015-01-24 01:58:16 +01001773 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001774 const U32 baseCost = 1 << (largestBits - maxNbBits);
1775 U32 n = lastNonNull;
1776
1777 while (huffNode[n].nbBits > maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001778 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001779 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
1780 huffNode[n].nbBits = (BYTE)maxNbBits;
1781 n --;
Yann Collet4856a002015-01-24 01:58:16 +01001782 }
Yann Collet4856a002015-01-24 01:58:16 +01001783
Yann Collete8c6bb12015-07-26 00:23:57 +01001784 /* renorm totalCost */
1785 totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */
1786
1787 // repay cost
1788 while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1)
1789
Yann Collet4856a002015-01-24 01:58:16 +01001790 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001791 const U32 noOne = 0xF0F0F0F0;
1792 // Get pos of last (smallest) symbol per rank
1793 U32 rankLast[HUF_MAX_TABLELOG];
1794 U32 currentNbBits = maxNbBits;
1795 int pos;
1796 memset(rankLast, 0xF0, sizeof(rankLast));
1797 for (pos=n ; pos >= 0; pos--)
Yann Collet4856a002015-01-24 01:58:16 +01001798 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001799 if (huffNode[pos].nbBits >= currentNbBits) continue;
1800 currentNbBits = huffNode[pos].nbBits;
1801 rankLast[maxNbBits-currentNbBits] = pos;
1802 }
1803
1804 while (totalCost > 0)
1805 {
1806 U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1;
1807 for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
1808 {
1809 U32 highPos = rankLast[nBitsToDecrease];
1810 U32 lowPos = rankLast[nBitsToDecrease-1];
1811 if (highPos == noOne) continue;
1812 if (lowPos == noOne) break;
1813 {
1814 U32 highTotal = huffNode[highPos].count;
1815 U32 lowTotal = 2 * huffNode[lowPos].count;
1816 if (highTotal <= lowTotal) break;
1817 }
1818 }
1819 while (rankLast[nBitsToDecrease] == noOne)
1820 nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest
1821 totalCost -= 1 << (nBitsToDecrease-1);
1822 if (rankLast[nBitsToDecrease-1] == noOne)
1823 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt
1824 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
1825 rankLast[nBitsToDecrease]--;
1826 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
1827 rankLast[nBitsToDecrease] = noOne; // rank list emptied
1828 }
1829 while (totalCost < 0) // Sometimes, cost correction overshoot
1830 {
1831 if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */
1832 {
1833 while (huffNode[n].nbBits == maxNbBits) n--;
1834 huffNode[n+1].nbBits--;
1835 rankLast[1] = n+1;
1836 totalCost++;
1837 continue;
1838 }
1839 huffNode[ rankLast[1] + 1 ].nbBits--;
1840 rankLast[1]++;
1841 totalCost ++;
1842 }
1843 }
1844 }
1845
1846 return maxNbBits;
1847}
1848
1849
1850typedef struct {
1851 U32 base;
1852 U32 current;
1853} rankPos;
1854
1855static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
1856{
1857 rankPos rank[32];
1858 U32 n;
1859
1860 memset(rank, 0, sizeof(rank));
1861 for (n=0; n<=maxSymbolValue; n++)
1862 {
1863 U32 r = FSE_highbit32(count[n] + 1);
1864 rank[r].base ++;
1865 }
1866 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
1867 for (n=0; n<32; n++) rank[n].current = rank[n].base;
1868 for (n=0; n<=maxSymbolValue; n++)
1869 {
1870 U32 c = count[n];
1871 U32 r = FSE_highbit32(c+1) + 1;
1872 U32 pos = rank[r].current++;
1873 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
1874 huffNode[pos].count = c;
1875 huffNode[pos].byte = (BYTE)n;
1876 }
1877}
1878
1879
1880#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
1881size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
1882{
1883 nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
1884 nodeElt* huffNode = huffNode0 + 1;
1885 U32 n, nonNullRank;
1886 int lowS, lowN;
1887 U16 nodeNb = STARTNODE;
1888 U32 nodeRoot;
1889
1890 // check
Yann Collet77c82b62015-08-02 01:19:09 +01001891 if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
1892 if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC;
Yann Collete8c6bb12015-07-26 00:23:57 +01001893 memset(huffNode0, 0, sizeof(huffNode0));
1894
1895 // sort, decreasing order
1896 HUF_sort(huffNode, count, maxSymbolValue);
1897
1898 // init for parents
1899 nonNullRank = maxSymbolValue;
1900 while(huffNode[nonNullRank].count == 0) nonNullRank--;
1901 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
1902 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
1903 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
1904 nodeNb++; lowS-=2;
1905 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
1906 huffNode0[0].count = (U32)(1U<<31);
1907
1908 // create parents
1909 while (nodeNb <= nodeRoot)
1910 {
1911 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1912 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1913 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
1914 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
1915 nodeNb++;
1916 }
1917
1918 // distribute weights (unlimited tree height)
1919 huffNode[nodeRoot].nbBits = 0;
1920 for (n=nodeRoot-1; n>=STARTNODE; n--)
1921 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1922 for (n=0; n<=nonNullRank; n++)
1923 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1924
1925 // enforce maxTableLog
1926 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
1927
1928 // fill result into tree (val, nbBits)
1929 {
1930 U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0};
1931 U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1];
1932 if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check
1933 for (n=0; n<=nonNullRank; n++)
1934 nbPerRank[huffNode[n].nbBits]++;
1935 {
1936 // determine stating value per rank
1937 U16 min = 0;
1938 for (n=maxNbBits; n>0; n--)
1939 {
1940 valPerRank[n] = min; // get starting value within each rank
1941 min += nbPerRank[n];
1942 min >>= 1;
Yann Collet4856a002015-01-24 01:58:16 +01001943 }
1944 }
Yann Collete8c6bb12015-07-26 00:23:57 +01001945 for (n=0; n<=maxSymbolValue; n++)
1946 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order
1947 for (n=0; n<=maxSymbolValue; n++)
1948 tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order
Yann Collet4856a002015-01-24 01:58:16 +01001949 }
1950
Yann Collete8c6bb12015-07-26 00:23:57 +01001951 return maxNbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001952}
1953
Yann Collet77c82b62015-08-02 01:19:09 +01001954static void HUF_encodeSymbol(FSE_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
1955{
1956 FSE_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
1957}
1958
1959#define FSE_FLUSHBITS_1(stream) \
Yann Colleta7875502015-08-07 15:21:00 +01001960 if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) FSE_FLUSHBITS(stream)
Yann Collet77c82b62015-08-02 01:19:09 +01001961
1962#define FSE_FLUSHBITS_2(stream) \
Yann Colleta7875502015-08-07 15:21:00 +01001963 if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) FSE_FLUSHBITS(stream)
Yann Collet4856a002015-01-24 01:58:16 +01001964
Yann Colleta7875502015-08-07 15:21:00 +01001965size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable)
Yann Collet4856a002015-01-24 01:58:16 +01001966{
Yann Collete8c6bb12015-07-26 00:23:57 +01001967 const BYTE* ip = (const BYTE*) src;
1968 BYTE* const ostart = (BYTE*)dst;
1969 BYTE* op = (BYTE*) ostart;
Yann Colleta7875502015-08-07 15:21:00 +01001970 BYTE* const oend = ostart + dstSize;
Yann Collete8c6bb12015-07-26 00:23:57 +01001971 U16* jumpTable = (U16*) dst;
1972 size_t n, streamSize;
Yann Colleta7875502015-08-07 15:21:00 +01001973 const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
1974 size_t errorCode;
Yann Collete8c6bb12015-07-26 00:23:57 +01001975 FSE_CStream_t bitC;
Yann Collet4856a002015-01-24 01:58:16 +01001976
Yann Collete8c6bb12015-07-26 00:23:57 +01001977 /* init */
Yann Collete8c6bb12015-07-26 00:23:57 +01001978 op += 6; /* jump Table -- could be optimized by delta / deviation */
Yann Colleta7875502015-08-07 15:21:00 +01001979 errorCode = FSE_initCStream(&bitC, op, dstSize);
1980 if (FSE_isError(errorCode)) return 0;
Yann Collet4856a002015-01-24 01:58:16 +01001981
Yann Collete8c6bb12015-07-26 00:23:57 +01001982 n = srcSize & ~15; // mod 16
1983 switch (srcSize & 15)
Yann Collet4856a002015-01-24 01:58:16 +01001984 {
Yann Collet77c82b62015-08-02 01:19:09 +01001985 case 15: HUF_encodeSymbol(&bitC, ip[n+14], CTable);
1986 FSE_FLUSHBITS_1(&bitC);
1987 case 14: HUF_encodeSymbol(&bitC, ip[n+13], CTable);
1988 FSE_FLUSHBITS_2(&bitC);
1989 case 13: HUF_encodeSymbol(&bitC, ip[n+12], CTable);
1990 FSE_FLUSHBITS_1(&bitC);
1991 case 12: HUF_encodeSymbol(&bitC, ip[n+11], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01001992 FSE_FLUSHBITS(&bitC);
Yann Collet77c82b62015-08-02 01:19:09 +01001993 case 11: HUF_encodeSymbol(&bitC, ip[n+10], CTable);
1994 FSE_FLUSHBITS_1(&bitC);
1995 case 10: HUF_encodeSymbol(&bitC, ip[n+ 9], CTable);
1996 FSE_FLUSHBITS_2(&bitC);
1997 case 9 : HUF_encodeSymbol(&bitC, ip[n+ 8], CTable);
1998 FSE_FLUSHBITS_1(&bitC);
1999 case 8 : HUF_encodeSymbol(&bitC, ip[n+ 7], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002000 FSE_FLUSHBITS(&bitC);
Yann Collet77c82b62015-08-02 01:19:09 +01002001 case 7 : HUF_encodeSymbol(&bitC, ip[n+ 6], CTable);
2002 FSE_FLUSHBITS_1(&bitC);
2003 case 6 : HUF_encodeSymbol(&bitC, ip[n+ 5], CTable);
2004 FSE_FLUSHBITS_2(&bitC);
2005 case 5 : HUF_encodeSymbol(&bitC, ip[n+ 4], CTable);
2006 FSE_FLUSHBITS_1(&bitC);
2007 case 4 : HUF_encodeSymbol(&bitC, ip[n+ 3], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002008 FSE_FLUSHBITS(&bitC);
Yann Collet77c82b62015-08-02 01:19:09 +01002009 case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
2010 FSE_FLUSHBITS_2(&bitC);
2011 case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
2012 FSE_FLUSHBITS_1(&bitC);
2013 case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002014 FSE_FLUSHBITS(&bitC);
Yann Collete8c6bb12015-07-26 00:23:57 +01002015 case 0 :
2016 default: ;
Yann Collet4856a002015-01-24 01:58:16 +01002017 }
2018
Yann Collete8c6bb12015-07-26 00:23:57 +01002019 for (; n>0; n-=16)
Yann Collet4856a002015-01-24 01:58:16 +01002020 {
Yann Collet77c82b62015-08-02 01:19:09 +01002021 HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
2022 FSE_FLUSHBITS_1(&bitC);
2023 HUF_encodeSymbol(&bitC, ip[n- 8], CTable);
2024 FSE_FLUSHBITS_2(&bitC);
2025 HUF_encodeSymbol(&bitC, ip[n-12], CTable);
2026 FSE_FLUSHBITS_1(&bitC);
2027 HUF_encodeSymbol(&bitC, ip[n-16], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002028 FSE_FLUSHBITS(&bitC);
Yann Collete8c6bb12015-07-26 00:23:57 +01002029 }
2030 streamSize = FSE_closeCStream(&bitC);
Yann Colleta7875502015-08-07 15:21:00 +01002031 if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
Yann Collet138db212015-07-27 20:19:21 +01002032 FSE_writeLE16(jumpTable, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01002033 op += streamSize;
2034
Yann Colleta7875502015-08-07 15:21:00 +01002035 errorCode = FSE_initCStream(&bitC, op, oend-op);
2036 if (FSE_isError(errorCode)) return 0;
Yann Collete8c6bb12015-07-26 00:23:57 +01002037 n = srcSize & ~15; // mod 16
2038 for (; n>0; n-=16)
2039 {
Yann Collet77c82b62015-08-02 01:19:09 +01002040 HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
2041 FSE_FLUSHBITS_1(&bitC);
2042 HUF_encodeSymbol(&bitC, ip[n- 7], CTable);
2043 FSE_FLUSHBITS_2(&bitC);
2044 HUF_encodeSymbol(&bitC, ip[n-11], CTable);
2045 FSE_FLUSHBITS_1(&bitC);
2046 HUF_encodeSymbol(&bitC, ip[n-15], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002047 FSE_FLUSHBITS(&bitC);
Yann Collete8c6bb12015-07-26 00:23:57 +01002048 }
2049 streamSize = FSE_closeCStream(&bitC);
Yann Colleta7875502015-08-07 15:21:00 +01002050 if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
Yann Collet138db212015-07-27 20:19:21 +01002051 FSE_writeLE16(jumpTable+1, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01002052 op += streamSize;
2053
Yann Colleta7875502015-08-07 15:21:00 +01002054 errorCode = FSE_initCStream(&bitC, op, oend-op);
2055 if (FSE_isError(errorCode)) return 0;
Yann Collete8c6bb12015-07-26 00:23:57 +01002056 n = srcSize & ~15; // mod 16
2057 for (; n>0; n-=16)
2058 {
Yann Collet77c82b62015-08-02 01:19:09 +01002059 HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
2060 FSE_FLUSHBITS_1(&bitC);
2061 HUF_encodeSymbol(&bitC, ip[n- 6], CTable);
2062 FSE_FLUSHBITS_2(&bitC);
2063 HUF_encodeSymbol(&bitC, ip[n-10], CTable);
2064 FSE_FLUSHBITS_1(&bitC);
2065 HUF_encodeSymbol(&bitC, ip[n-14], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002066 FSE_FLUSHBITS(&bitC);
Yann Collete8c6bb12015-07-26 00:23:57 +01002067 }
2068 streamSize = FSE_closeCStream(&bitC);
Yann Colleta7875502015-08-07 15:21:00 +01002069 if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
Yann Collet138db212015-07-27 20:19:21 +01002070 FSE_writeLE16(jumpTable+2, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01002071 op += streamSize;
2072
Yann Colleta7875502015-08-07 15:21:00 +01002073 errorCode = FSE_initCStream(&bitC, op, oend-op);
2074 if (FSE_isError(errorCode)) return 0;
Yann Collete8c6bb12015-07-26 00:23:57 +01002075 n = srcSize & ~15; // mod 16
2076 for (; n>0; n-=16)
2077 {
Yann Collet77c82b62015-08-02 01:19:09 +01002078 HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
2079 FSE_FLUSHBITS_1(&bitC);
2080 HUF_encodeSymbol(&bitC, ip[n- 5], CTable);
2081 FSE_FLUSHBITS_2(&bitC);
2082 HUF_encodeSymbol(&bitC, ip[n- 9], CTable);
2083 FSE_FLUSHBITS_1(&bitC);
2084 HUF_encodeSymbol(&bitC, ip[n-13], CTable);
Yann Colleta7875502015-08-07 15:21:00 +01002085 FSE_FLUSHBITS(&bitC);
Yann Collete8c6bb12015-07-26 00:23:57 +01002086 }
2087 streamSize = FSE_closeCStream(&bitC);
Yann Colleta7875502015-08-07 15:21:00 +01002088 if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
Yann Collete8c6bb12015-07-26 00:23:57 +01002089 op += streamSize;
2090
2091 return op-ostart;
2092}
2093
2094
2095size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog)
2096{
2097 BYTE* const ostart = (BYTE*)dst;
2098 BYTE* op = ostart;
2099 BYTE* const oend = ostart + dstSize;
2100
2101 U32 count[HUF_MAX_SYMBOL_VALUE+1];
2102 HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
2103 size_t errorCode;
2104
2105 /* early out */
Yann Collete8c6bb12015-07-26 00:23:57 +01002106 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
2107 if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
2108 if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
2109
2110 /* Scan input and build symbol stats */
2111 errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
2112 if (FSE_isError(errorCode)) return errorCode;
2113 if (errorCode == srcSize) return 1;
2114 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
2115
2116 /* Build Huffman Tree */
2117 errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
2118 if (FSE_isError(errorCode)) return errorCode;
2119 huffLog = (U32)errorCode;
2120
2121 /* Write table description header */
2122 errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */
2123 if (FSE_isError(errorCode)) return errorCode;
2124 op += errorCode;
2125
2126 /* Compress */
2127 op += HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);
2128
2129 /* check compressibility */
2130 if ((size_t)(op-ostart) >= srcSize-1)
Yann Colleta7875502015-08-07 15:21:00 +01002131 return op-ostart;
Yann Collete8c6bb12015-07-26 00:23:57 +01002132
2133 return op-ostart;
2134}
2135
2136size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2137{
2138 return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
2139}
2140
2141
2142/*********************************************************
2143* Huff0 : Huffman block decompression
2144*********************************************************/
2145typedef struct {
2146 BYTE byte;
2147 BYTE nbBits;
2148} HUF_DElt;
2149
2150size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
2151{
2152 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
Yann Colleta7875502015-08-07 15:21:00 +01002153 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
2154 U32 weightTotal;
Yann Collete8c6bb12015-07-26 00:23:57 +01002155 U32 maxBits;
2156 const BYTE* ip = (const BYTE*) src;
2157 size_t iSize = ip[0];
2158 size_t oSize;
2159 U32 n;
2160 U32 nextRankStart;
2161 HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
2162
Yann Colleta7875502015-08-07 15:21:00 +01002163 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
2164 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
2165 if (iSize >= 128) /* special header */
Yann Collet77c82b62015-08-02 01:19:09 +01002166 {
Yann Colleta7875502015-08-07 15:21:00 +01002167 if (iSize >= (242)) /* RLE */
Yann Collet77c82b62015-08-02 01:19:09 +01002168 {
Yann Colleta7875502015-08-07 15:21:00 +01002169 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
2170 oSize = l[iSize-242];
2171 memset(huffWeight, 1, oSize);
2172 iSize = 0;
Yann Collet77c82b62015-08-02 01:19:09 +01002173 }
Yann Colleta7875502015-08-07 15:21:00 +01002174 else /* Incompressible */
Yann Collet77c82b62015-08-02 01:19:09 +01002175 {
Yann Colleta7875502015-08-07 15:21:00 +01002176 oSize = iSize - 127;
Yann Collet77c82b62015-08-02 01:19:09 +01002177 iSize = ((oSize+1)/2);
2178 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2179 ip += 1;
2180 for (n=0; n<oSize; n+=2)
2181 {
Yann Colleta7875502015-08-07 15:21:00 +01002182 huffWeight[n] = ip[n/2] >> 4;
2183 huffWeight[n+1] = ip[n/2] & 15;
Yann Collet77c82b62015-08-02 01:19:09 +01002184 }
2185 }
2186 }
Yann Colleta7875502015-08-07 15:21:00 +01002187 else /* header compressed with FSE (normal case) */
Yann Collet77c82b62015-08-02 01:19:09 +01002188 {
2189 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Colleta7875502015-08-07 15:21:00 +01002190 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
Yann Collet77c82b62015-08-02 01:19:09 +01002191 if (FSE_isError(oSize)) return oSize;
2192 }
Yann Collete8c6bb12015-07-26 00:23:57 +01002193
Yann Colleta7875502015-08-07 15:21:00 +01002194 /* collect weight stats */
2195 memset(rankVal, 0, sizeof(rankVal));
2196 weightTotal = 0;
Yann Collete8c6bb12015-07-26 00:23:57 +01002197 for (n=0; n<oSize; n++)
2198 {
Yann Colleta7875502015-08-07 15:21:00 +01002199 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
Yann Collete8c6bb12015-07-26 00:23:57 +01002200 rankVal[huffWeight[n]]++;
2201 weightTotal += (1 << huffWeight[n]) >> 1;
Yann Collet4856a002015-01-24 01:58:16 +01002202 }
2203
Yann Colleta7875502015-08-07 15:21:00 +01002204 /* get last non-null symbol weight (implied, total must be 2^n) */
Yann Collete8c6bb12015-07-26 00:23:57 +01002205 maxBits = FSE_highbit32(weightTotal) + 1;
Yann Colleta7875502015-08-07 15:21:00 +01002206 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
Yann Collete8c6bb12015-07-26 00:23:57 +01002207 DTable[0] = (U16)maxBits;
2208 {
2209 U32 total = 1 << maxBits;
2210 U32 rest = total - weightTotal;
2211 U32 verif = 1 << FSE_highbit32(rest);
Yann Colleta7875502015-08-07 15:21:00 +01002212 U32 lastWeight = FSE_highbit32(rest) + 1;
2213 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
2214 huffWeight[oSize] = (BYTE)lastWeight;
2215 rankVal[lastWeight]++;
Yann Collete8c6bb12015-07-26 00:23:57 +01002216 }
Yann Collet4856a002015-01-24 01:58:16 +01002217
Yann Colleta7875502015-08-07 15:21:00 +01002218 /* check tree construction validity */
2219 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
2220
2221 /* Prepare ranks */
Yann Collete8c6bb12015-07-26 00:23:57 +01002222 nextRankStart = 0;
2223 for (n=1; n<=maxBits; n++)
2224 {
2225 U32 current = nextRankStart;
2226 nextRankStart += (rankVal[n] << (n-1));
2227 rankVal[n] = current;
2228 }
2229
Yann Colleta7875502015-08-07 15:21:00 +01002230 /* fill DTable */
Yann Collete8c6bb12015-07-26 00:23:57 +01002231 for (n=0; n<=oSize; n++)
Yann Collet4856a002015-01-24 01:58:16 +01002232 {
Yann Collete8c6bb12015-07-26 00:23:57 +01002233 const U32 w = huffWeight[n];
2234 const U32 length = (1 << w) >> 1;
Yann Colleta7875502015-08-07 15:21:00 +01002235 U32 i;
Yann Collete8c6bb12015-07-26 00:23:57 +01002236 HUF_DElt D;
2237 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
2238 for (i = rankVal[w]; i < rankVal[w] + length; i++)
2239 dt[i] = D;
2240 rankVal[w] += length;
Yann Collet4856a002015-01-24 01:58:16 +01002241 }
2242
Yann Collete8c6bb12015-07-26 00:23:57 +01002243 return iSize+1;
Yann Collet4856a002015-01-24 01:58:16 +01002244}
Yann Collete8c6bb12015-07-26 00:23:57 +01002245
Yann Colleta7875502015-08-07 15:21:00 +01002246
2247static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
Yann Collete8c6bb12015-07-26 00:23:57 +01002248{
Yann Colleta7875502015-08-07 15:21:00 +01002249 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
2250 const BYTE c = dt[val].byte;
Yann Collete8c6bb12015-07-26 00:23:57 +01002251 FSE_skipBits(Dstream, dt[val].nbBits);
Yann Colleta7875502015-08-07 15:21:00 +01002252 return c;
Yann Collete8c6bb12015-07-26 00:23:57 +01002253}
2254
Yann Colleta7875502015-08-07 15:21:00 +01002255static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
Yann Collete8c6bb12015-07-26 00:23:57 +01002256 void* dst, size_t maxDstSize,
2257 const void* cSrc, size_t cSrcSize,
2258 const U16* DTable)
2259{
2260 BYTE* const ostart = (BYTE*) dst;
2261 BYTE* op = ostart;
2262 BYTE* const omax = op + maxDstSize;
2263 BYTE* const olimit = omax-15;
2264
2265 const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
2266 const U32 dtLog = DTable[0];
2267 size_t errorCode;
2268 U32 reloadStatus;
2269
2270 /* Init */
2271
2272 const U16* jumpTable = (const U16*)cSrc;
Yann Collet138db212015-07-27 20:19:21 +01002273 const size_t length1 = FSE_readLE16(jumpTable);
2274 const size_t length2 = FSE_readLE16(jumpTable+1);
2275 const size_t length3 = FSE_readLE16(jumpTable+2);
Yann Collete8c6bb12015-07-26 00:23:57 +01002276 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
2277 const char* const start1 = (const char*)(cSrc) + 6;
2278 const char* const start2 = start1 + length1;
2279 const char* const start3 = start2 + length2;
2280 const char* const start4 = start3 + length3;
2281 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
2282
Yann Colleta7875502015-08-07 15:21:00 +01002283 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2284
Yann Collete8c6bb12015-07-26 00:23:57 +01002285 errorCode = FSE_initDStream(&bitD1, start1, length1);
2286 if (FSE_isError(errorCode)) return errorCode;
2287 errorCode = FSE_initDStream(&bitD2, start2, length2);
2288 if (FSE_isError(errorCode)) return errorCode;
2289 errorCode = FSE_initDStream(&bitD3, start3, length3);
2290 if (FSE_isError(errorCode)) return errorCode;
2291 errorCode = FSE_initDStream(&bitD4, start4, length4);
2292 if (FSE_isError(errorCode)) return errorCode;
2293
2294 reloadStatus=FSE_reloadDStream(&bitD2);
2295
2296 /* 16 symbols per loop */
2297 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
2298 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
2299 {
Yann Colletfb8296f2015-07-27 19:34:27 +01002300#define HUF_DECODE_SYMBOL_0(n, Dstream) \
Yann Colleta7875502015-08-07 15:21:00 +01002301 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
Yann Colletfb8296f2015-07-27 19:34:27 +01002302
2303#define HUF_DECODE_SYMBOL_1(n, Dstream) \
Yann Colleta7875502015-08-07 15:21:00 +01002304 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
Yann Colletfb8296f2015-07-27 19:34:27 +01002305 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
2306
2307#define HUF_DECODE_SYMBOL_2(n, Dstream) \
Yann Colleta7875502015-08-07 15:21:00 +01002308 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
Yann Collete8c6bb12015-07-26 00:23:57 +01002309 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
2310
Yann Colletfb8296f2015-07-27 19:34:27 +01002311 HUF_DECODE_SYMBOL_1( 0, bitD1);
2312 HUF_DECODE_SYMBOL_1( 1, bitD2);
2313 HUF_DECODE_SYMBOL_1( 2, bitD3);
2314 HUF_DECODE_SYMBOL_1( 3, bitD4);
2315 HUF_DECODE_SYMBOL_2( 4, bitD1);
2316 HUF_DECODE_SYMBOL_2( 5, bitD2);
2317 HUF_DECODE_SYMBOL_2( 6, bitD3);
2318 HUF_DECODE_SYMBOL_2( 7, bitD4);
2319 HUF_DECODE_SYMBOL_1( 8, bitD1);
2320 HUF_DECODE_SYMBOL_1( 9, bitD2);
2321 HUF_DECODE_SYMBOL_1(10, bitD3);
2322 HUF_DECODE_SYMBOL_1(11, bitD4);
2323 HUF_DECODE_SYMBOL_0(12, bitD1);
2324 HUF_DECODE_SYMBOL_0(13, bitD2);
2325 HUF_DECODE_SYMBOL_0(14, bitD3);
2326 HUF_DECODE_SYMBOL_0(15, bitD4);
Yann Collete8c6bb12015-07-26 00:23:57 +01002327 }
2328
Yann Colleta7875502015-08-07 15:21:00 +01002329 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
Yann Collete8c6bb12015-07-26 00:23:57 +01002330 return (size_t)-FSE_ERROR_corruptionDetected;
2331
2332 /* tail */
2333 {
2334 // bitTail = bitD1; // *much* slower : -20% !??!
2335 FSE_DStream_t bitTail;
2336 bitTail.ptr = bitD1.ptr;
2337 bitTail.bitsConsumed = bitD1.bitsConsumed;
Yann Colleta7875502015-08-07 15:21:00 +01002338 bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
Yann Collete8c6bb12015-07-26 00:23:57 +01002339 bitTail.start = start1;
2340 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
2341 {
Yann Colletfb8296f2015-07-27 19:34:27 +01002342 HUF_DECODE_SYMBOL_0(0, bitTail);
Yann Collete8c6bb12015-07-26 00:23:57 +01002343 }
2344
2345 if (FSE_endOfDStream(&bitTail))
2346 return op-ostart;
2347 }
2348
2349 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
2350
2351 return (size_t)-FSE_ERROR_corruptionDetected;
2352}
2353
2354
2355size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
2356{
2357 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
2358 const BYTE* ip = (const BYTE*) cSrc;
2359 size_t errorCode;
2360
2361 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
2362 if (FSE_isError(errorCode)) return errorCode;
2363 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2364 ip += errorCode;
2365 cSrcSize -= errorCode;
2366
2367 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
2368}
2369
2370
2371#endif /* FSE_COMMONDEFS_ONLY */