blob: 8088711ba3f8ffb596d75a0b331a106c6ddfcac4 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35#ifndef FSE_COMMONDEFS_ONLY
36
37/****************************************************************
38* Tuning parameters
39****************************************************************/
40/* MEMORY_USAGE :
41* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
42* Increasing memory usage improves compression ratio
43* Reduced memory usage can improve speed, due to cache effect
44* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
45#define FSE_MAX_MEMORY_USAGE 14
46#define FSE_DEFAULT_MEMORY_USAGE 13
47
48/* FSE_MAX_SYMBOL_VALUE :
49* Maximum symbol value authorized.
50* Required for proper stack allocation */
51#define FSE_MAX_SYMBOL_VALUE 255
52
53
54/****************************************************************
Yann Collete8c6bb12015-07-26 00:23:57 +010055* template functions type & suffix
Yann Collet4856a002015-01-24 01:58:16 +010056****************************************************************/
57#define FSE_FUNCTION_TYPE BYTE
58#define FSE_FUNCTION_EXTENSION
59
Yann Collete8c6bb12015-07-26 00:23:57 +010060
61/****************************************************************
62* Byte symbol type
63****************************************************************/
64typedef struct
65{
66 unsigned short newState;
67 unsigned char symbol;
68 unsigned char nbBits;
69} FSE_decode_t; /* size == U32 */
70
Yann Collet4856a002015-01-24 01:58:16 +010071#endif /* !FSE_COMMONDEFS_ONLY */
72
73
74/****************************************************************
75* Compiler specifics
76****************************************************************/
77#ifdef _MSC_VER /* Visual Studio */
78# define FORCE_INLINE static __forceinline
79# include <intrin.h> /* For Visual 2005 */
80# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
81# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
82#else
83# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
84# ifdef __GNUC__
85# define FORCE_INLINE static inline __attribute__((always_inline))
86# else
87# define FORCE_INLINE static inline
88# endif
89#endif
90
91
92/****************************************************************
93* Includes
94****************************************************************/
95#include <stdlib.h> /* malloc, free, qsort */
96#include <string.h> /* memcpy, memset */
97#include <stdio.h> /* printf (debug) */
98#include "fse_static.h"
99
100
Yann Colletad68a0e2015-02-26 00:29:15 +0100101#ifndef MEM_ACCESS_MODULE
102#define MEM_ACCESS_MODULE
Yann Collet4856a002015-01-24 01:58:16 +0100103/****************************************************************
104* Basic Types
105*****************************************************************/
106#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
107# include <stdint.h>
108typedef uint8_t BYTE;
109typedef uint16_t U16;
110typedef int16_t S16;
111typedef uint32_t U32;
112typedef int32_t S32;
113typedef uint64_t U64;
114typedef int64_t S64;
115#else
116typedef unsigned char BYTE;
117typedef unsigned short U16;
118typedef signed short S16;
119typedef unsigned int U32;
120typedef signed int S32;
121typedef unsigned long long U64;
122typedef signed long long S64;
123#endif
124
Yann Colletad68a0e2015-02-26 00:29:15 +0100125#endif /* MEM_ACCESS_MODULE */
Yann Collet4856a002015-01-24 01:58:16 +0100126
127/****************************************************************
128* Memory I/O
129*****************************************************************/
Yann Collete8c6bb12015-07-26 00:23:57 +0100130static unsigned FSE_32bits(void)
131{
132 return sizeof(void*)==4;
133}
134
Yann Collet4856a002015-01-24 01:58:16 +0100135static unsigned FSE_isLittleEndian(void)
136{
137 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
138 return one.c[0];
139}
140
Yann Collet138db212015-07-27 20:19:21 +0100141static U16 FSE_read16(const void* memPtr)
142{
143 U16 val;
144 memcpy(&val, memPtr, sizeof(val));
145 return val;
146}
147
148static U16 FSE_readLE16(const void* memPtr)
149{
150 if (FSE_isLittleEndian())
151 return FSE_read16(memPtr);
152 else
153 {
154 const BYTE* p = (const BYTE*)memPtr;
155 return (U16)(p[0] + (p[1]<<8));
156 }
157}
158
159static void FSE_writeLE16(void* memPtr, U16 val)
160{
161 if (FSE_isLittleEndian())
162 {
163 memcpy(memPtr, &val, sizeof(val));
164 }
165 else
166 {
167 BYTE* p = (BYTE*)memPtr;
168 p[0] = (BYTE)val;
169 p[1] = (BYTE)(val>>8);
170 }
171}
172
Yann Collet4856a002015-01-24 01:58:16 +0100173static U32 FSE_read32(const void* memPtr)
174{
175 U32 val32;
176 memcpy(&val32, memPtr, 4);
177 return val32;
178}
179
180static U32 FSE_readLE32(const void* memPtr)
181{
182 if (FSE_isLittleEndian())
183 return FSE_read32(memPtr);
184 else
185 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100186 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100187 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
188 }
189}
190
191static void FSE_writeLE32(void* memPtr, U32 val32)
192{
193 if (FSE_isLittleEndian())
194 {
195 memcpy(memPtr, &val32, 4);
196 }
197 else
198 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100199 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100200 p[0] = (BYTE)val32;
201 p[1] = (BYTE)(val32>>8);
202 p[2] = (BYTE)(val32>>16);
203 p[3] = (BYTE)(val32>>24);
204 }
205}
206
207static U64 FSE_read64(const void* memPtr)
208{
209 U64 val64;
210 memcpy(&val64, memPtr, 8);
211 return val64;
212}
213
214static U64 FSE_readLE64(const void* memPtr)
215{
216 if (FSE_isLittleEndian())
217 return FSE_read64(memPtr);
218 else
219 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100220 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100221 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
222 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
223 }
224}
225
226static void FSE_writeLE64(void* memPtr, U64 val64)
227{
228 if (FSE_isLittleEndian())
229 {
230 memcpy(memPtr, &val64, 8);
231 }
232 else
233 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100234 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100235 p[0] = (BYTE)val64;
236 p[1] = (BYTE)(val64>>8);
237 p[2] = (BYTE)(val64>>16);
238 p[3] = (BYTE)(val64>>24);
239 p[4] = (BYTE)(val64>>32);
240 p[5] = (BYTE)(val64>>40);
241 p[6] = (BYTE)(val64>>48);
242 p[7] = (BYTE)(val64>>56);
243 }
244}
245
246static size_t FSE_readLEST(const void* memPtr)
247{
Yann Collete8c6bb12015-07-26 00:23:57 +0100248 if (FSE_32bits())
Yann Colletfb814172015-01-25 13:19:12 +0100249 return (size_t)FSE_readLE32(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100250 else
Yann Colletfb814172015-01-25 13:19:12 +0100251 return (size_t)FSE_readLE64(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100252}
253
254static void FSE_writeLEST(void* memPtr, size_t val)
255{
Yann Collete8c6bb12015-07-26 00:23:57 +0100256 if (FSE_32bits())
Yann Collet4856a002015-01-24 01:58:16 +0100257 FSE_writeLE32(memPtr, (U32)val);
258 else
259 FSE_writeLE64(memPtr, (U64)val);
260}
261
262
263/****************************************************************
264* Constants
265*****************************************************************/
266#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
267#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
268#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
269#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
270#define FSE_MIN_TABLELOG 5
271
272#define FSE_TABLELOG_ABSOLUTE_MAX 15
273#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
274#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
275#endif
276
277
278/****************************************************************
279* Error Management
280****************************************************************/
281#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
282
283
284/****************************************************************
285* Complex types
286****************************************************************/
287typedef struct
288{
289 int deltaFindState;
290 U16 maxState;
291 BYTE minBitsOut;
Yann Collet1efa31f2015-07-04 15:56:41 -0800292 /* one byte padding ; total 8 bytes */
Yann Collet4856a002015-01-24 01:58:16 +0100293} FSE_symbolCompressionTransform;
294
Yann Collet1efa31f2015-07-04 15:56:41 -0800295typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
296typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
Yann Collet4856a002015-01-24 01:58:16 +0100297
298/****************************************************************
299* Internal functions
300****************************************************************/
301FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
302{
303# if defined(_MSC_VER) /* Visual */
304 unsigned long r;
305 _BitScanReverse ( &r, val );
306 return (unsigned) r;
307# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
308 return 31 - __builtin_clz (val);
309# else /* Software version */
310 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
311 U32 v = val;
312 unsigned r;
313 v |= v >> 1;
314 v |= v >> 2;
315 v |= v >> 4;
316 v |= v >> 8;
317 v |= v >> 16;
318 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
319 return r;
320# endif
321}
322
323
Yann Collete8c6bb12015-07-26 00:23:57 +0100324/****************************************************************
325* Templates
326****************************************************************/
327/*
328 designed to be included
329 for type-specific functions (template emulation in C)
330 Objective is to write these functions only once, for improved maintenance
331*/
332
333/* safety checks */
334#ifndef FSE_FUNCTION_EXTENSION
335# error "FSE_FUNCTION_EXTENSION must be defined"
336#endif
337#ifndef FSE_FUNCTION_TYPE
338# error "FSE_FUNCTION_TYPE must be defined"
339#endif
340
341/* Function names */
342#define FSE_CAT(X,Y) X##Y
343#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
344#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
345
346
347/* Function templates */
348size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
349(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
350{
351 const FSE_FUNCTION_TYPE* ip = source;
352 const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
353 unsigned maxSymbolValue = *maxSymbolValuePtr;
354 unsigned max=0;
355 int s;
356
357 U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
358 U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
359 U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
360 U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
361
362 /* safety checks */
363 if (!sourceSize)
364 {
365 memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
366 *maxSymbolValuePtr = 0;
367 return 0;
368 }
369 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
370 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
371
372 if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
373 {
374 /* check input values, to avoid count table overflow */
375 while (ip < iend-3)
376 {
377 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
378 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
379 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
380 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
381 }
382 }
383 else
384 {
385 U32 cached = FSE_read32(ip); ip += 4;
386 while (ip < iend-15)
387 {
388 U32 c = cached; cached = FSE_read32(ip); ip += 4;
389 Counting1[(BYTE) c ]++;
390 Counting2[(BYTE)(c>>8) ]++;
391 Counting3[(BYTE)(c>>16)]++;
392 Counting4[ c>>24 ]++;
393 c = cached; cached = FSE_read32(ip); ip += 4;
394 Counting1[(BYTE) c ]++;
395 Counting2[(BYTE)(c>>8) ]++;
396 Counting3[(BYTE)(c>>16)]++;
397 Counting4[ c>>24 ]++;
398 c = cached; cached = FSE_read32(ip); ip += 4;
399 Counting1[(BYTE) c ]++;
400 Counting2[(BYTE)(c>>8) ]++;
401 Counting3[(BYTE)(c>>16)]++;
402 Counting4[ c>>24 ]++;
403 c = cached; cached = FSE_read32(ip); ip += 4;
404 Counting1[(BYTE) c ]++;
405 Counting2[(BYTE)(c>>8) ]++;
406 Counting3[(BYTE)(c>>16)]++;
407 Counting4[ c>>24 ]++;
408 }
409 ip-=4;
410 }
411
412 /* finish last symbols */
413 while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
414
415 for (s=0; s<=(int)maxSymbolValue; s++)
416 {
417 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
418 if (count[s] > max) max = count[s];
419 }
420
421 while (!count[maxSymbolValue]) maxSymbolValue--;
422 *maxSymbolValuePtr = maxSymbolValue;
423 return (size_t)max;
424}
425
426/* hidden fast variant (unsafe) */
427size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
428(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
429{
430 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
431}
432
433size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
434(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
435{
436 if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
437 {
438 *maxSymbolValuePtr = 255;
439 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
440 }
441 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
442}
443
444
445static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
446
447size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
448(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
449{
450 const unsigned tableSize = 1 << tableLog;
451 const unsigned tableMask = tableSize - 1;
452 U16* tableU16 = ( (U16*) ct) + 2;
453 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
454 const unsigned step = FSE_tableStep(tableSize);
455 unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
456 U32 position = 0;
457 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */
458 U32 highThreshold = tableSize-1;
459 unsigned symbol;
460 unsigned i;
461
462 /* header */
463 tableU16[-2] = (U16) tableLog;
464 tableU16[-1] = (U16) maxSymbolValue;
465
466 /* For explanations on how to distribute symbol values over the table :
467 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
468
469 /* symbol start positions */
470 cumul[0] = 0;
471 for (i=1; i<=maxSymbolValue+1; i++)
472 {
473 if (normalizedCounter[i-1]==-1) /* Low prob symbol */
474 {
475 cumul[i] = cumul[i-1] + 1;
476 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
477 }
478 else
479 cumul[i] = cumul[i-1] + normalizedCounter[i-1];
480 }
481 cumul[maxSymbolValue+1] = tableSize+1;
482
483 /* Spread symbols */
484 for (symbol=0; symbol<=maxSymbolValue; symbol++)
485 {
486 int nbOccurences;
487 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
488 {
489 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
490 position = (position + step) & tableMask;
491 while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
492 }
493 }
494
495 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
496
497 /* Build table */
498 for (i=0; i<tableSize; i++)
499 {
500 FSE_FUNCTION_TYPE s = tableSymbol[i]; /* static analyzer doesn't understand tableSymbol is properly initialized */
501 tableU16[cumul[s]++] = (U16) (tableSize+i); /* Table U16 : sorted by symbol order; gives next state value */
502 }
503
504 /* Build Symbol Transformation Table */
505 {
506 unsigned s;
507 unsigned total = 0;
508 for (s=0; s<=maxSymbolValue; s++)
509 {
510 switch (normalizedCounter[s])
511 {
512 case 0:
513 break;
514 case -1:
515 case 1:
516 symbolTT[s].minBitsOut = (BYTE)tableLog;
517 symbolTT[s].deltaFindState = total - 1;
518 total ++;
519 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
520 break;
521 default :
522 symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
523 symbolTT[s].deltaFindState = total - normalizedCounter[s];
524 total += normalizedCounter[s];
525 symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
526 }
527 }
528 }
529
530 return 0;
531}
532
533
534#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
535
536FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
537{
538 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
539 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
540}
541
542void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
543{
544 free(dt);
545}
546
547
548size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
549(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
550{
551 U32* const base32 = (U32*)dt;
552 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
553 const U32 tableSize = 1 << tableLog;
554 const U32 tableMask = tableSize-1;
555 const U32 step = FSE_tableStep(tableSize);
556 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
557 U32 position = 0;
558 U32 highThreshold = tableSize-1;
559 const S16 largeLimit= (S16)(1 << (tableLog-1));
560 U32 noLarge = 1;
561 U32 s;
562
563 /* Sanity Checks */
564 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
565 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
566
567 /* Init, lay down lowprob symbols */
568 base32[0] = tableLog;
569 for (s=0; s<=maxSymbolValue; s++)
570 {
571 if (normalizedCounter[s]==-1)
572 {
573 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
574 symbolNext[s] = 1;
575 }
576 else
577 {
578 if (normalizedCounter[s] >= largeLimit) noLarge=0;
579 symbolNext[s] = normalizedCounter[s];
580 }
581 }
582
583 /* Spread symbols */
584 for (s=0; s<=maxSymbolValue; s++)
585 {
586 int i;
587 for (i=0; i<normalizedCounter[s]; i++)
588 {
589 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
590 position = (position + step) & tableMask;
591 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
592 }
593 }
594
595 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
596
597 /* Build Decoding table */
598 {
599 U32 i;
600 for (i=0; i<tableSize; i++)
601 {
602 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
603 U16 nextState = symbolNext[symbol]++;
604 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
605 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
606 }
607 }
608
609 return noLarge;
610}
611
612
613/******************************************
614* FSE byte symbol
615******************************************/
Yann Collet4856a002015-01-24 01:58:16 +0100616#ifndef FSE_COMMONDEFS_ONLY
617
618unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
619
620#define FSE_GENERATE_STRING(STRING) #STRING,
621static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
622
623const char* FSE_getErrorName(size_t code)
624{
625 static const char* codeError = "Unspecified error code";
626 if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
627 return codeError;
628}
629
630static short FSE_abs(short a)
631{
632 return a<0? -a : a;
633}
634
635
636/****************************************************************
637* Header bitstream management
638****************************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -0800639size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100640{
641 size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
642 return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
643}
644
Yann Collet1efa31f2015-07-04 15:56:41 -0800645static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
Yann Collet4856a002015-01-24 01:58:16 +0100646 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
647 unsigned safeWrite)
648{
649 BYTE* const ostart = (BYTE*) header;
650 BYTE* out = ostart;
651 BYTE* const oend = ostart + headerBufferSize;
652 int nbBits;
653 const int tableSize = 1 << tableLog;
654 int remaining;
655 int threshold;
656 U32 bitStream;
657 int bitCount;
658 unsigned charnum = 0;
659 int previous0 = 0;
660
661 bitStream = 0;
662 bitCount = 0;
663 /* Table Size */
664 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
665 bitCount += 4;
666
667 /* Init */
668 remaining = tableSize+1; /* +1 for extra accuracy */
669 threshold = tableSize;
670 nbBits = tableLog+1;
671
672 while (remaining>1) /* stops at 1 */
673 {
674 if (previous0)
675 {
676 unsigned start = charnum;
677 while (!normalizedCounter[charnum]) charnum++;
678 while (charnum >= start+24)
679 {
680 start+=24;
Yann Collet213089c2015-06-18 07:43:16 -0800681 bitStream += 0xFFFFU << bitCount;
Yann Collet4856a002015-01-24 01:58:16 +0100682 if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
Yann Collet213089c2015-06-18 07:43:16 -0800683 out[0] = (BYTE) bitStream;
Yann Collet4856a002015-01-24 01:58:16 +0100684 out[1] = (BYTE)(bitStream>>8);
685 out+=2;
686 bitStream>>=16;
687 }
688 while (charnum >= start+3)
689 {
690 start+=3;
691 bitStream += 3 << bitCount;
692 bitCount += 2;
693 }
694 bitStream += (charnum-start) << bitCount;
695 bitCount += 2;
696 if (bitCount>16)
697 {
698 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
699 out[0] = (BYTE)bitStream;
700 out[1] = (BYTE)(bitStream>>8);
701 out += 2;
702 bitStream >>= 16;
703 bitCount -= 16;
704 }
705 }
706 {
707 short count = normalizedCounter[charnum++];
708 const short max = (short)((2*threshold-1)-remaining);
709 remaining -= FSE_abs(count);
Yann Collet213089c2015-06-18 07:43:16 -0800710 if (remaining<1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100711 count++; /* +1 for extra accuracy */
712 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
713 bitStream += count << bitCount;
714 bitCount += nbBits;
715 bitCount -= (count<max);
716 previous0 = (count==1);
717 while (remaining<threshold) nbBits--, threshold>>=1;
718 }
719 if (bitCount>16)
720 {
721 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
722 out[0] = (BYTE)bitStream;
723 out[1] = (BYTE)(bitStream>>8);
724 out += 2;
725 bitStream >>= 16;
726 bitCount -= 16;
727 }
728 }
729
730 /* flush remaining bitStream */
731 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
732 out[0] = (BYTE)bitStream;
733 out[1] = (BYTE)(bitStream>>8);
734 out+= (bitCount+7) /8;
735
Yann Collete8c6bb12015-07-26 00:23:57 +0100736 if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100737
738 return (out-ostart);
739}
740
741
Yann Collet1efa31f2015-07-04 15:56:41 -0800742size_t FSE_writeNCount (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100743{
744 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
745 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
746
Yann Collet1efa31f2015-07-04 15:56:41 -0800747 if (headerBufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
748 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
Yann Collet4856a002015-01-24 01:58:16 +0100749
Yann Collet1efa31f2015-07-04 15:56:41 -0800750 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
Yann Collet4856a002015-01-24 01:58:16 +0100751}
752
753
Yann Collet1efa31f2015-07-04 15:56:41 -0800754size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
Yann Collet4856a002015-01-24 01:58:16 +0100755 const void* headerBuffer, size_t hbSize)
756{
757 const BYTE* const istart = (const BYTE*) headerBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -0800758 const BYTE* const iend = istart + hbSize;
Yann Collet4856a002015-01-24 01:58:16 +0100759 const BYTE* ip = istart;
760 int nbBits;
761 int remaining;
762 int threshold;
763 U32 bitStream;
764 int bitCount;
765 unsigned charnum = 0;
766 int previous0 = 0;
767
Yann Collet1efa31f2015-07-04 15:56:41 -0800768 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100769 bitStream = FSE_readLE32(ip);
770 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
771 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
772 bitStream >>= 4;
773 bitCount = 4;
774 *tableLogPtr = nbBits;
775 remaining = (1<<nbBits)+1;
776 threshold = 1<<nbBits;
777 nbBits++;
778
779 while ((remaining>1) && (charnum<=*maxSVPtr))
780 {
781 if (previous0)
782 {
783 unsigned n0 = charnum;
784 while ((bitStream & 0xFFFF) == 0xFFFF)
785 {
786 n0+=24;
787 ip+=2;
788 bitStream = FSE_readLE32(ip) >> bitCount;
789 }
790 while ((bitStream & 3) == 3)
791 {
792 n0+=3;
793 bitStream>>=2;
794 bitCount+=2;
795 }
796 n0 += bitStream & 3;
797 bitCount += 2;
Yann Collet1efa31f2015-07-04 15:56:41 -0800798 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
Yann Collet4856a002015-01-24 01:58:16 +0100799 while (charnum < n0) normalizedCounter[charnum++] = 0;
800 ip += bitCount>>3;
801 bitCount &= 7;
802 bitStream = FSE_readLE32(ip) >> bitCount;
803 }
804 {
805 const short max = (short)((2*threshold-1)-remaining);
806 short count;
807
808 if ((bitStream & (threshold-1)) < (U32)max)
809 {
810 count = (short)(bitStream & (threshold-1));
811 bitCount += nbBits-1;
812 }
813 else
814 {
815 count = (short)(bitStream & (2*threshold-1));
816 if (count >= threshold) count -= max;
817 bitCount += nbBits;
818 }
819
820 count--; /* extra accuracy */
821 remaining -= FSE_abs(count);
822 normalizedCounter[charnum++] = count;
823 previous0 = !count;
824 while (remaining < threshold)
825 {
826 nbBits--;
827 threshold >>= 1;
828 }
829
Yann Collet1efa31f2015-07-04 15:56:41 -0800830 {
831 const BYTE* itarget = ip + (bitCount>>3);
832 if (itarget > iend - 4)
833 {
834 ip = iend - 4;
835 bitCount -= (int)(8 * (iend - 4 - ip));
836 }
837 else
838 {
839 ip = itarget;
840 bitCount &= 7;
841 }
842 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
843 }
Yann Collet4856a002015-01-24 01:58:16 +0100844 }
845 }
846 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
847 *maxSVPtr = charnum-1;
848
Yann Collet1efa31f2015-07-04 15:56:41 -0800849 ip += (bitCount+7)>>3;
850 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100851 return ip-istart;
852}
853
854
855/****************************************************************
856* FSE Compression Code
857****************************************************************/
858/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800859FSE_CTable[0] is a variable size structure which contains :
Yann Collet4856a002015-01-24 01:58:16 +0100860 U16 tableLog;
861 U16 maxSymbolValue;
862 U16 nextStateNumber[1 << tableLog]; // This size is variable
863 FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
864Allocation is manual, since C standard does not support variable-size structures.
865*/
866
867size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
868{
869 size_t size;
870 FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
871 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
872 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
873 return size;
874}
875
Yann Collet1efa31f2015-07-04 15:56:41 -0800876FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100877{
878 size_t size;
879 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
880 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
Yann Collet1efa31f2015-07-04 15:56:41 -0800881 return (FSE_CTable*)malloc(size);
Yann Collet4856a002015-01-24 01:58:16 +0100882}
883
Yann Collet1efa31f2015-07-04 15:56:41 -0800884void FSE_freeCTable (FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +0100885{
Yann Collet1efa31f2015-07-04 15:56:41 -0800886 free(ct);
Yann Collet4856a002015-01-24 01:58:16 +0100887}
888
Yann Collet4856a002015-01-24 01:58:16 +0100889
890unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
891{
892 U32 tableLog = maxTableLog;
893 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
894 if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
Yann Colletbbfa7d72015-03-23 23:15:22 +0100895 if ((FSE_highbit32(maxSymbolValue)+2) > tableLog) tableLog = FSE_highbit32(maxSymbolValue)+2; /* Need a minimum to safely represent all symbol values */
Yann Collet4856a002015-01-24 01:58:16 +0100896 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
897 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
898 return tableLog;
899}
900
901
902typedef struct
903{
904 U32 id;
905 U32 count;
906} rank_t;
907
908int FSE_compareRankT(const void* r1, const void* r2)
909{
Yann Collet1cc58de2015-01-29 07:13:54 +0100910 const rank_t* R1 = (const rank_t*)r1;
911 const rank_t* R2 = (const rank_t*)r2;
Yann Collet4856a002015-01-24 01:58:16 +0100912
913 return 2 * (R1->count < R2->count) - 1;
914}
915
Yann Colleta3c75ba2015-02-21 03:31:59 +0100916
917#if 0
Yann Collet565b81d2015-01-29 06:51:30 +0100918static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
919{
Yann Collet2ddf7e92015-02-08 20:26:47 +0100920 rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
Yann Collet565b81d2015-01-29 06:51:30 +0100921 U32 s;
922
923 /* Init */
924 for (s=0; s<=maxSymbolValue; s++)
925 {
926 rank[s].id = s;
927 rank[s].count = count[s];
928 if (norm[s] <= 1) rank[s].count = 0;
929 }
Yann Collet2ddf7e92015-02-08 20:26:47 +0100930 rank[maxSymbolValue+1].id = 0;
931 rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
Yann Collet565b81d2015-01-29 06:51:30 +0100932
933 /* Sort according to count */
934 qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
935
936 while(pointsToRemove)
937 {
938 int newRank = 1;
939 rank_t savedR;
940 if (norm[rank[0].id] == 1)
941 return (size_t)-FSE_ERROR_GENERIC;
942 norm[rank[0].id]--;
943 pointsToRemove--;
944 rank[0].count -= (rank[0].count + 6) >> 3;
945 if (norm[rank[0].id] == 1)
946 rank[0].count=0;
947 savedR = rank[0];
948 while (rank[newRank].count > savedR.count)
949 {
950 rank[newRank-1] = rank[newRank];
951 newRank++;
952 }
953 rank[newRank-1] = savedR;
954 }
955
956 return 0;
957}
Yann Collet565b81d2015-01-29 06:51:30 +0100958
Yann Colleta3c75ba2015-02-21 03:31:59 +0100959#else
960
Yann Collet26aa1ec2015-02-24 09:05:58 +0100961/* Secondary normalization method.
962 To be used when primary method fails. */
963
964static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
Yann Colleta3c75ba2015-02-21 03:31:59 +0100965{
966 U32 s;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100967 U32 distributed = 0;
968 U32 ToDistribute;
969
970 /* Init */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100971 U32 lowThreshold = (U32)(total >> tableLog);
972 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
973
974 for (s=0; s<=maxSymbolValue; s++)
975 {
976 if (count[s] == 0)
977 {
978 norm[s]=0;
979 continue;
980 }
981 if (count[s] <= lowThreshold)
982 {
983 norm[s] = -1;
984 distributed++;
985 total -= count[s];
986 continue;
987 }
988 if (count[s] <= lowOne)
989 {
990 norm[s] = 1;
991 distributed++;
992 total -= count[s];
993 continue;
994 }
995 norm[s]=-2;
996 }
997 ToDistribute = (1 << tableLog) - distributed;
998
999 if ((total / ToDistribute) > lowOne)
1000 {
1001 /* risk of rounding to zero */
1002 lowOne = (U32)((total * 3) / (ToDistribute * 2));
1003 for (s=0; s<=maxSymbolValue; s++)
1004 {
1005 if ((norm[s] == -2) && (count[s] <= lowOne))
1006 {
1007 norm[s] = 1;
1008 distributed++;
1009 total -= count[s];
1010 continue;
1011 }
1012 }
1013 ToDistribute = (1 << tableLog) - distributed;
1014 }
1015
1016 if (distributed == maxSymbolValue+1)
1017 {
Yann Collet26aa1ec2015-02-24 09:05:58 +01001018 /* all values are pretty poor;
1019 probably incompressible data (should have already been detected);
1020 find max, then give all remaining points to max */
Yann Colleta3c75ba2015-02-21 03:31:59 +01001021 U32 maxV = 0, maxC =0;
1022 for (s=0; s<=maxSymbolValue; s++)
1023 if (count[s] > maxC) maxV=s, maxC=count[s];
Yann Collet1efa31f2015-07-04 15:56:41 -08001024 norm[maxV] += (short)ToDistribute;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001025 return 0;
1026 }
1027
1028 {
1029 U64 const vStepLog = 62 - tableLog;
1030 U64 const mid = (1ULL << (vStepLog-1)) - 1;
1031 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
1032 U64 tmpTotal = mid;
1033 for (s=0; s<=maxSymbolValue; s++)
1034 {
1035 if (norm[s]==-2)
1036 {
1037 U64 end = tmpTotal + (count[s] * rStep);
1038 U32 sStart = (U32)(tmpTotal >> vStepLog);
1039 U32 sEnd = (U32)(end >> vStepLog);
1040 U32 weight = sEnd - sStart;
1041 if (weight < 1)
1042 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet213089c2015-06-18 07:43:16 -08001043 norm[s] = (short)weight;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001044 tmpTotal = end;
1045 }
1046 }
1047 }
1048
1049 return 0;
1050}
1051#endif
1052
Yann Collet4856a002015-01-24 01:58:16 +01001053
1054size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
1055 const unsigned* count, size_t total,
1056 unsigned maxSymbolValue)
1057{
1058 /* Sanity checks */
1059 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
1060 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1061 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1062 if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
1063
1064 {
1065 U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
1066 U64 const scale = 62 - tableLog;
1067 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
1068 U64 const vStep = 1ULL<<(scale-20);
1069 int stillToDistribute = 1<<tableLog;
1070 unsigned s;
1071 unsigned largest=0;
1072 short largestP=0;
1073 U32 lowThreshold = (U32)(total >> tableLog);
1074
1075 for (s=0; s<=maxSymbolValue; s++)
1076 {
1077 if (count[s] == total) return 0;
1078 if (count[s] == 0)
1079 {
1080 normalizedCounter[s]=0;
1081 continue;
1082 }
1083 if (count[s] <= lowThreshold)
1084 {
1085 normalizedCounter[s] = -1;
1086 stillToDistribute--;
1087 }
1088 else
1089 {
1090 short proba = (short)((count[s]*step) >> scale);
1091 if (proba<8)
1092 {
Yann Collet2ddf7e92015-02-08 20:26:47 +01001093 U64 restToBeat = vStep * rtbTable[proba];
Yann Collet4856a002015-01-24 01:58:16 +01001094 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
1095 }
1096 if (proba > largestP)
1097 {
1098 largestP=proba;
1099 largest=s;
1100 }
1101 normalizedCounter[s] = proba;
1102 stillToDistribute -= proba;
1103 }
1104 }
Yann Collet4856a002015-01-24 01:58:16 +01001105 if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
1106 {
Yann Collet26aa1ec2015-02-24 09:05:58 +01001107 /* corner case, need another normalization method */
1108 size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
Yann Collet565b81d2015-01-29 06:51:30 +01001109 if (FSE_isError(errorCode)) return errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001110 }
1111 else normalizedCounter[largest] += (short)stillToDistribute;
1112 }
1113
1114#if 0
1115 { /* Print Table (debug) */
Yann Collet565b81d2015-01-29 06:51:30 +01001116 U32 s;
1117 U32 nTotal = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001118 for (s=0; s<=maxSymbolValue; s++)
1119 printf("%3i: %4i \n", s, normalizedCounter[s]);
Yann Collet565b81d2015-01-29 06:51:30 +01001120 for (s=0; s<=maxSymbolValue; s++)
1121 nTotal += abs(normalizedCounter[s]);
1122 if (nTotal != (1U<<tableLog))
1123 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001124 getchar();
1125 }
1126#endif
1127
1128 return tableLog;
1129}
1130
1131
Yann Collet1efa31f2015-07-04 15:56:41 -08001132/* fake FSE_CTable, for raw (uncompressed) input */
1133size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001134{
1135 const unsigned tableSize = 1 << nbBits;
1136 const unsigned tableMask = tableSize - 1;
1137 const unsigned maxSymbolValue = tableMask;
Yann Collet1efa31f2015-07-04 15:56:41 -08001138 U16* tableU16 = ( (U16*) ct) + 2;
1139 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
Yann Collet4856a002015-01-24 01:58:16 +01001140 unsigned s;
1141
1142 /* Sanity checks */
1143 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001144
1145 /* header */
1146 tableU16[-2] = (U16) nbBits;
1147 tableU16[-1] = (U16) maxSymbolValue;
1148
1149 /* Build table */
1150 for (s=0; s<tableSize; s++)
1151 tableU16[s] = (U16)(tableSize + s);
1152
1153 /* Build Symbol Transformation Table */
1154 for (s=0; s<=maxSymbolValue; s++)
1155 {
1156 symbolTT[s].minBitsOut = (BYTE)nbBits;
1157 symbolTT[s].deltaFindState = s-1;
1158 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1159 }
1160
1161 return 0;
1162}
1163
1164
Yann Collet1efa31f2015-07-04 15:56:41 -08001165/* fake FSE_CTable, for rle (100% always same symbol) input */
1166size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001167{
1168 const unsigned tableSize = 1;
Yann Collet1efa31f2015-07-04 15:56:41 -08001169 U16* tableU16 = ( (U16*) ct) + 2;
1170 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
Yann Collet4856a002015-01-24 01:58:16 +01001171
1172 /* header */
1173 tableU16[-2] = (U16) 0;
1174 tableU16[-1] = (U16) symbolValue;
1175
1176 /* Build table */
1177 tableU16[0] = 0;
1178 tableU16[1] = 0; /* just in case */
1179
1180 /* Build Symbol Transformation Table */
1181 {
1182 symbolTT[symbolValue].minBitsOut = 0;
1183 symbolTT[symbolValue].deltaFindState = 0;
1184 symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
1185 }
1186
1187 return 0;
1188}
1189
1190
1191void FSE_initCStream(FSE_CStream_t* bitC, void* start)
1192{
1193 bitC->bitContainer = 0;
1194 bitC->bitPos = 0; /* reserved for unusedBits */
1195 bitC->startPtr = (char*)start;
1196 bitC->ptr = bitC->startPtr;
1197}
1198
Yann Collet1efa31f2015-07-04 15:56:41 -08001199void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +01001200{
Yann Collet1efa31f2015-07-04 15:56:41 -08001201 const U32 tableLog = ( (const U16*) ct) [0];
Yann Collet4856a002015-01-24 01:58:16 +01001202 statePtr->value = (ptrdiff_t)1<<tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001203 statePtr->stateTable = ((const U16*) ct) + 2;
1204 statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
Yann Collet4856a002015-01-24 01:58:16 +01001205 statePtr->stateLog = tableLog;
1206}
1207
Yann Collete8c6bb12015-07-26 00:23:57 +01001208void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */
1209{
1210 bitC->bitContainer |= value << bitC->bitPos;
1211 bitC->bitPos += nbBits;
1212}
1213
Yann Collet4856a002015-01-24 01:58:16 +01001214void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
1215{
1216 static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
1217 bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
1218 bitC->bitPos += nbBits;
1219}
1220
Yann Collet1efa31f2015-07-04 15:56:41 -08001221void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
Yann Collet4856a002015-01-24 01:58:16 +01001222{
Yann Collet1efa31f2015-07-04 15:56:41 -08001223 const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1224 const U16* const stateTable = (const U16*)(statePtr->stateTable);
1225 int nbBitsOut = symbolTT.minBitsOut;
1226 nbBitsOut -= (int)((symbolTT.maxState - statePtr->value) >> 31);
Yann Collet4856a002015-01-24 01:58:16 +01001227 FSE_addBits(bitC, statePtr->value, nbBitsOut);
Yann Collet1efa31f2015-07-04 15:56:41 -08001228 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
Yann Collet4856a002015-01-24 01:58:16 +01001229}
1230
1231void FSE_flushBits(FSE_CStream_t* bitC)
1232{
1233 size_t nbBytes = bitC->bitPos >> 3;
1234 FSE_writeLEST(bitC->ptr, bitC->bitContainer);
1235 bitC->bitPos &= 7;
1236 bitC->ptr += nbBytes;
1237 bitC->bitContainer >>= nbBytes*8;
1238}
1239
1240void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
1241{
1242 FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
1243 FSE_flushBits(bitC);
1244}
1245
1246
1247size_t FSE_closeCStream(FSE_CStream_t* bitC)
1248{
1249 char* endPtr;
1250
1251 FSE_addBits(bitC, 1, 1);
1252 FSE_flushBits(bitC);
1253
1254 endPtr = bitC->ptr;
1255 endPtr += bitC->bitPos > 0;
1256
1257 return (endPtr - bitC->startPtr);
1258}
1259
1260
1261size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1262 const void* src, size_t srcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001263 const FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +01001264{
1265 const BYTE* const istart = (const BYTE*) src;
1266 const BYTE* ip;
1267 const BYTE* const iend = istart + srcSize;
1268
1269 FSE_CStream_t bitC;
1270 FSE_CState_t CState1, CState2;
1271
1272
1273 /* init */
1274 (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
1275 FSE_initCStream(&bitC, dst);
Yann Collet1efa31f2015-07-04 15:56:41 -08001276 FSE_initCState(&CState1, ct);
Yann Collet4856a002015-01-24 01:58:16 +01001277 CState2 = CState1;
1278
1279 ip=iend;
1280
1281 /* join to even */
1282 if (srcSize & 1)
1283 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001284 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001285 FSE_flushBits(&bitC);
1286 }
1287
1288 /* join to mod 4 */
1289 if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
1290 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001291 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1292 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001293 FSE_flushBits(&bitC);
1294 }
1295
1296 /* 2 or 4 encoding per loop */
1297 while (ip>istart)
1298 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001299 FSE_encodeSymbol(&bitC, &CState2, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001300
1301 if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
1302 FSE_flushBits(&bitC);
1303
Yann Collet1efa31f2015-07-04 15:56:41 -08001304 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001305
1306 if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
1307 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001308 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1309 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001310 }
1311
1312 FSE_flushBits(&bitC);
1313 }
1314
1315 FSE_flushCState(&bitC, &CState2);
1316 FSE_flushCState(&bitC, &CState1);
1317 return FSE_closeCStream(&bitC);
1318}
1319
1320
Yann Collet4856a002015-01-24 01:58:16 +01001321size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
1322
1323
1324size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
1325{
1326 const BYTE* const istart = (const BYTE*) src;
1327 const BYTE* ip = istart;
1328
1329 BYTE* const ostart = (BYTE*) dst;
1330 BYTE* op = ostart;
1331 BYTE* const oend = ostart + dstSize;
1332
1333 U32 count[FSE_MAX_SYMBOL_VALUE+1];
1334 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001335 CTable_max_t ct;
Yann Collet4856a002015-01-24 01:58:16 +01001336 size_t errorCode;
1337
1338 /* early out */
1339 if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
1340 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
1341 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1342 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
1343
1344 /* Scan input and build symbol stats */
Yann Collet1efa31f2015-07-04 15:56:41 -08001345 errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001346 if (FSE_isError(errorCode)) return errorCode;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001347 if (errorCode == srcSize) return 1;
1348 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
Yann Collet4856a002015-01-24 01:58:16 +01001349
1350 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
1351 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
1352 if (FSE_isError(errorCode)) return errorCode;
1353
1354 /* Write table description header */
Yann Collet1efa31f2015-07-04 15:56:41 -08001355 errorCode = FSE_writeNCount (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001356 if (FSE_isError(errorCode)) return errorCode;
1357 op += errorCode;
1358
1359 /* Compress */
Yann Collet1efa31f2015-07-04 15:56:41 -08001360 errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001361 if (FSE_isError(errorCode)) return errorCode;
Yann Collet1efa31f2015-07-04 15:56:41 -08001362 op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
Yann Collet4856a002015-01-24 01:58:16 +01001363
1364 /* check compressibility */
1365 if ( (size_t)(op-ostart) >= srcSize-1 )
1366 return 0;
1367
1368 return op-ostart;
1369}
1370
Yann Collet4856a002015-01-24 01:58:16 +01001371size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
1372{
1373 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
1374}
1375
1376
1377/*********************************************************
1378* Decompression (Byte symbols)
1379*********************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -08001380size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001381{
Yann Collet1efa31f2015-07-04 15:56:41 -08001382 U32* const base32 = (U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001383 FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
1384
Yann Collet4856a002015-01-24 01:58:16 +01001385 base32[0] = 0;
1386
1387 cell->newState = 0;
1388 cell->symbol = symbolValue;
1389 cell->nbBits = 0;
1390
1391 return 0;
1392}
1393
1394
Yann Collet1efa31f2015-07-04 15:56:41 -08001395size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001396{
Yann Collet1efa31f2015-07-04 15:56:41 -08001397 U32* const base32 = (U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001398 FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
1399 const unsigned tableSize = 1 << nbBits;
1400 const unsigned tableMask = tableSize - 1;
1401 const unsigned maxSymbolValue = tableMask;
1402 unsigned s;
1403
1404 /* Sanity checks */
1405 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001406
1407 /* Build Decoding Table */
1408 base32[0] = nbBits;
1409 for (s=0; s<=maxSymbolValue; s++)
1410 {
1411 dinfo[s].newState = 0;
1412 dinfo[s].symbol = (BYTE)s;
1413 dinfo[s].nbBits = (BYTE)nbBits;
1414 }
1415
1416 return 0;
1417}
1418
1419
1420/* FSE_initDStream
1421 * Initialize a FSE_DStream_t.
1422 * srcBuffer must point at the beginning of an FSE block.
1423 * The function result is the size of the FSE_block (== srcSize).
1424 * If srcSize is too small, the function will return an errorCode;
1425 */
1426size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1427{
1428 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1429
Yann Collet1efa31f2015-07-04 15:56:41 -08001430 if (srcSize >= sizeof(size_t))
Yann Collet4856a002015-01-24 01:58:16 +01001431 {
1432 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001433 bitD->start = (const char*)srcBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -08001434 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
Yann Collet4856a002015-01-24 01:58:16 +01001435 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collet213089c2015-06-18 07:43:16 -08001436 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001437 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1438 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1439 }
1440 else
1441 {
1442 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001443 bitD->start = (const char*)srcBuffer;
Yann Collet4856a002015-01-24 01:58:16 +01001444 bitD->ptr = bitD->start;
Yann Collet213089c2015-06-18 07:43:16 -08001445 bitD->bitContainer = *(const BYTE*)(bitD->start);
Yann Collet4856a002015-01-24 01:58:16 +01001446 switch(srcSize)
1447 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001448 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
1449 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
1450 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
1451 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
1452 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
1453 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
Yann Collet4856a002015-01-24 01:58:16 +01001454 default:;
1455 }
Yann Collet213089c2015-06-18 07:43:16 -08001456 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001457 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1458 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
Yann Collet1efa31f2015-07-04 15:56:41 -08001459 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
Yann Collet4856a002015-01-24 01:58:16 +01001460 }
1461
1462 return srcSize;
1463}
1464
1465
Yann Collete8c6bb12015-07-26 00:23:57 +01001466/* FSE_lookBits
1467 * Provides next n bits from the bitContainer.
1468 * bitContainer is not modified (bits are still present for next read/look)
1469 * On 32-bits, maxNbBits==25
1470 * On 64-bits, maxNbBits==57
1471 * return : value extracted.
1472 */
1473static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
1474{
1475 return ((bitD->bitContainer << (bitD->bitsConsumed & ((sizeof(bitD->bitContainer)*8)-1))) >> 1) >> (((sizeof(bitD->bitContainer)*8)-1)-nbBits);
1476}
1477
1478static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1479{
1480 return (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD->bitContainer)*8)-nbBits);
1481}
1482
1483static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
1484{
1485 bitD->bitsConsumed += nbBits;
1486}
1487
1488
Yann Collet4856a002015-01-24 01:58:16 +01001489/* FSE_readBits
1490 * Read next n bits from the bitContainer.
Yann Collet213089c2015-06-18 07:43:16 -08001491 * On 32-bits, don't read more than maxNbBits==25
1492 * On 64-bits, don't read more than maxNbBits==57
1493 * Use the fast variant *only* if n >= 1.
Yann Collet4856a002015-01-24 01:58:16 +01001494 * return : value extracted.
1495 */
Yann Collet1efa31f2015-07-04 15:56:41 -08001496size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001497{
Yann Collete8c6bb12015-07-26 00:23:57 +01001498 size_t value = FSE_lookBits(bitD, nbBits);
1499 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001500 return value;
1501}
1502
Yann Collet1efa31f2015-07-04 15:56:41 -08001503size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
Yann Collet4856a002015-01-24 01:58:16 +01001504{
Yann Collete8c6bb12015-07-26 00:23:57 +01001505 size_t value = FSE_lookBitsFast(bitD, nbBits);
1506 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001507 return value;
1508}
1509
1510unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1511{
Yann Collete8c6bb12015-07-26 00:23:57 +01001512 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
Yann Collet4856a002015-01-24 01:58:16 +01001513 {
1514 bitD->ptr -= bitD->bitsConsumed >> 3;
1515 bitD->bitsConsumed &= 7;
1516 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collete8c6bb12015-07-26 00:23:57 +01001517 return FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001518 }
1519 if (bitD->ptr == bitD->start)
1520 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001521 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_partiallyFilled;
1522 if (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8) return FSE_DStream_completed;
1523 return FSE_DStream_tooFar;
Yann Collet4856a002015-01-24 01:58:16 +01001524 }
1525 {
1526 U32 nbBytes = bitD->bitsConsumed >> 3;
Yann Collete8c6bb12015-07-26 00:23:57 +01001527 U32 result = FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001528 if (bitD->ptr - nbBytes < bitD->start)
Yann Collete8c6bb12015-07-26 00:23:57 +01001529 {
Yann Collet4856a002015-01-24 01:58:16 +01001530 nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
Yann Collete8c6bb12015-07-26 00:23:57 +01001531 result = FSE_DStream_partiallyFilled;
1532 }
Yann Collet4856a002015-01-24 01:58:16 +01001533 bitD->ptr -= nbBytes;
1534 bitD->bitsConsumed -= nbBytes*8;
1535 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
Yann Collete8c6bb12015-07-26 00:23:57 +01001536 return result;
Yann Collet4856a002015-01-24 01:58:16 +01001537 }
1538}
1539
1540
Yann Collet1efa31f2015-07-04 15:56:41 -08001541void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
Yann Collet4856a002015-01-24 01:58:16 +01001542{
Yann Collet1efa31f2015-07-04 15:56:41 -08001543 const U32* const base32 = (const U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001544 DStatePtr->state = FSE_readBits(bitD, base32[0]);
1545 FSE_reloadDStream(bitD);
1546 DStatePtr->table = base32 + 1;
1547}
1548
1549BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1550{
1551 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1552 const U32 nbBits = DInfo.nbBits;
1553 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001554 size_t lowBits = FSE_readBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001555
1556 DStatePtr->state = DInfo.newState + lowBits;
1557 return symbol;
1558}
1559
1560BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1561{
1562 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1563 const U32 nbBits = DInfo.nbBits;
1564 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001565 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001566
1567 DStatePtr->state = DInfo.newState + lowBits;
1568 return symbol;
1569}
1570
1571/* FSE_endOfDStream
1572 Tells if bitD has reached end of bitStream or not */
1573
1574unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1575{
Yann Collete8c6bb12015-07-26 00:23:57 +01001576 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
Yann Collet4856a002015-01-24 01:58:16 +01001577}
1578
Yann Collet1efa31f2015-07-04 15:56:41 -08001579unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
Yann Collet4856a002015-01-24 01:58:16 +01001580{
Yann Collet1efa31f2015-07-04 15:56:41 -08001581 return DStatePtr->state == 0;
Yann Collet4856a002015-01-24 01:58:16 +01001582}
1583
1584
1585FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1586 void* dst, size_t maxDstSize,
1587 const void* cSrc, size_t cSrcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001588 const FSE_DTable* dt, unsigned fast)
Yann Collet4856a002015-01-24 01:58:16 +01001589{
1590 BYTE* const ostart = (BYTE*) dst;
1591 BYTE* op = ostart;
1592 BYTE* const omax = op + maxDstSize;
1593 BYTE* const olimit = omax-3;
1594
1595 FSE_DStream_t bitD;
Yann Collet1efa31f2015-07-04 15:56:41 -08001596 FSE_DState_t state1;
1597 FSE_DState_t state2;
Yann Collet4856a002015-01-24 01:58:16 +01001598 size_t errorCode;
1599
1600 /* Init */
1601 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1602 if (FSE_isError(errorCode)) return errorCode;
1603
Yann Collet1efa31f2015-07-04 15:56:41 -08001604 FSE_initDState(&state1, &bitD, dt);
1605 FSE_initDState(&state2, &bitD, dt);
Yann Collet4856a002015-01-24 01:58:16 +01001606
1607
1608 /* 2 symbols per loop */
Yann Collete8c6bb12015-07-26 00:23:57 +01001609 while ((FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit))
Yann Collet4856a002015-01-24 01:58:16 +01001610 {
1611 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1612
Yann Collet1efa31f2015-07-04 15:56:41 -08001613 if (FSE_MAX_TABLELOG*2+7 > sizeof(size_t)*8) /* This test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001614 FSE_reloadDStream(&bitD);
1615
1616 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1617
Yann Collet1efa31f2015-07-04 15:56:41 -08001618 if (FSE_MAX_TABLELOG*4+7 < sizeof(size_t)*8) /* This test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001619 {
1620 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1621 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1622 }
1623 }
1624
1625 /* tail */
Yann Collete8c6bb12015-07-26 00:23:57 +01001626 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
Yann Collet4856a002015-01-24 01:58:16 +01001627 while (1)
1628 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001629 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
Yann Collet4856a002015-01-24 01:58:16 +01001630 break;
1631
1632 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1633
Yann Collete8c6bb12015-07-26 00:23:57 +01001634 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
Yann Collet4856a002015-01-24 01:58:16 +01001635 break;
1636
1637 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1638 }
1639
1640 /* end ? */
1641 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1642 return op-ostart;
1643
1644 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1645
1646 return (size_t)-FSE_ERROR_corruptionDetected;
1647}
1648
1649
1650size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1651 const void* cSrc, size_t cSrcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001652 const FSE_DTable* dt, size_t fastMode)
Yann Collet4856a002015-01-24 01:58:16 +01001653{
1654 /* select fast mode (static) */
Yann Collet1efa31f2015-07-04 15:56:41 -08001655 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
1656 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
Yann Collet4856a002015-01-24 01:58:16 +01001657}
1658
1659
1660size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1661{
1662 const BYTE* const istart = (const BYTE*)cSrc;
1663 const BYTE* ip = istart;
1664 short counting[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001665 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
Yann Collet4856a002015-01-24 01:58:16 +01001666 unsigned tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001667 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
Yann Collet4856a002015-01-24 01:58:16 +01001668 size_t errorCode, fastMode;
1669
1670 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1671
1672 /* normal FSE decoding mode */
Yann Collet1efa31f2015-07-04 15:56:41 -08001673 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001674 if (FSE_isError(errorCode)) return errorCode;
1675 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1676 ip += errorCode;
1677 cSrcSize -= errorCode;
1678
Yann Collet1efa31f2015-07-04 15:56:41 -08001679 fastMode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001680 if (FSE_isError(fastMode)) return fastMode;
1681
1682 /* always return, even if it is an error code */
Yann Collet1efa31f2015-07-04 15:56:41 -08001683 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt, fastMode);
Yann Collet4856a002015-01-24 01:58:16 +01001684}
1685
1686
Yann Collet4856a002015-01-24 01:58:16 +01001687
Yann Collete8c6bb12015-07-26 00:23:57 +01001688/*********************************************************
1689* Huff0 : Huffman block compression
1690*********************************************************/
Yann Collete8c6bb12015-07-26 00:23:57 +01001691#define HUF_MAX_SYMBOL_VALUE 255
Yann Colletfb8296f2015-07-27 19:34:27 +01001692#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
1693#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
1694#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code is unsupported */
1695#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
1696# error "HUF_MAX_TABLELOG is too large !"
1697#endif
Yann Collet4856a002015-01-24 01:58:16 +01001698
Yann Collete8c6bb12015-07-26 00:23:57 +01001699typedef struct HUF_CElt_s {
1700 U16 val;
1701 BYTE nbBits;
1702} HUF_CElt ;
Yann Collet4856a002015-01-24 01:58:16 +01001703
Yann Collete8c6bb12015-07-26 00:23:57 +01001704typedef struct nodeElt_s {
1705 U32 count;
1706 U16 parent;
1707 BYTE byte;
1708 BYTE nbBits;
1709} nodeElt;
Yann Collet4856a002015-01-24 01:58:16 +01001710
1711
Yann Collete8c6bb12015-07-26 00:23:57 +01001712#define HUF_HEADERLOG 8
1713size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog)
Yann Collet4856a002015-01-24 01:58:16 +01001714{
Yann Collete8c6bb12015-07-26 00:23:57 +01001715 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
1716 U32 n;
1717 BYTE* op = (BYTE*)dst;
1718 size_t size;
Yann Collet4856a002015-01-24 01:58:16 +01001719
Yann Collete8c6bb12015-07-26 00:23:57 +01001720 // check conditions
1721 if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
1722 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +01001723
Yann Collete8c6bb12015-07-26 00:23:57 +01001724 for (n=0; n<maxSymbolValue; n++)
1725 huffWeight[n] = tree[n].nbBits ? (BYTE)(huffLog + 1 - tree[n].nbBits) : 0;
Yann Collet4856a002015-01-24 01:58:16 +01001726
Yann Collete8c6bb12015-07-26 00:23:57 +01001727 size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); // don't need last symbol stat : implied
1728 if (FSE_isError(size)) return size;
1729 if (size >= 128) return (size_t)-FSE_ERROR_GENERIC;
1730 if (size <= 1) return (size_t)-FSE_ERROR_GENERIC; // special case, not implemented
Yann Collet4856a002015-01-24 01:58:16 +01001731
Yann Collete8c6bb12015-07-26 00:23:57 +01001732 op[0] = (BYTE)size;
1733 return size+1;
Yann Collet4856a002015-01-24 01:58:16 +01001734}
1735
1736
Yann Collete8c6bb12015-07-26 00:23:57 +01001737static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001738{
Yann Collete8c6bb12015-07-26 00:23:57 +01001739 int totalCost = 0;
1740 const U32 largestBits = huffNode[lastNonNull].nbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001741
Yann Collete8c6bb12015-07-26 00:23:57 +01001742 // early exit : all is fine
1743 if (largestBits <= maxNbBits) return largestBits;
Yann Collet4856a002015-01-24 01:58:16 +01001744
Yann Collete8c6bb12015-07-26 00:23:57 +01001745 // now we have a few too large elements (at least >= 2)
Yann Collet4856a002015-01-24 01:58:16 +01001746 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001747 const U32 baseCost = 1 << (largestBits - maxNbBits);
1748 U32 n = lastNonNull;
1749
1750 while (huffNode[n].nbBits > maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001751 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001752 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
1753 huffNode[n].nbBits = (BYTE)maxNbBits;
1754 n --;
Yann Collet4856a002015-01-24 01:58:16 +01001755 }
Yann Collet4856a002015-01-24 01:58:16 +01001756
Yann Collete8c6bb12015-07-26 00:23:57 +01001757 /* renorm totalCost */
1758 totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */
1759
1760 // repay cost
1761 while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1)
1762
Yann Collet4856a002015-01-24 01:58:16 +01001763 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001764 const U32 noOne = 0xF0F0F0F0;
1765 // Get pos of last (smallest) symbol per rank
1766 U32 rankLast[HUF_MAX_TABLELOG];
1767 U32 currentNbBits = maxNbBits;
1768 int pos;
1769 memset(rankLast, 0xF0, sizeof(rankLast));
1770 for (pos=n ; pos >= 0; pos--)
Yann Collet4856a002015-01-24 01:58:16 +01001771 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001772 if (huffNode[pos].nbBits >= currentNbBits) continue;
1773 currentNbBits = huffNode[pos].nbBits;
1774 rankLast[maxNbBits-currentNbBits] = pos;
1775 }
1776
1777 while (totalCost > 0)
1778 {
1779 U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1;
1780 for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
1781 {
1782 U32 highPos = rankLast[nBitsToDecrease];
1783 U32 lowPos = rankLast[nBitsToDecrease-1];
1784 if (highPos == noOne) continue;
1785 if (lowPos == noOne) break;
1786 {
1787 U32 highTotal = huffNode[highPos].count;
1788 U32 lowTotal = 2 * huffNode[lowPos].count;
1789 if (highTotal <= lowTotal) break;
1790 }
1791 }
1792 while (rankLast[nBitsToDecrease] == noOne)
1793 nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest
1794 totalCost -= 1 << (nBitsToDecrease-1);
1795 if (rankLast[nBitsToDecrease-1] == noOne)
1796 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt
1797 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
1798 rankLast[nBitsToDecrease]--;
1799 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
1800 rankLast[nBitsToDecrease] = noOne; // rank list emptied
1801 }
1802 while (totalCost < 0) // Sometimes, cost correction overshoot
1803 {
1804 if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */
1805 {
1806 while (huffNode[n].nbBits == maxNbBits) n--;
1807 huffNode[n+1].nbBits--;
1808 rankLast[1] = n+1;
1809 totalCost++;
1810 continue;
1811 }
1812 huffNode[ rankLast[1] + 1 ].nbBits--;
1813 rankLast[1]++;
1814 totalCost ++;
1815 }
1816 }
1817 }
1818
1819 return maxNbBits;
1820}
1821
1822
1823typedef struct {
1824 U32 base;
1825 U32 current;
1826} rankPos;
1827
1828static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
1829{
1830 rankPos rank[32];
1831 U32 n;
1832
1833 memset(rank, 0, sizeof(rank));
1834 for (n=0; n<=maxSymbolValue; n++)
1835 {
1836 U32 r = FSE_highbit32(count[n] + 1);
1837 rank[r].base ++;
1838 }
1839 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
1840 for (n=0; n<32; n++) rank[n].current = rank[n].base;
1841 for (n=0; n<=maxSymbolValue; n++)
1842 {
1843 U32 c = count[n];
1844 U32 r = FSE_highbit32(c+1) + 1;
1845 U32 pos = rank[r].current++;
1846 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
1847 huffNode[pos].count = c;
1848 huffNode[pos].byte = (BYTE)n;
1849 }
1850}
1851
1852
1853#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
1854size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
1855{
1856 nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
1857 nodeElt* huffNode = huffNode0 + 1;
1858 U32 n, nonNullRank;
1859 int lowS, lowN;
1860 U16 nodeNb = STARTNODE;
1861 U32 nodeRoot;
1862
1863 // check
1864 if (maxSymbolValue > 255) return (size_t)-FSE_ERROR_GENERIC;
1865 memset(huffNode0, 0, sizeof(huffNode0));
1866
1867 // sort, decreasing order
1868 HUF_sort(huffNode, count, maxSymbolValue);
1869
1870 // init for parents
1871 nonNullRank = maxSymbolValue;
1872 while(huffNode[nonNullRank].count == 0) nonNullRank--;
1873 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
1874 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
1875 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
1876 nodeNb++; lowS-=2;
1877 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
1878 huffNode0[0].count = (U32)(1U<<31);
1879
1880 // create parents
1881 while (nodeNb <= nodeRoot)
1882 {
1883 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1884 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1885 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
1886 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
1887 nodeNb++;
1888 }
1889
1890 // distribute weights (unlimited tree height)
1891 huffNode[nodeRoot].nbBits = 0;
1892 for (n=nodeRoot-1; n>=STARTNODE; n--)
1893 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1894 for (n=0; n<=nonNullRank; n++)
1895 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1896
1897 // enforce maxTableLog
1898 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
1899
1900 // fill result into tree (val, nbBits)
1901 {
1902 U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0};
1903 U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1];
1904 if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check
1905 for (n=0; n<=nonNullRank; n++)
1906 nbPerRank[huffNode[n].nbBits]++;
1907 {
1908 // determine stating value per rank
1909 U16 min = 0;
1910 for (n=maxNbBits; n>0; n--)
1911 {
1912 valPerRank[n] = min; // get starting value within each rank
1913 min += nbPerRank[n];
1914 min >>= 1;
Yann Collet4856a002015-01-24 01:58:16 +01001915 }
1916 }
Yann Collete8c6bb12015-07-26 00:23:57 +01001917 for (n=0; n<=maxSymbolValue; n++)
1918 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order
1919 for (n=0; n<=maxSymbolValue; n++)
1920 tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order
Yann Collet4856a002015-01-24 01:58:16 +01001921 }
1922
Yann Collete8c6bb12015-07-26 00:23:57 +01001923 return maxNbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001924}
1925
1926
Yann Collete8c6bb12015-07-26 00:23:57 +01001927static size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable)
Yann Collet4856a002015-01-24 01:58:16 +01001928{
Yann Collete8c6bb12015-07-26 00:23:57 +01001929 const BYTE* ip = (const BYTE*) src;
1930 BYTE* const ostart = (BYTE*)dst;
1931 BYTE* op = (BYTE*) ostart;
1932 U16* jumpTable = (U16*) dst;
1933 size_t n, streamSize;
1934 FSE_CStream_t bitC;
Yann Collet4856a002015-01-24 01:58:16 +01001935
Yann Collete8c6bb12015-07-26 00:23:57 +01001936 /* init */
1937 (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
1938 op += 6; /* jump Table -- could be optimized by delta / deviation */
1939 FSE_initCStream(&bitC, op);
Yann Collet4856a002015-01-24 01:58:16 +01001940
Yann Collete8c6bb12015-07-26 00:23:57 +01001941#define FSE_FLUSHBITS_32(stream) \
1942 if (FSE_32bits()) FSE_flushBits(stream)
Yann Collet4856a002015-01-24 01:58:16 +01001943
Yann Collete8c6bb12015-07-26 00:23:57 +01001944 n = srcSize & ~15; // mod 16
1945 switch (srcSize & 15)
Yann Collet4856a002015-01-24 01:58:16 +01001946 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001947 case 15: FSE_addBitsFast(&bitC, CTable[ip[n+14]].val, CTable[ip[n+14]].nbBits);
1948 FSE_FLUSHBITS_32(&bitC);
1949 case 14: FSE_addBitsFast(&bitC, CTable[ip[n+13]].val, CTable[ip[n+13]].nbBits);
1950 FSE_FLUSHBITS_32(&bitC);
1951 case 13: FSE_addBitsFast(&bitC, CTable[ip[n+12]].val, CTable[ip[n+12]].nbBits);
1952 FSE_FLUSHBITS_32(&bitC);
1953 case 12: FSE_addBitsFast(&bitC, CTable[ip[n+11]].val, CTable[ip[n+11]].nbBits);
1954 FSE_flushBits(&bitC);
1955 case 11: FSE_addBitsFast(&bitC, CTable[ip[n+10]].val, CTable[ip[n+10]].nbBits);
1956 FSE_FLUSHBITS_32(&bitC);
1957 case 10: FSE_addBitsFast(&bitC, CTable[ip[n+9]].val, CTable[ip[n+9]].nbBits);
1958 FSE_FLUSHBITS_32(&bitC);
1959 case 9 : FSE_addBitsFast(&bitC, CTable[ip[n+8]].val, CTable[ip[n+8]].nbBits);
1960 FSE_FLUSHBITS_32(&bitC);
1961 case 8 : FSE_addBitsFast(&bitC, CTable[ip[n+7]].val, CTable[ip[n+7]].nbBits);
1962 FSE_flushBits(&bitC);
1963 case 7 : FSE_addBitsFast(&bitC, CTable[ip[n+6]].val, CTable[ip[n+6]].nbBits);
1964 FSE_FLUSHBITS_32(&bitC);
1965 case 6 : FSE_addBitsFast(&bitC, CTable[ip[n+5]].val, CTable[ip[n+5]].nbBits);
1966 FSE_FLUSHBITS_32(&bitC);
1967 case 5 : FSE_addBitsFast(&bitC, CTable[ip[n+4]].val, CTable[ip[n+4]].nbBits);
1968 FSE_FLUSHBITS_32(&bitC);
1969 case 4 : FSE_addBitsFast(&bitC, CTable[ip[n+3]].val, CTable[ip[n+3]].nbBits);
1970 FSE_flushBits(&bitC);
1971 case 3 : FSE_addBitsFast(&bitC, CTable[ip[n+2]].val, CTable[ip[n+2]].nbBits);
1972 FSE_FLUSHBITS_32(&bitC);
1973 case 2 : FSE_addBitsFast(&bitC, CTable[ip[n+1]].val, CTable[ip[n+1]].nbBits);
1974 FSE_FLUSHBITS_32(&bitC);
1975 case 1 : FSE_addBitsFast(&bitC, CTable[ip[n+0]].val, CTable[ip[n+0]].nbBits);
1976 FSE_flushBits(&bitC);
1977 case 0 :
1978 default: ;
Yann Collet4856a002015-01-24 01:58:16 +01001979 }
1980
Yann Collete8c6bb12015-07-26 00:23:57 +01001981 for (; n>0; n-=16)
Yann Collet4856a002015-01-24 01:58:16 +01001982 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001983 FSE_addBitsFast(&bitC, CTable[ip[n- 4]].val, CTable[ip[n- 4]].nbBits);
1984 FSE_FLUSHBITS_32(&bitC);
1985 FSE_addBitsFast(&bitC, CTable[ip[n- 8]].val, CTable[ip[n- 8]].nbBits);
1986 FSE_FLUSHBITS_32(&bitC);
1987 FSE_addBitsFast(&bitC, CTable[ip[n-12]].val, CTable[ip[n-12]].nbBits);
1988 FSE_FLUSHBITS_32(&bitC);
1989 FSE_addBitsFast(&bitC, CTable[ip[n-16]].val, CTable[ip[n-16]].nbBits);
1990 FSE_flushBits(&bitC);
1991 }
1992 streamSize = FSE_closeCStream(&bitC);
Yann Collet138db212015-07-27 20:19:21 +01001993 FSE_writeLE16(jumpTable, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01001994 op += streamSize;
1995
1996 FSE_initCStream(&bitC, op);
1997 n = srcSize & ~15; // mod 16
1998 for (; n>0; n-=16)
1999 {
2000 FSE_addBitsFast(&bitC, CTable[ip[n- 3]].val, CTable[ip[n- 3]].nbBits);
2001 FSE_FLUSHBITS_32(&bitC);
2002 FSE_addBitsFast(&bitC, CTable[ip[n- 7]].val, CTable[ip[n- 7]].nbBits);
2003 FSE_FLUSHBITS_32(&bitC);
2004 FSE_addBitsFast(&bitC, CTable[ip[n-11]].val, CTable[ip[n-11]].nbBits);
2005 FSE_FLUSHBITS_32(&bitC);
2006 FSE_addBitsFast(&bitC, CTable[ip[n-15]].val, CTable[ip[n-15]].nbBits);
2007 FSE_flushBits(&bitC);
2008 }
2009 streamSize = FSE_closeCStream(&bitC);
Yann Collet138db212015-07-27 20:19:21 +01002010 FSE_writeLE16(jumpTable+1, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01002011 op += streamSize;
2012
2013 FSE_initCStream(&bitC, op);
2014 n = srcSize & ~15; // mod 16
2015 for (; n>0; n-=16)
2016 {
2017 FSE_addBitsFast(&bitC, CTable[ip[n- 2]].val, CTable[ip[n- 2]].nbBits);
2018 FSE_FLUSHBITS_32(&bitC);
2019 FSE_addBitsFast(&bitC, CTable[ip[n- 6]].val, CTable[ip[n- 6]].nbBits);
2020 FSE_FLUSHBITS_32(&bitC);
2021 FSE_addBitsFast(&bitC, CTable[ip[n-10]].val, CTable[ip[n-10]].nbBits);
2022 FSE_FLUSHBITS_32(&bitC);
2023 FSE_addBitsFast(&bitC, CTable[ip[n-14]].val, CTable[ip[n-14]].nbBits);
2024 FSE_flushBits(&bitC);
2025 }
2026 streamSize = FSE_closeCStream(&bitC);
Yann Collet138db212015-07-27 20:19:21 +01002027 FSE_writeLE16(jumpTable+2, (U16)streamSize);
Yann Collete8c6bb12015-07-26 00:23:57 +01002028 op += streamSize;
2029
2030 FSE_initCStream(&bitC, op);
2031 n = srcSize & ~15; // mod 16
2032 for (; n>0; n-=16)
2033 {
2034 FSE_addBitsFast(&bitC, CTable[ip[n- 1]].val, CTable[ip[n- 1]].nbBits);
2035 FSE_FLUSHBITS_32(&bitC);
2036 FSE_addBitsFast(&bitC, CTable[ip[n- 5]].val, CTable[ip[n- 5]].nbBits);
2037 FSE_FLUSHBITS_32(&bitC);
2038 FSE_addBitsFast(&bitC, CTable[ip[n- 9]].val, CTable[ip[n- 9]].nbBits);
2039 FSE_FLUSHBITS_32(&bitC);
2040 FSE_addBitsFast(&bitC, CTable[ip[n-13]].val, CTable[ip[n-13]].nbBits);
2041 FSE_flushBits(&bitC);
2042 }
2043 streamSize = FSE_closeCStream(&bitC);
2044 op += streamSize;
2045
2046 return op-ostart;
2047}
2048
2049
2050size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog)
2051{
2052 BYTE* const ostart = (BYTE*)dst;
2053 BYTE* op = ostart;
2054 BYTE* const oend = ostart + dstSize;
2055
2056 U32 count[HUF_MAX_SYMBOL_VALUE+1];
2057 HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
2058 size_t errorCode;
2059
2060 /* early out */
2061 if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
2062 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
2063 if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
2064 if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
2065
2066 /* Scan input and build symbol stats */
2067 errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
2068 if (FSE_isError(errorCode)) return errorCode;
2069 if (errorCode == srcSize) return 1;
2070 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
2071
2072 /* Build Huffman Tree */
2073 errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
2074 if (FSE_isError(errorCode)) return errorCode;
2075 huffLog = (U32)errorCode;
2076
2077 /* Write table description header */
2078 errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */
2079 if (FSE_isError(errorCode)) return errorCode;
2080 op += errorCode;
2081
2082 /* Compress */
2083 op += HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);
2084
2085 /* check compressibility */
2086 if ((size_t)(op-ostart) >= srcSize-1)
2087 return 0;
2088
2089 return op-ostart;
2090}
2091
2092size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2093{
2094 return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
2095}
2096
2097
2098/*********************************************************
2099* Huff0 : Huffman block decompression
2100*********************************************************/
2101typedef struct {
2102 BYTE byte;
2103 BYTE nbBits;
2104} HUF_DElt;
2105
2106size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
2107{
2108 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
2109 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1] = {0};
2110 U32 weightTotal = 0;
2111 U32 maxBits;
2112 const BYTE* ip = (const BYTE*) src;
2113 size_t iSize = ip[0];
2114 size_t oSize;
2115 U32 n;
2116 U32 nextRankStart;
2117 HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
2118
2119 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); // if compilation fails here, assertion is false
2120 if (iSize >= 128) return (size_t)-FSE_ERROR_GENERIC; // special case, not implemented
2121 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2122
2123 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); // max 255 values stored, last is implied
2124 if (FSE_isError(oSize)) return oSize;
2125
2126 // stats on weights
2127 for (n=0; n<oSize; n++)
2128 {
2129 rankVal[huffWeight[n]]++;
2130 weightTotal += (1 << huffWeight[n]) >> 1;
Yann Collet4856a002015-01-24 01:58:16 +01002131 }
2132
Yann Collete8c6bb12015-07-26 00:23:57 +01002133 // get last symbol weight(implied)
2134 maxBits = FSE_highbit32(weightTotal) + 1;
2135 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_GENERIC; // DTable is too small
2136 DTable[0] = (U16)maxBits;
2137 {
2138 U32 total = 1 << maxBits;
2139 U32 rest = total - weightTotal;
2140 U32 verif = 1 << FSE_highbit32(rest);
2141 if (verif != rest) return (size_t)-FSE_ERROR_GENERIC; // last value must be a clean power of 2
2142 huffWeight[oSize] = (BYTE)(FSE_highbit32(rest) + 1);
2143 rankVal[huffWeight[oSize]]++;
2144 }
Yann Collet4856a002015-01-24 01:58:16 +01002145
Yann Collete8c6bb12015-07-26 00:23:57 +01002146 // Prepare ranks
2147 nextRankStart = 0;
2148 for (n=1; n<=maxBits; n++)
2149 {
2150 U32 current = nextRankStart;
2151 nextRankStart += (rankVal[n] << (n-1));
2152 rankVal[n] = current;
2153 }
2154
2155 // fill table
2156 for (n=0; n<=oSize; n++)
Yann Collet4856a002015-01-24 01:58:16 +01002157 {
2158 U32 i;
Yann Collete8c6bb12015-07-26 00:23:57 +01002159 const U32 w = huffWeight[n];
2160 const U32 length = (1 << w) >> 1;
2161 HUF_DElt D;
2162 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
2163 for (i = rankVal[w]; i < rankVal[w] + length; i++)
2164 dt[i] = D;
2165 rankVal[w] += length;
Yann Collet4856a002015-01-24 01:58:16 +01002166 }
2167
Yann Collete8c6bb12015-07-26 00:23:57 +01002168 return iSize+1;
Yann Collet4856a002015-01-24 01:58:16 +01002169}
Yann Collete8c6bb12015-07-26 00:23:57 +01002170
2171/*
2172#define HUF_DECODE_SYMBOL(n, Dstream) \
2173 val = FSE_lookBitsFast(&Dstream, dtLog); \
2174 c = dt[val].byte; \
2175 FSE_skipBits(&Dstream, dt[val].nbBits); \
2176 op[n] = c;
2177*/
2178
2179static void HUF_decodeSymbol(BYTE* ptr, FSE_DStream_t* Dstream, const HUF_DElt* dt, U32 dtLog)
2180{
2181 size_t val = FSE_lookBitsFast(Dstream, dtLog);
2182 BYTE c = dt[val].byte;
2183 FSE_skipBits(Dstream, dt[val].nbBits);
2184 *ptr = c;
2185}
2186
2187static size_t HUF_decompress_usingDTable(
2188 void* dst, size_t maxDstSize,
2189 const void* cSrc, size_t cSrcSize,
2190 const U16* DTable)
2191{
2192 BYTE* const ostart = (BYTE*) dst;
2193 BYTE* op = ostart;
2194 BYTE* const omax = op + maxDstSize;
2195 BYTE* const olimit = omax-15;
2196
2197 const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
2198 const U32 dtLog = DTable[0];
2199 size_t errorCode;
2200 U32 reloadStatus;
2201
2202 /* Init */
2203
2204 const U16* jumpTable = (const U16*)cSrc;
Yann Collet138db212015-07-27 20:19:21 +01002205 const size_t length1 = FSE_readLE16(jumpTable);
2206 const size_t length2 = FSE_readLE16(jumpTable+1);
2207 const size_t length3 = FSE_readLE16(jumpTable+2);
Yann Collete8c6bb12015-07-26 00:23:57 +01002208 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
2209 const char* const start1 = (const char*)(cSrc) + 6;
2210 const char* const start2 = start1 + length1;
2211 const char* const start3 = start2 + length2;
2212 const char* const start4 = start3 + length3;
2213 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
2214
2215 errorCode = FSE_initDStream(&bitD1, start1, length1);
2216 if (FSE_isError(errorCode)) return errorCode;
2217 errorCode = FSE_initDStream(&bitD2, start2, length2);
2218 if (FSE_isError(errorCode)) return errorCode;
2219 errorCode = FSE_initDStream(&bitD3, start3, length3);
2220 if (FSE_isError(errorCode)) return errorCode;
2221 errorCode = FSE_initDStream(&bitD4, start4, length4);
2222 if (FSE_isError(errorCode)) return errorCode;
2223
2224 reloadStatus=FSE_reloadDStream(&bitD2);
2225
2226 /* 16 symbols per loop */
2227 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
2228 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
2229 {
Yann Colletfb8296f2015-07-27 19:34:27 +01002230#define HUF_DECODE_SYMBOL_0(n, Dstream) \
2231 HUF_decodeSymbol(op+n, &Dstream, dt, dtLog);
2232
2233#define HUF_DECODE_SYMBOL_1(n, Dstream) \
2234 HUF_decodeSymbol(op+n, &Dstream, dt, dtLog); \
2235 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
2236
2237#define HUF_DECODE_SYMBOL_2(n, Dstream) \
Yann Collete8c6bb12015-07-26 00:23:57 +01002238 HUF_decodeSymbol(op+n, &Dstream, dt, dtLog); \
2239 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
2240
Yann Colletfb8296f2015-07-27 19:34:27 +01002241 HUF_DECODE_SYMBOL_1( 0, bitD1);
2242 HUF_DECODE_SYMBOL_1( 1, bitD2);
2243 HUF_DECODE_SYMBOL_1( 2, bitD3);
2244 HUF_DECODE_SYMBOL_1( 3, bitD4);
2245 HUF_DECODE_SYMBOL_2( 4, bitD1);
2246 HUF_DECODE_SYMBOL_2( 5, bitD2);
2247 HUF_DECODE_SYMBOL_2( 6, bitD3);
2248 HUF_DECODE_SYMBOL_2( 7, bitD4);
2249 HUF_DECODE_SYMBOL_1( 8, bitD1);
2250 HUF_DECODE_SYMBOL_1( 9, bitD2);
2251 HUF_DECODE_SYMBOL_1(10, bitD3);
2252 HUF_DECODE_SYMBOL_1(11, bitD4);
2253 HUF_DECODE_SYMBOL_0(12, bitD1);
2254 HUF_DECODE_SYMBOL_0(13, bitD2);
2255 HUF_DECODE_SYMBOL_0(14, bitD3);
2256 HUF_DECODE_SYMBOL_0(15, bitD4);
Yann Collete8c6bb12015-07-26 00:23:57 +01002257 }
2258
2259 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be 0 (unfinished) */
2260 return (size_t)-FSE_ERROR_corruptionDetected;
2261
2262 /* tail */
2263 {
2264 // bitTail = bitD1; // *much* slower : -20% !??!
2265 FSE_DStream_t bitTail;
2266 bitTail.ptr = bitD1.ptr;
2267 bitTail.bitsConsumed = bitD1.bitsConsumed;
2268 bitTail.bitContainer = bitD1.bitContainer; // required in case FSE_DStream_partiallyFilled
2269 bitTail.start = start1;
2270 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
2271 {
Yann Colletfb8296f2015-07-27 19:34:27 +01002272 HUF_DECODE_SYMBOL_0(0, bitTail);
Yann Collete8c6bb12015-07-26 00:23:57 +01002273 }
2274
2275 if (FSE_endOfDStream(&bitTail))
2276 return op-ostart;
2277 }
2278
2279 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
2280
2281 return (size_t)-FSE_ERROR_corruptionDetected;
2282}
2283
2284
2285size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
2286{
2287 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
2288 const BYTE* ip = (const BYTE*) cSrc;
2289 size_t errorCode;
2290
2291 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
2292 if (FSE_isError(errorCode)) return errorCode;
2293 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2294 ip += errorCode;
2295 cSrcSize -= errorCode;
2296
2297 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
2298}
2299
2300
2301#endif /* FSE_COMMONDEFS_ONLY */