blob: 5390e9cdcb66a358de523eb64ced0ea1e683b4a8 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35#ifndef FSE_COMMONDEFS_ONLY
36
37/****************************************************************
38* Tuning parameters
39****************************************************************/
40/* MEMORY_USAGE :
41* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
42* Increasing memory usage improves compression ratio
43* Reduced memory usage can improve speed, due to cache effect
44* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
45#define FSE_MAX_MEMORY_USAGE 14
46#define FSE_DEFAULT_MEMORY_USAGE 13
47
48/* FSE_MAX_SYMBOL_VALUE :
49* Maximum symbol value authorized.
50* Required for proper stack allocation */
51#define FSE_MAX_SYMBOL_VALUE 255
52
53
54/****************************************************************
Yann Collete8c6bb12015-07-26 00:23:57 +010055* template functions type & suffix
Yann Collet4856a002015-01-24 01:58:16 +010056****************************************************************/
57#define FSE_FUNCTION_TYPE BYTE
58#define FSE_FUNCTION_EXTENSION
59
Yann Collete8c6bb12015-07-26 00:23:57 +010060
61/****************************************************************
62* Byte symbol type
63****************************************************************/
64typedef struct
65{
66 unsigned short newState;
67 unsigned char symbol;
68 unsigned char nbBits;
69} FSE_decode_t; /* size == U32 */
70
Yann Collet4856a002015-01-24 01:58:16 +010071#endif /* !FSE_COMMONDEFS_ONLY */
72
73
74/****************************************************************
75* Compiler specifics
76****************************************************************/
77#ifdef _MSC_VER /* Visual Studio */
78# define FORCE_INLINE static __forceinline
79# include <intrin.h> /* For Visual 2005 */
80# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
81# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
82#else
83# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
84# ifdef __GNUC__
85# define FORCE_INLINE static inline __attribute__((always_inline))
86# else
87# define FORCE_INLINE static inline
88# endif
89#endif
90
91
92/****************************************************************
93* Includes
94****************************************************************/
95#include <stdlib.h> /* malloc, free, qsort */
96#include <string.h> /* memcpy, memset */
97#include <stdio.h> /* printf (debug) */
98#include "fse_static.h"
99
100
Yann Colletad68a0e2015-02-26 00:29:15 +0100101#ifndef MEM_ACCESS_MODULE
102#define MEM_ACCESS_MODULE
Yann Collet4856a002015-01-24 01:58:16 +0100103/****************************************************************
104* Basic Types
105*****************************************************************/
106#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
107# include <stdint.h>
108typedef uint8_t BYTE;
109typedef uint16_t U16;
110typedef int16_t S16;
111typedef uint32_t U32;
112typedef int32_t S32;
113typedef uint64_t U64;
114typedef int64_t S64;
115#else
116typedef unsigned char BYTE;
117typedef unsigned short U16;
118typedef signed short S16;
119typedef unsigned int U32;
120typedef signed int S32;
121typedef unsigned long long U64;
122typedef signed long long S64;
123#endif
124
Yann Colletad68a0e2015-02-26 00:29:15 +0100125#endif /* MEM_ACCESS_MODULE */
Yann Collet4856a002015-01-24 01:58:16 +0100126
127/****************************************************************
128* Memory I/O
129*****************************************************************/
Yann Collete8c6bb12015-07-26 00:23:57 +0100130static unsigned FSE_32bits(void)
131{
132 return sizeof(void*)==4;
133}
134
Yann Collet4856a002015-01-24 01:58:16 +0100135static unsigned FSE_isLittleEndian(void)
136{
137 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
138 return one.c[0];
139}
140
141static U32 FSE_read32(const void* memPtr)
142{
143 U32 val32;
144 memcpy(&val32, memPtr, 4);
145 return val32;
146}
147
148static U32 FSE_readLE32(const void* memPtr)
149{
150 if (FSE_isLittleEndian())
151 return FSE_read32(memPtr);
152 else
153 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100154 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100155 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
156 }
157}
158
159static void FSE_writeLE32(void* memPtr, U32 val32)
160{
161 if (FSE_isLittleEndian())
162 {
163 memcpy(memPtr, &val32, 4);
164 }
165 else
166 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100167 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100168 p[0] = (BYTE)val32;
169 p[1] = (BYTE)(val32>>8);
170 p[2] = (BYTE)(val32>>16);
171 p[3] = (BYTE)(val32>>24);
172 }
173}
174
175static U64 FSE_read64(const void* memPtr)
176{
177 U64 val64;
178 memcpy(&val64, memPtr, 8);
179 return val64;
180}
181
182static U64 FSE_readLE64(const void* memPtr)
183{
184 if (FSE_isLittleEndian())
185 return FSE_read64(memPtr);
186 else
187 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100188 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100189 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
190 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
191 }
192}
193
194static void FSE_writeLE64(void* memPtr, U64 val64)
195{
196 if (FSE_isLittleEndian())
197 {
198 memcpy(memPtr, &val64, 8);
199 }
200 else
201 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100202 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100203 p[0] = (BYTE)val64;
204 p[1] = (BYTE)(val64>>8);
205 p[2] = (BYTE)(val64>>16);
206 p[3] = (BYTE)(val64>>24);
207 p[4] = (BYTE)(val64>>32);
208 p[5] = (BYTE)(val64>>40);
209 p[6] = (BYTE)(val64>>48);
210 p[7] = (BYTE)(val64>>56);
211 }
212}
213
214static size_t FSE_readLEST(const void* memPtr)
215{
Yann Collete8c6bb12015-07-26 00:23:57 +0100216 if (FSE_32bits())
Yann Colletfb814172015-01-25 13:19:12 +0100217 return (size_t)FSE_readLE32(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100218 else
Yann Colletfb814172015-01-25 13:19:12 +0100219 return (size_t)FSE_readLE64(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100220}
221
222static void FSE_writeLEST(void* memPtr, size_t val)
223{
Yann Collete8c6bb12015-07-26 00:23:57 +0100224 if (FSE_32bits())
Yann Collet4856a002015-01-24 01:58:16 +0100225 FSE_writeLE32(memPtr, (U32)val);
226 else
227 FSE_writeLE64(memPtr, (U64)val);
228}
229
230
231/****************************************************************
232* Constants
233*****************************************************************/
234#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
235#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
236#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
237#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
238#define FSE_MIN_TABLELOG 5
239
240#define FSE_TABLELOG_ABSOLUTE_MAX 15
241#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
242#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
243#endif
244
245
246/****************************************************************
247* Error Management
248****************************************************************/
249#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
250
251
252/****************************************************************
253* Complex types
254****************************************************************/
255typedef struct
256{
257 int deltaFindState;
258 U16 maxState;
259 BYTE minBitsOut;
Yann Collet1efa31f2015-07-04 15:56:41 -0800260 /* one byte padding ; total 8 bytes */
Yann Collet4856a002015-01-24 01:58:16 +0100261} FSE_symbolCompressionTransform;
262
Yann Collet1efa31f2015-07-04 15:56:41 -0800263typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
264typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
Yann Collet4856a002015-01-24 01:58:16 +0100265
266/****************************************************************
267* Internal functions
268****************************************************************/
269FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
270{
271# if defined(_MSC_VER) /* Visual */
272 unsigned long r;
273 _BitScanReverse ( &r, val );
274 return (unsigned) r;
275# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
276 return 31 - __builtin_clz (val);
277# else /* Software version */
278 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
279 U32 v = val;
280 unsigned r;
281 v |= v >> 1;
282 v |= v >> 2;
283 v |= v >> 4;
284 v |= v >> 8;
285 v |= v >> 16;
286 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
287 return r;
288# endif
289}
290
291
Yann Collete8c6bb12015-07-26 00:23:57 +0100292/****************************************************************
293* Templates
294****************************************************************/
295/*
296 designed to be included
297 for type-specific functions (template emulation in C)
298 Objective is to write these functions only once, for improved maintenance
299*/
300
301/* safety checks */
302#ifndef FSE_FUNCTION_EXTENSION
303# error "FSE_FUNCTION_EXTENSION must be defined"
304#endif
305#ifndef FSE_FUNCTION_TYPE
306# error "FSE_FUNCTION_TYPE must be defined"
307#endif
308
309/* Function names */
310#define FSE_CAT(X,Y) X##Y
311#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
312#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
313
314
315/* Function templates */
316size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
317(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
318{
319 const FSE_FUNCTION_TYPE* ip = source;
320 const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
321 unsigned maxSymbolValue = *maxSymbolValuePtr;
322 unsigned max=0;
323 int s;
324
325 U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
326 U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
327 U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
328 U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
329
330 /* safety checks */
331 if (!sourceSize)
332 {
333 memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
334 *maxSymbolValuePtr = 0;
335 return 0;
336 }
337 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
338 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
339
340 if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
341 {
342 /* check input values, to avoid count table overflow */
343 while (ip < iend-3)
344 {
345 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
346 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
347 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
348 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
349 }
350 }
351 else
352 {
353 U32 cached = FSE_read32(ip); ip += 4;
354 while (ip < iend-15)
355 {
356 U32 c = cached; cached = FSE_read32(ip); ip += 4;
357 Counting1[(BYTE) c ]++;
358 Counting2[(BYTE)(c>>8) ]++;
359 Counting3[(BYTE)(c>>16)]++;
360 Counting4[ c>>24 ]++;
361 c = cached; cached = FSE_read32(ip); ip += 4;
362 Counting1[(BYTE) c ]++;
363 Counting2[(BYTE)(c>>8) ]++;
364 Counting3[(BYTE)(c>>16)]++;
365 Counting4[ c>>24 ]++;
366 c = cached; cached = FSE_read32(ip); ip += 4;
367 Counting1[(BYTE) c ]++;
368 Counting2[(BYTE)(c>>8) ]++;
369 Counting3[(BYTE)(c>>16)]++;
370 Counting4[ c>>24 ]++;
371 c = cached; cached = FSE_read32(ip); ip += 4;
372 Counting1[(BYTE) c ]++;
373 Counting2[(BYTE)(c>>8) ]++;
374 Counting3[(BYTE)(c>>16)]++;
375 Counting4[ c>>24 ]++;
376 }
377 ip-=4;
378 }
379
380 /* finish last symbols */
381 while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
382
383 for (s=0; s<=(int)maxSymbolValue; s++)
384 {
385 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
386 if (count[s] > max) max = count[s];
387 }
388
389 while (!count[maxSymbolValue]) maxSymbolValue--;
390 *maxSymbolValuePtr = maxSymbolValue;
391 return (size_t)max;
392}
393
394/* hidden fast variant (unsafe) */
395size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
396(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
397{
398 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
399}
400
401size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
402(unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
403{
404 if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
405 {
406 *maxSymbolValuePtr = 255;
407 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
408 }
409 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
410}
411
412
413static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
414
415size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
416(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
417{
418 const unsigned tableSize = 1 << tableLog;
419 const unsigned tableMask = tableSize - 1;
420 U16* tableU16 = ( (U16*) ct) + 2;
421 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
422 const unsigned step = FSE_tableStep(tableSize);
423 unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
424 U32 position = 0;
425 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */
426 U32 highThreshold = tableSize-1;
427 unsigned symbol;
428 unsigned i;
429
430 /* header */
431 tableU16[-2] = (U16) tableLog;
432 tableU16[-1] = (U16) maxSymbolValue;
433
434 /* For explanations on how to distribute symbol values over the table :
435 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
436
437 /* symbol start positions */
438 cumul[0] = 0;
439 for (i=1; i<=maxSymbolValue+1; i++)
440 {
441 if (normalizedCounter[i-1]==-1) /* Low prob symbol */
442 {
443 cumul[i] = cumul[i-1] + 1;
444 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
445 }
446 else
447 cumul[i] = cumul[i-1] + normalizedCounter[i-1];
448 }
449 cumul[maxSymbolValue+1] = tableSize+1;
450
451 /* Spread symbols */
452 for (symbol=0; symbol<=maxSymbolValue; symbol++)
453 {
454 int nbOccurences;
455 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
456 {
457 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
458 position = (position + step) & tableMask;
459 while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
460 }
461 }
462
463 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
464
465 /* Build table */
466 for (i=0; i<tableSize; i++)
467 {
468 FSE_FUNCTION_TYPE s = tableSymbol[i]; /* static analyzer doesn't understand tableSymbol is properly initialized */
469 tableU16[cumul[s]++] = (U16) (tableSize+i); /* Table U16 : sorted by symbol order; gives next state value */
470 }
471
472 /* Build Symbol Transformation Table */
473 {
474 unsigned s;
475 unsigned total = 0;
476 for (s=0; s<=maxSymbolValue; s++)
477 {
478 switch (normalizedCounter[s])
479 {
480 case 0:
481 break;
482 case -1:
483 case 1:
484 symbolTT[s].minBitsOut = (BYTE)tableLog;
485 symbolTT[s].deltaFindState = total - 1;
486 total ++;
487 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
488 break;
489 default :
490 symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
491 symbolTT[s].deltaFindState = total - normalizedCounter[s];
492 total += normalizedCounter[s];
493 symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
494 }
495 }
496 }
497
498 return 0;
499}
500
501
502#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
503
504FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
505{
506 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
507 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
508}
509
510void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
511{
512 free(dt);
513}
514
515
516size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
517(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
518{
519 U32* const base32 = (U32*)dt;
520 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
521 const U32 tableSize = 1 << tableLog;
522 const U32 tableMask = tableSize-1;
523 const U32 step = FSE_tableStep(tableSize);
524 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
525 U32 position = 0;
526 U32 highThreshold = tableSize-1;
527 const S16 largeLimit= (S16)(1 << (tableLog-1));
528 U32 noLarge = 1;
529 U32 s;
530
531 /* Sanity Checks */
532 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
533 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
534
535 /* Init, lay down lowprob symbols */
536 base32[0] = tableLog;
537 for (s=0; s<=maxSymbolValue; s++)
538 {
539 if (normalizedCounter[s]==-1)
540 {
541 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
542 symbolNext[s] = 1;
543 }
544 else
545 {
546 if (normalizedCounter[s] >= largeLimit) noLarge=0;
547 symbolNext[s] = normalizedCounter[s];
548 }
549 }
550
551 /* Spread symbols */
552 for (s=0; s<=maxSymbolValue; s++)
553 {
554 int i;
555 for (i=0; i<normalizedCounter[s]; i++)
556 {
557 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
558 position = (position + step) & tableMask;
559 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
560 }
561 }
562
563 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
564
565 /* Build Decoding table */
566 {
567 U32 i;
568 for (i=0; i<tableSize; i++)
569 {
570 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
571 U16 nextState = symbolNext[symbol]++;
572 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
573 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
574 }
575 }
576
577 return noLarge;
578}
579
580
581/******************************************
582* FSE byte symbol
583******************************************/
Yann Collet4856a002015-01-24 01:58:16 +0100584#ifndef FSE_COMMONDEFS_ONLY
585
586unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
587
588#define FSE_GENERATE_STRING(STRING) #STRING,
589static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
590
591const char* FSE_getErrorName(size_t code)
592{
593 static const char* codeError = "Unspecified error code";
594 if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
595 return codeError;
596}
597
598static short FSE_abs(short a)
599{
600 return a<0? -a : a;
601}
602
603
604/****************************************************************
605* Header bitstream management
606****************************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -0800607size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100608{
609 size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
610 return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
611}
612
Yann Collet1efa31f2015-07-04 15:56:41 -0800613static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
Yann Collet4856a002015-01-24 01:58:16 +0100614 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
615 unsigned safeWrite)
616{
617 BYTE* const ostart = (BYTE*) header;
618 BYTE* out = ostart;
619 BYTE* const oend = ostart + headerBufferSize;
620 int nbBits;
621 const int tableSize = 1 << tableLog;
622 int remaining;
623 int threshold;
624 U32 bitStream;
625 int bitCount;
626 unsigned charnum = 0;
627 int previous0 = 0;
628
629 bitStream = 0;
630 bitCount = 0;
631 /* Table Size */
632 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
633 bitCount += 4;
634
635 /* Init */
636 remaining = tableSize+1; /* +1 for extra accuracy */
637 threshold = tableSize;
638 nbBits = tableLog+1;
639
640 while (remaining>1) /* stops at 1 */
641 {
642 if (previous0)
643 {
644 unsigned start = charnum;
645 while (!normalizedCounter[charnum]) charnum++;
646 while (charnum >= start+24)
647 {
648 start+=24;
Yann Collet213089c2015-06-18 07:43:16 -0800649 bitStream += 0xFFFFU << bitCount;
Yann Collet4856a002015-01-24 01:58:16 +0100650 if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
Yann Collet213089c2015-06-18 07:43:16 -0800651 out[0] = (BYTE) bitStream;
Yann Collet4856a002015-01-24 01:58:16 +0100652 out[1] = (BYTE)(bitStream>>8);
653 out+=2;
654 bitStream>>=16;
655 }
656 while (charnum >= start+3)
657 {
658 start+=3;
659 bitStream += 3 << bitCount;
660 bitCount += 2;
661 }
662 bitStream += (charnum-start) << bitCount;
663 bitCount += 2;
664 if (bitCount>16)
665 {
666 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
667 out[0] = (BYTE)bitStream;
668 out[1] = (BYTE)(bitStream>>8);
669 out += 2;
670 bitStream >>= 16;
671 bitCount -= 16;
672 }
673 }
674 {
675 short count = normalizedCounter[charnum++];
676 const short max = (short)((2*threshold-1)-remaining);
677 remaining -= FSE_abs(count);
Yann Collet213089c2015-06-18 07:43:16 -0800678 if (remaining<1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100679 count++; /* +1 for extra accuracy */
680 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
681 bitStream += count << bitCount;
682 bitCount += nbBits;
683 bitCount -= (count<max);
684 previous0 = (count==1);
685 while (remaining<threshold) nbBits--, threshold>>=1;
686 }
687 if (bitCount>16)
688 {
689 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
690 out[0] = (BYTE)bitStream;
691 out[1] = (BYTE)(bitStream>>8);
692 out += 2;
693 bitStream >>= 16;
694 bitCount -= 16;
695 }
696 }
697
698 /* flush remaining bitStream */
699 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
700 out[0] = (BYTE)bitStream;
701 out[1] = (BYTE)(bitStream>>8);
702 out+= (bitCount+7) /8;
703
Yann Collete8c6bb12015-07-26 00:23:57 +0100704 if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +0100705
706 return (out-ostart);
707}
708
709
Yann Collet1efa31f2015-07-04 15:56:41 -0800710size_t FSE_writeNCount (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100711{
712 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
713 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
714
Yann Collet1efa31f2015-07-04 15:56:41 -0800715 if (headerBufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
716 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
Yann Collet4856a002015-01-24 01:58:16 +0100717
Yann Collet1efa31f2015-07-04 15:56:41 -0800718 return FSE_writeNCount_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
Yann Collet4856a002015-01-24 01:58:16 +0100719}
720
721
Yann Collet1efa31f2015-07-04 15:56:41 -0800722size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
Yann Collet4856a002015-01-24 01:58:16 +0100723 const void* headerBuffer, size_t hbSize)
724{
725 const BYTE* const istart = (const BYTE*) headerBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -0800726 const BYTE* const iend = istart + hbSize;
Yann Collet4856a002015-01-24 01:58:16 +0100727 const BYTE* ip = istart;
728 int nbBits;
729 int remaining;
730 int threshold;
731 U32 bitStream;
732 int bitCount;
733 unsigned charnum = 0;
734 int previous0 = 0;
735
Yann Collet1efa31f2015-07-04 15:56:41 -0800736 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100737 bitStream = FSE_readLE32(ip);
738 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
739 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
740 bitStream >>= 4;
741 bitCount = 4;
742 *tableLogPtr = nbBits;
743 remaining = (1<<nbBits)+1;
744 threshold = 1<<nbBits;
745 nbBits++;
746
747 while ((remaining>1) && (charnum<=*maxSVPtr))
748 {
749 if (previous0)
750 {
751 unsigned n0 = charnum;
752 while ((bitStream & 0xFFFF) == 0xFFFF)
753 {
754 n0+=24;
755 ip+=2;
756 bitStream = FSE_readLE32(ip) >> bitCount;
757 }
758 while ((bitStream & 3) == 3)
759 {
760 n0+=3;
761 bitStream>>=2;
762 bitCount+=2;
763 }
764 n0 += bitStream & 3;
765 bitCount += 2;
Yann Collet1efa31f2015-07-04 15:56:41 -0800766 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
Yann Collet4856a002015-01-24 01:58:16 +0100767 while (charnum < n0) normalizedCounter[charnum++] = 0;
768 ip += bitCount>>3;
769 bitCount &= 7;
770 bitStream = FSE_readLE32(ip) >> bitCount;
771 }
772 {
773 const short max = (short)((2*threshold-1)-remaining);
774 short count;
775
776 if ((bitStream & (threshold-1)) < (U32)max)
777 {
778 count = (short)(bitStream & (threshold-1));
779 bitCount += nbBits-1;
780 }
781 else
782 {
783 count = (short)(bitStream & (2*threshold-1));
784 if (count >= threshold) count -= max;
785 bitCount += nbBits;
786 }
787
788 count--; /* extra accuracy */
789 remaining -= FSE_abs(count);
790 normalizedCounter[charnum++] = count;
791 previous0 = !count;
792 while (remaining < threshold)
793 {
794 nbBits--;
795 threshold >>= 1;
796 }
797
Yann Collet1efa31f2015-07-04 15:56:41 -0800798 {
799 const BYTE* itarget = ip + (bitCount>>3);
800 if (itarget > iend - 4)
801 {
802 ip = iend - 4;
803 bitCount -= (int)(8 * (iend - 4 - ip));
804 }
805 else
806 {
807 ip = itarget;
808 bitCount &= 7;
809 }
810 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
811 }
Yann Collet4856a002015-01-24 01:58:16 +0100812 }
813 }
814 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
815 *maxSVPtr = charnum-1;
816
Yann Collet1efa31f2015-07-04 15:56:41 -0800817 ip += (bitCount+7)>>3;
818 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
Yann Collet4856a002015-01-24 01:58:16 +0100819 return ip-istart;
820}
821
822
823/****************************************************************
824* FSE Compression Code
825****************************************************************/
826/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800827FSE_CTable[0] is a variable size structure which contains :
Yann Collet4856a002015-01-24 01:58:16 +0100828 U16 tableLog;
829 U16 maxSymbolValue;
830 U16 nextStateNumber[1 << tableLog]; // This size is variable
831 FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
832Allocation is manual, since C standard does not support variable-size structures.
833*/
834
835size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
836{
837 size_t size;
838 FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
839 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
840 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
841 return size;
842}
843
Yann Collet1efa31f2015-07-04 15:56:41 -0800844FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
Yann Collet4856a002015-01-24 01:58:16 +0100845{
846 size_t size;
847 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
848 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
Yann Collet1efa31f2015-07-04 15:56:41 -0800849 return (FSE_CTable*)malloc(size);
Yann Collet4856a002015-01-24 01:58:16 +0100850}
851
Yann Collet1efa31f2015-07-04 15:56:41 -0800852void FSE_freeCTable (FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +0100853{
Yann Collet1efa31f2015-07-04 15:56:41 -0800854 free(ct);
Yann Collet4856a002015-01-24 01:58:16 +0100855}
856
Yann Collet4856a002015-01-24 01:58:16 +0100857
858unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
859{
860 U32 tableLog = maxTableLog;
861 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
862 if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
Yann Colletbbfa7d72015-03-23 23:15:22 +0100863 if ((FSE_highbit32(maxSymbolValue)+2) > tableLog) tableLog = FSE_highbit32(maxSymbolValue)+2; /* Need a minimum to safely represent all symbol values */
Yann Collet4856a002015-01-24 01:58:16 +0100864 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
865 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
866 return tableLog;
867}
868
869
870typedef struct
871{
872 U32 id;
873 U32 count;
874} rank_t;
875
876int FSE_compareRankT(const void* r1, const void* r2)
877{
Yann Collet1cc58de2015-01-29 07:13:54 +0100878 const rank_t* R1 = (const rank_t*)r1;
879 const rank_t* R2 = (const rank_t*)r2;
Yann Collet4856a002015-01-24 01:58:16 +0100880
881 return 2 * (R1->count < R2->count) - 1;
882}
883
Yann Colleta3c75ba2015-02-21 03:31:59 +0100884
885#if 0
Yann Collet565b81d2015-01-29 06:51:30 +0100886static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
887{
Yann Collet2ddf7e92015-02-08 20:26:47 +0100888 rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
Yann Collet565b81d2015-01-29 06:51:30 +0100889 U32 s;
890
891 /* Init */
892 for (s=0; s<=maxSymbolValue; s++)
893 {
894 rank[s].id = s;
895 rank[s].count = count[s];
896 if (norm[s] <= 1) rank[s].count = 0;
897 }
Yann Collet2ddf7e92015-02-08 20:26:47 +0100898 rank[maxSymbolValue+1].id = 0;
899 rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
Yann Collet565b81d2015-01-29 06:51:30 +0100900
901 /* Sort according to count */
902 qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
903
904 while(pointsToRemove)
905 {
906 int newRank = 1;
907 rank_t savedR;
908 if (norm[rank[0].id] == 1)
909 return (size_t)-FSE_ERROR_GENERIC;
910 norm[rank[0].id]--;
911 pointsToRemove--;
912 rank[0].count -= (rank[0].count + 6) >> 3;
913 if (norm[rank[0].id] == 1)
914 rank[0].count=0;
915 savedR = rank[0];
916 while (rank[newRank].count > savedR.count)
917 {
918 rank[newRank-1] = rank[newRank];
919 newRank++;
920 }
921 rank[newRank-1] = savedR;
922 }
923
924 return 0;
925}
Yann Collet565b81d2015-01-29 06:51:30 +0100926
Yann Colleta3c75ba2015-02-21 03:31:59 +0100927#else
928
Yann Collet26aa1ec2015-02-24 09:05:58 +0100929/* Secondary normalization method.
930 To be used when primary method fails. */
931
932static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
Yann Colleta3c75ba2015-02-21 03:31:59 +0100933{
934 U32 s;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100935 U32 distributed = 0;
936 U32 ToDistribute;
937
938 /* Init */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100939 U32 lowThreshold = (U32)(total >> tableLog);
940 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
941
942 for (s=0; s<=maxSymbolValue; s++)
943 {
944 if (count[s] == 0)
945 {
946 norm[s]=0;
947 continue;
948 }
949 if (count[s] <= lowThreshold)
950 {
951 norm[s] = -1;
952 distributed++;
953 total -= count[s];
954 continue;
955 }
956 if (count[s] <= lowOne)
957 {
958 norm[s] = 1;
959 distributed++;
960 total -= count[s];
961 continue;
962 }
963 norm[s]=-2;
964 }
965 ToDistribute = (1 << tableLog) - distributed;
966
967 if ((total / ToDistribute) > lowOne)
968 {
969 /* risk of rounding to zero */
970 lowOne = (U32)((total * 3) / (ToDistribute * 2));
971 for (s=0; s<=maxSymbolValue; s++)
972 {
973 if ((norm[s] == -2) && (count[s] <= lowOne))
974 {
975 norm[s] = 1;
976 distributed++;
977 total -= count[s];
978 continue;
979 }
980 }
981 ToDistribute = (1 << tableLog) - distributed;
982 }
983
984 if (distributed == maxSymbolValue+1)
985 {
Yann Collet26aa1ec2015-02-24 09:05:58 +0100986 /* all values are pretty poor;
987 probably incompressible data (should have already been detected);
988 find max, then give all remaining points to max */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100989 U32 maxV = 0, maxC =0;
990 for (s=0; s<=maxSymbolValue; s++)
991 if (count[s] > maxC) maxV=s, maxC=count[s];
Yann Collet1efa31f2015-07-04 15:56:41 -0800992 norm[maxV] += (short)ToDistribute;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100993 return 0;
994 }
995
996 {
997 U64 const vStepLog = 62 - tableLog;
998 U64 const mid = (1ULL << (vStepLog-1)) - 1;
999 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
1000 U64 tmpTotal = mid;
1001 for (s=0; s<=maxSymbolValue; s++)
1002 {
1003 if (norm[s]==-2)
1004 {
1005 U64 end = tmpTotal + (count[s] * rStep);
1006 U32 sStart = (U32)(tmpTotal >> vStepLog);
1007 U32 sEnd = (U32)(end >> vStepLog);
1008 U32 weight = sEnd - sStart;
1009 if (weight < 1)
1010 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet213089c2015-06-18 07:43:16 -08001011 norm[s] = (short)weight;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001012 tmpTotal = end;
1013 }
1014 }
1015 }
1016
1017 return 0;
1018}
1019#endif
1020
Yann Collet4856a002015-01-24 01:58:16 +01001021
1022size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
1023 const unsigned* count, size_t total,
1024 unsigned maxSymbolValue)
1025{
1026 /* Sanity checks */
1027 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
1028 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1029 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
1030 if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
1031
1032 {
1033 U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
1034 U64 const scale = 62 - tableLog;
1035 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
1036 U64 const vStep = 1ULL<<(scale-20);
1037 int stillToDistribute = 1<<tableLog;
1038 unsigned s;
1039 unsigned largest=0;
1040 short largestP=0;
1041 U32 lowThreshold = (U32)(total >> tableLog);
1042
1043 for (s=0; s<=maxSymbolValue; s++)
1044 {
1045 if (count[s] == total) return 0;
1046 if (count[s] == 0)
1047 {
1048 normalizedCounter[s]=0;
1049 continue;
1050 }
1051 if (count[s] <= lowThreshold)
1052 {
1053 normalizedCounter[s] = -1;
1054 stillToDistribute--;
1055 }
1056 else
1057 {
1058 short proba = (short)((count[s]*step) >> scale);
1059 if (proba<8)
1060 {
Yann Collet2ddf7e92015-02-08 20:26:47 +01001061 U64 restToBeat = vStep * rtbTable[proba];
Yann Collet4856a002015-01-24 01:58:16 +01001062 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
1063 }
1064 if (proba > largestP)
1065 {
1066 largestP=proba;
1067 largest=s;
1068 }
1069 normalizedCounter[s] = proba;
1070 stillToDistribute -= proba;
1071 }
1072 }
Yann Collet4856a002015-01-24 01:58:16 +01001073 if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
1074 {
Yann Collet26aa1ec2015-02-24 09:05:58 +01001075 /* corner case, need another normalization method */
1076 size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
Yann Collet565b81d2015-01-29 06:51:30 +01001077 if (FSE_isError(errorCode)) return errorCode;
Yann Collet4856a002015-01-24 01:58:16 +01001078 }
1079 else normalizedCounter[largest] += (short)stillToDistribute;
1080 }
1081
1082#if 0
1083 { /* Print Table (debug) */
Yann Collet565b81d2015-01-29 06:51:30 +01001084 U32 s;
1085 U32 nTotal = 0;
Yann Collet4856a002015-01-24 01:58:16 +01001086 for (s=0; s<=maxSymbolValue; s++)
1087 printf("%3i: %4i \n", s, normalizedCounter[s]);
Yann Collet565b81d2015-01-29 06:51:30 +01001088 for (s=0; s<=maxSymbolValue; s++)
1089 nTotal += abs(normalizedCounter[s]);
1090 if (nTotal != (1U<<tableLog))
1091 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001092 getchar();
1093 }
1094#endif
1095
1096 return tableLog;
1097}
1098
1099
Yann Collet1efa31f2015-07-04 15:56:41 -08001100/* fake FSE_CTable, for raw (uncompressed) input */
1101size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001102{
1103 const unsigned tableSize = 1 << nbBits;
1104 const unsigned tableMask = tableSize - 1;
1105 const unsigned maxSymbolValue = tableMask;
Yann Collet1efa31f2015-07-04 15:56:41 -08001106 U16* tableU16 = ( (U16*) ct) + 2;
1107 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
Yann Collet4856a002015-01-24 01:58:16 +01001108 unsigned s;
1109
1110 /* Sanity checks */
1111 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001112
1113 /* header */
1114 tableU16[-2] = (U16) nbBits;
1115 tableU16[-1] = (U16) maxSymbolValue;
1116
1117 /* Build table */
1118 for (s=0; s<tableSize; s++)
1119 tableU16[s] = (U16)(tableSize + s);
1120
1121 /* Build Symbol Transformation Table */
1122 for (s=0; s<=maxSymbolValue; s++)
1123 {
1124 symbolTT[s].minBitsOut = (BYTE)nbBits;
1125 symbolTT[s].deltaFindState = s-1;
1126 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1127 }
1128
1129 return 0;
1130}
1131
1132
Yann Collet1efa31f2015-07-04 15:56:41 -08001133/* fake FSE_CTable, for rle (100% always same symbol) input */
1134size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001135{
1136 const unsigned tableSize = 1;
Yann Collet1efa31f2015-07-04 15:56:41 -08001137 U16* tableU16 = ( (U16*) ct) + 2;
1138 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
Yann Collet4856a002015-01-24 01:58:16 +01001139
1140 /* header */
1141 tableU16[-2] = (U16) 0;
1142 tableU16[-1] = (U16) symbolValue;
1143
1144 /* Build table */
1145 tableU16[0] = 0;
1146 tableU16[1] = 0; /* just in case */
1147
1148 /* Build Symbol Transformation Table */
1149 {
1150 symbolTT[symbolValue].minBitsOut = 0;
1151 symbolTT[symbolValue].deltaFindState = 0;
1152 symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
1153 }
1154
1155 return 0;
1156}
1157
1158
1159void FSE_initCStream(FSE_CStream_t* bitC, void* start)
1160{
1161 bitC->bitContainer = 0;
1162 bitC->bitPos = 0; /* reserved for unusedBits */
1163 bitC->startPtr = (char*)start;
1164 bitC->ptr = bitC->startPtr;
1165}
1166
Yann Collet1efa31f2015-07-04 15:56:41 -08001167void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +01001168{
Yann Collet1efa31f2015-07-04 15:56:41 -08001169 const U32 tableLog = ( (const U16*) ct) [0];
Yann Collet4856a002015-01-24 01:58:16 +01001170 statePtr->value = (ptrdiff_t)1<<tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001171 statePtr->stateTable = ((const U16*) ct) + 2;
1172 statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
Yann Collet4856a002015-01-24 01:58:16 +01001173 statePtr->stateLog = tableLog;
1174}
1175
Yann Collete8c6bb12015-07-26 00:23:57 +01001176void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */
1177{
1178 bitC->bitContainer |= value << bitC->bitPos;
1179 bitC->bitPos += nbBits;
1180}
1181
Yann Collet4856a002015-01-24 01:58:16 +01001182void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
1183{
1184 static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
1185 bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
1186 bitC->bitPos += nbBits;
1187}
1188
Yann Collet1efa31f2015-07-04 15:56:41 -08001189void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
Yann Collet4856a002015-01-24 01:58:16 +01001190{
Yann Collet1efa31f2015-07-04 15:56:41 -08001191 const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1192 const U16* const stateTable = (const U16*)(statePtr->stateTable);
1193 int nbBitsOut = symbolTT.minBitsOut;
1194 nbBitsOut -= (int)((symbolTT.maxState - statePtr->value) >> 31);
Yann Collet4856a002015-01-24 01:58:16 +01001195 FSE_addBits(bitC, statePtr->value, nbBitsOut);
Yann Collet1efa31f2015-07-04 15:56:41 -08001196 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
Yann Collet4856a002015-01-24 01:58:16 +01001197}
1198
1199void FSE_flushBits(FSE_CStream_t* bitC)
1200{
1201 size_t nbBytes = bitC->bitPos >> 3;
1202 FSE_writeLEST(bitC->ptr, bitC->bitContainer);
1203 bitC->bitPos &= 7;
1204 bitC->ptr += nbBytes;
1205 bitC->bitContainer >>= nbBytes*8;
1206}
1207
1208void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
1209{
1210 FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
1211 FSE_flushBits(bitC);
1212}
1213
1214
1215size_t FSE_closeCStream(FSE_CStream_t* bitC)
1216{
1217 char* endPtr;
1218
1219 FSE_addBits(bitC, 1, 1);
1220 FSE_flushBits(bitC);
1221
1222 endPtr = bitC->ptr;
1223 endPtr += bitC->bitPos > 0;
1224
1225 return (endPtr - bitC->startPtr);
1226}
1227
1228
1229size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1230 const void* src, size_t srcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001231 const FSE_CTable* ct)
Yann Collet4856a002015-01-24 01:58:16 +01001232{
1233 const BYTE* const istart = (const BYTE*) src;
1234 const BYTE* ip;
1235 const BYTE* const iend = istart + srcSize;
1236
1237 FSE_CStream_t bitC;
1238 FSE_CState_t CState1, CState2;
1239
1240
1241 /* init */
1242 (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
1243 FSE_initCStream(&bitC, dst);
Yann Collet1efa31f2015-07-04 15:56:41 -08001244 FSE_initCState(&CState1, ct);
Yann Collet4856a002015-01-24 01:58:16 +01001245 CState2 = CState1;
1246
1247 ip=iend;
1248
1249 /* join to even */
1250 if (srcSize & 1)
1251 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001252 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001253 FSE_flushBits(&bitC);
1254 }
1255
1256 /* join to mod 4 */
1257 if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
1258 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001259 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1260 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001261 FSE_flushBits(&bitC);
1262 }
1263
1264 /* 2 or 4 encoding per loop */
1265 while (ip>istart)
1266 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001267 FSE_encodeSymbol(&bitC, &CState2, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001268
1269 if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
1270 FSE_flushBits(&bitC);
1271
Yann Collet1efa31f2015-07-04 15:56:41 -08001272 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001273
1274 if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
1275 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001276 FSE_encodeSymbol(&bitC, &CState2, *--ip);
1277 FSE_encodeSymbol(&bitC, &CState1, *--ip);
Yann Collet4856a002015-01-24 01:58:16 +01001278 }
1279
1280 FSE_flushBits(&bitC);
1281 }
1282
1283 FSE_flushCState(&bitC, &CState2);
1284 FSE_flushCState(&bitC, &CState1);
1285 return FSE_closeCStream(&bitC);
1286}
1287
1288
Yann Collet4856a002015-01-24 01:58:16 +01001289size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
1290
1291
1292size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
1293{
1294 const BYTE* const istart = (const BYTE*) src;
1295 const BYTE* ip = istart;
1296
1297 BYTE* const ostart = (BYTE*) dst;
1298 BYTE* op = ostart;
1299 BYTE* const oend = ostart + dstSize;
1300
1301 U32 count[FSE_MAX_SYMBOL_VALUE+1];
1302 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001303 CTable_max_t ct;
Yann Collet4856a002015-01-24 01:58:16 +01001304 size_t errorCode;
1305
1306 /* early out */
1307 if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
1308 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
1309 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1310 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
1311
1312 /* Scan input and build symbol stats */
Yann Collet1efa31f2015-07-04 15:56:41 -08001313 errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001314 if (FSE_isError(errorCode)) return errorCode;
Yann Colleta3c75ba2015-02-21 03:31:59 +01001315 if (errorCode == srcSize) return 1;
1316 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
Yann Collet4856a002015-01-24 01:58:16 +01001317
1318 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
1319 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
1320 if (FSE_isError(errorCode)) return errorCode;
1321
1322 /* Write table description header */
Yann Collet1efa31f2015-07-04 15:56:41 -08001323 errorCode = FSE_writeNCount (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001324 if (FSE_isError(errorCode)) return errorCode;
1325 op += errorCode;
1326
1327 /* Compress */
Yann Collet1efa31f2015-07-04 15:56:41 -08001328 errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001329 if (FSE_isError(errorCode)) return errorCode;
Yann Collet1efa31f2015-07-04 15:56:41 -08001330 op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
Yann Collet4856a002015-01-24 01:58:16 +01001331
1332 /* check compressibility */
1333 if ( (size_t)(op-ostart) >= srcSize-1 )
1334 return 0;
1335
1336 return op-ostart;
1337}
1338
Yann Collet4856a002015-01-24 01:58:16 +01001339size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
1340{
1341 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
1342}
1343
1344
1345/*********************************************************
1346* Decompression (Byte symbols)
1347*********************************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -08001348size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
Yann Collet4856a002015-01-24 01:58:16 +01001349{
Yann Collet1efa31f2015-07-04 15:56:41 -08001350 U32* const base32 = (U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001351 FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
1352
Yann Collet4856a002015-01-24 01:58:16 +01001353 base32[0] = 0;
1354
1355 cell->newState = 0;
1356 cell->symbol = symbolValue;
1357 cell->nbBits = 0;
1358
1359 return 0;
1360}
1361
1362
Yann Collet1efa31f2015-07-04 15:56:41 -08001363size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001364{
Yann Collet1efa31f2015-07-04 15:56:41 -08001365 U32* const base32 = (U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001366 FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
1367 const unsigned tableSize = 1 << nbBits;
1368 const unsigned tableMask = tableSize - 1;
1369 const unsigned maxSymbolValue = tableMask;
1370 unsigned s;
1371
1372 /* Sanity checks */
1373 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
Yann Collet4856a002015-01-24 01:58:16 +01001374
1375 /* Build Decoding Table */
1376 base32[0] = nbBits;
1377 for (s=0; s<=maxSymbolValue; s++)
1378 {
1379 dinfo[s].newState = 0;
1380 dinfo[s].symbol = (BYTE)s;
1381 dinfo[s].nbBits = (BYTE)nbBits;
1382 }
1383
1384 return 0;
1385}
1386
1387
1388/* FSE_initDStream
1389 * Initialize a FSE_DStream_t.
1390 * srcBuffer must point at the beginning of an FSE block.
1391 * The function result is the size of the FSE_block (== srcSize).
1392 * If srcSize is too small, the function will return an errorCode;
1393 */
1394size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1395{
1396 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1397
Yann Collet1efa31f2015-07-04 15:56:41 -08001398 if (srcSize >= sizeof(size_t))
Yann Collet4856a002015-01-24 01:58:16 +01001399 {
1400 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001401 bitD->start = (const char*)srcBuffer;
Yann Collet1efa31f2015-07-04 15:56:41 -08001402 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
Yann Collet4856a002015-01-24 01:58:16 +01001403 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collet213089c2015-06-18 07:43:16 -08001404 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001405 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1406 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1407 }
1408 else
1409 {
1410 U32 contain32;
Yann Collet213089c2015-06-18 07:43:16 -08001411 bitD->start = (const char*)srcBuffer;
Yann Collet4856a002015-01-24 01:58:16 +01001412 bitD->ptr = bitD->start;
Yann Collet213089c2015-06-18 07:43:16 -08001413 bitD->bitContainer = *(const BYTE*)(bitD->start);
Yann Collet4856a002015-01-24 01:58:16 +01001414 switch(srcSize)
1415 {
Yann Collet1efa31f2015-07-04 15:56:41 -08001416 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
1417 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
1418 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
1419 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
1420 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
1421 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
Yann Collet4856a002015-01-24 01:58:16 +01001422 default:;
1423 }
Yann Collet213089c2015-06-18 07:43:16 -08001424 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
Yann Collet4856a002015-01-24 01:58:16 +01001425 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1426 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
Yann Collet1efa31f2015-07-04 15:56:41 -08001427 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
Yann Collet4856a002015-01-24 01:58:16 +01001428 }
1429
1430 return srcSize;
1431}
1432
1433
Yann Collete8c6bb12015-07-26 00:23:57 +01001434/* FSE_lookBits
1435 * Provides next n bits from the bitContainer.
1436 * bitContainer is not modified (bits are still present for next read/look)
1437 * On 32-bits, maxNbBits==25
1438 * On 64-bits, maxNbBits==57
1439 * return : value extracted.
1440 */
1441static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
1442{
1443 return ((bitD->bitContainer << (bitD->bitsConsumed & ((sizeof(bitD->bitContainer)*8)-1))) >> 1) >> (((sizeof(bitD->bitContainer)*8)-1)-nbBits);
1444}
1445
1446static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1447{
1448 return (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD->bitContainer)*8)-nbBits);
1449}
1450
1451static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
1452{
1453 bitD->bitsConsumed += nbBits;
1454}
1455
1456
Yann Collet4856a002015-01-24 01:58:16 +01001457/* FSE_readBits
1458 * Read next n bits from the bitContainer.
Yann Collet213089c2015-06-18 07:43:16 -08001459 * On 32-bits, don't read more than maxNbBits==25
1460 * On 64-bits, don't read more than maxNbBits==57
1461 * Use the fast variant *only* if n >= 1.
Yann Collet4856a002015-01-24 01:58:16 +01001462 * return : value extracted.
1463 */
Yann Collet1efa31f2015-07-04 15:56:41 -08001464size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001465{
Yann Collete8c6bb12015-07-26 00:23:57 +01001466 size_t value = FSE_lookBits(bitD, nbBits);
1467 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001468 return value;
1469}
1470
Yann Collet1efa31f2015-07-04 15:56:41 -08001471size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
Yann Collet4856a002015-01-24 01:58:16 +01001472{
Yann Collete8c6bb12015-07-26 00:23:57 +01001473 size_t value = FSE_lookBitsFast(bitD, nbBits);
1474 FSE_skipBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001475 return value;
1476}
1477
1478unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1479{
Yann Collete8c6bb12015-07-26 00:23:57 +01001480 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
Yann Collet4856a002015-01-24 01:58:16 +01001481 {
1482 bitD->ptr -= bitD->bitsConsumed >> 3;
1483 bitD->bitsConsumed &= 7;
1484 bitD->bitContainer = FSE_readLEST(bitD->ptr);
Yann Collete8c6bb12015-07-26 00:23:57 +01001485 return FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001486 }
1487 if (bitD->ptr == bitD->start)
1488 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001489 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_partiallyFilled;
1490 if (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8) return FSE_DStream_completed;
1491 return FSE_DStream_tooFar;
Yann Collet4856a002015-01-24 01:58:16 +01001492 }
1493 {
1494 U32 nbBytes = bitD->bitsConsumed >> 3;
Yann Collete8c6bb12015-07-26 00:23:57 +01001495 U32 result = FSE_DStream_unfinished;
Yann Collet4856a002015-01-24 01:58:16 +01001496 if (bitD->ptr - nbBytes < bitD->start)
Yann Collete8c6bb12015-07-26 00:23:57 +01001497 {
Yann Collet4856a002015-01-24 01:58:16 +01001498 nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
Yann Collete8c6bb12015-07-26 00:23:57 +01001499 result = FSE_DStream_partiallyFilled;
1500 }
Yann Collet4856a002015-01-24 01:58:16 +01001501 bitD->ptr -= nbBytes;
1502 bitD->bitsConsumed -= nbBytes*8;
1503 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
Yann Collete8c6bb12015-07-26 00:23:57 +01001504 return result;
Yann Collet4856a002015-01-24 01:58:16 +01001505 }
1506}
1507
1508
Yann Collet1efa31f2015-07-04 15:56:41 -08001509void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
Yann Collet4856a002015-01-24 01:58:16 +01001510{
Yann Collet1efa31f2015-07-04 15:56:41 -08001511 const U32* const base32 = (const U32*)dt;
Yann Collet4856a002015-01-24 01:58:16 +01001512 DStatePtr->state = FSE_readBits(bitD, base32[0]);
1513 FSE_reloadDStream(bitD);
1514 DStatePtr->table = base32 + 1;
1515}
1516
1517BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1518{
1519 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1520 const U32 nbBits = DInfo.nbBits;
1521 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001522 size_t lowBits = FSE_readBits(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001523
1524 DStatePtr->state = DInfo.newState + lowBits;
1525 return symbol;
1526}
1527
1528BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1529{
1530 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1531 const U32 nbBits = DInfo.nbBits;
1532 BYTE symbol = DInfo.symbol;
Yann Collet1efa31f2015-07-04 15:56:41 -08001533 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
Yann Collet4856a002015-01-24 01:58:16 +01001534
1535 DStatePtr->state = DInfo.newState + lowBits;
1536 return symbol;
1537}
1538
1539/* FSE_endOfDStream
1540 Tells if bitD has reached end of bitStream or not */
1541
1542unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1543{
Yann Collete8c6bb12015-07-26 00:23:57 +01001544 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
Yann Collet4856a002015-01-24 01:58:16 +01001545}
1546
Yann Collet1efa31f2015-07-04 15:56:41 -08001547unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
Yann Collet4856a002015-01-24 01:58:16 +01001548{
Yann Collet1efa31f2015-07-04 15:56:41 -08001549 return DStatePtr->state == 0;
Yann Collet4856a002015-01-24 01:58:16 +01001550}
1551
1552
1553FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1554 void* dst, size_t maxDstSize,
1555 const void* cSrc, size_t cSrcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001556 const FSE_DTable* dt, unsigned fast)
Yann Collet4856a002015-01-24 01:58:16 +01001557{
1558 BYTE* const ostart = (BYTE*) dst;
1559 BYTE* op = ostart;
1560 BYTE* const omax = op + maxDstSize;
1561 BYTE* const olimit = omax-3;
1562
1563 FSE_DStream_t bitD;
Yann Collet1efa31f2015-07-04 15:56:41 -08001564 FSE_DState_t state1;
1565 FSE_DState_t state2;
Yann Collet4856a002015-01-24 01:58:16 +01001566 size_t errorCode;
1567
1568 /* Init */
1569 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1570 if (FSE_isError(errorCode)) return errorCode;
1571
Yann Collet1efa31f2015-07-04 15:56:41 -08001572 FSE_initDState(&state1, &bitD, dt);
1573 FSE_initDState(&state2, &bitD, dt);
Yann Collet4856a002015-01-24 01:58:16 +01001574
1575
1576 /* 2 symbols per loop */
Yann Collete8c6bb12015-07-26 00:23:57 +01001577 while ((FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit))
Yann Collet4856a002015-01-24 01:58:16 +01001578 {
1579 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1580
Yann Collet1efa31f2015-07-04 15:56:41 -08001581 if (FSE_MAX_TABLELOG*2+7 > sizeof(size_t)*8) /* This test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001582 FSE_reloadDStream(&bitD);
1583
1584 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1585
Yann Collet1efa31f2015-07-04 15:56:41 -08001586 if (FSE_MAX_TABLELOG*4+7 < sizeof(size_t)*8) /* This test must be static */
Yann Collet4856a002015-01-24 01:58:16 +01001587 {
1588 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1589 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1590 }
1591 }
1592
1593 /* tail */
Yann Collete8c6bb12015-07-26 00:23:57 +01001594 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
Yann Collet4856a002015-01-24 01:58:16 +01001595 while (1)
1596 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001597 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
Yann Collet4856a002015-01-24 01:58:16 +01001598 break;
1599
1600 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1601
Yann Collete8c6bb12015-07-26 00:23:57 +01001602 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
Yann Collet4856a002015-01-24 01:58:16 +01001603 break;
1604
1605 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1606 }
1607
1608 /* end ? */
1609 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1610 return op-ostart;
1611
1612 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1613
1614 return (size_t)-FSE_ERROR_corruptionDetected;
1615}
1616
1617
1618size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1619 const void* cSrc, size_t cSrcSize,
Yann Collet1efa31f2015-07-04 15:56:41 -08001620 const FSE_DTable* dt, size_t fastMode)
Yann Collet4856a002015-01-24 01:58:16 +01001621{
1622 /* select fast mode (static) */
Yann Collet1efa31f2015-07-04 15:56:41 -08001623 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
1624 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
Yann Collet4856a002015-01-24 01:58:16 +01001625}
1626
1627
1628size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1629{
1630 const BYTE* const istart = (const BYTE*)cSrc;
1631 const BYTE* ip = istart;
1632 short counting[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet1efa31f2015-07-04 15:56:41 -08001633 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
Yann Collet4856a002015-01-24 01:58:16 +01001634 unsigned tableLog;
Yann Collet1efa31f2015-07-04 15:56:41 -08001635 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
Yann Collet4856a002015-01-24 01:58:16 +01001636 size_t errorCode, fastMode;
1637
1638 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1639
1640 /* normal FSE decoding mode */
Yann Collet1efa31f2015-07-04 15:56:41 -08001641 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
Yann Collet4856a002015-01-24 01:58:16 +01001642 if (FSE_isError(errorCode)) return errorCode;
1643 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1644 ip += errorCode;
1645 cSrcSize -= errorCode;
1646
Yann Collet1efa31f2015-07-04 15:56:41 -08001647 fastMode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
Yann Collet4856a002015-01-24 01:58:16 +01001648 if (FSE_isError(fastMode)) return fastMode;
1649
1650 /* always return, even if it is an error code */
Yann Collet1efa31f2015-07-04 15:56:41 -08001651 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt, fastMode);
Yann Collet4856a002015-01-24 01:58:16 +01001652}
1653
1654
Yann Collet4856a002015-01-24 01:58:16 +01001655
Yann Collete8c6bb12015-07-26 00:23:57 +01001656/*********************************************************
1657* Huff0 : Huffman block compression
1658*********************************************************/
1659#define HUF_ABSOLUTEMAX_TABLELOG 16
1660#define HUF_MAX_TABLELOG 13
1661#define HUF_DEFAULT_TABLELOG 12
1662#define HUF_MAX_SYMBOL_VALUE 255
Yann Collet4856a002015-01-24 01:58:16 +01001663
Yann Collete8c6bb12015-07-26 00:23:57 +01001664typedef struct HUF_CElt_s {
1665 U16 val;
1666 BYTE nbBits;
1667} HUF_CElt ;
Yann Collet4856a002015-01-24 01:58:16 +01001668
Yann Collete8c6bb12015-07-26 00:23:57 +01001669typedef struct nodeElt_s {
1670 U32 count;
1671 U16 parent;
1672 BYTE byte;
1673 BYTE nbBits;
1674} nodeElt;
Yann Collet4856a002015-01-24 01:58:16 +01001675
1676
Yann Collete8c6bb12015-07-26 00:23:57 +01001677#define HUF_HEADERLOG 8
1678size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog)
Yann Collet4856a002015-01-24 01:58:16 +01001679{
Yann Collete8c6bb12015-07-26 00:23:57 +01001680 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
1681 U32 n;
1682 BYTE* op = (BYTE*)dst;
1683 size_t size;
Yann Collet4856a002015-01-24 01:58:16 +01001684
Yann Collete8c6bb12015-07-26 00:23:57 +01001685 // check conditions
1686 if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
1687 return (size_t)-FSE_ERROR_GENERIC;
Yann Collet4856a002015-01-24 01:58:16 +01001688
Yann Collete8c6bb12015-07-26 00:23:57 +01001689 for (n=0; n<maxSymbolValue; n++)
1690 huffWeight[n] = tree[n].nbBits ? (BYTE)(huffLog + 1 - tree[n].nbBits) : 0;
Yann Collet4856a002015-01-24 01:58:16 +01001691
Yann Collete8c6bb12015-07-26 00:23:57 +01001692 size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); // don't need last symbol stat : implied
1693 if (FSE_isError(size)) return size;
1694 if (size >= 128) return (size_t)-FSE_ERROR_GENERIC;
1695 if (size <= 1) return (size_t)-FSE_ERROR_GENERIC; // special case, not implemented
Yann Collet4856a002015-01-24 01:58:16 +01001696
Yann Collete8c6bb12015-07-26 00:23:57 +01001697 op[0] = (BYTE)size;
1698 return size+1;
Yann Collet4856a002015-01-24 01:58:16 +01001699}
1700
1701
Yann Collete8c6bb12015-07-26 00:23:57 +01001702static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001703{
Yann Collete8c6bb12015-07-26 00:23:57 +01001704 int totalCost = 0;
1705 const U32 largestBits = huffNode[lastNonNull].nbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001706
Yann Collete8c6bb12015-07-26 00:23:57 +01001707 // early exit : all is fine
1708 if (largestBits <= maxNbBits) return largestBits;
Yann Collet4856a002015-01-24 01:58:16 +01001709
Yann Collete8c6bb12015-07-26 00:23:57 +01001710 // now we have a few too large elements (at least >= 2)
Yann Collet4856a002015-01-24 01:58:16 +01001711 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001712 const U32 baseCost = 1 << (largestBits - maxNbBits);
1713 U32 n = lastNonNull;
1714
1715 while (huffNode[n].nbBits > maxNbBits)
Yann Collet4856a002015-01-24 01:58:16 +01001716 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001717 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
1718 huffNode[n].nbBits = (BYTE)maxNbBits;
1719 n --;
Yann Collet4856a002015-01-24 01:58:16 +01001720 }
Yann Collet4856a002015-01-24 01:58:16 +01001721
Yann Collete8c6bb12015-07-26 00:23:57 +01001722 /* renorm totalCost */
1723 totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */
1724
1725 // repay cost
1726 while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1)
1727
Yann Collet4856a002015-01-24 01:58:16 +01001728 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001729 const U32 noOne = 0xF0F0F0F0;
1730 // Get pos of last (smallest) symbol per rank
1731 U32 rankLast[HUF_MAX_TABLELOG];
1732 U32 currentNbBits = maxNbBits;
1733 int pos;
1734 memset(rankLast, 0xF0, sizeof(rankLast));
1735 for (pos=n ; pos >= 0; pos--)
Yann Collet4856a002015-01-24 01:58:16 +01001736 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001737 if (huffNode[pos].nbBits >= currentNbBits) continue;
1738 currentNbBits = huffNode[pos].nbBits;
1739 rankLast[maxNbBits-currentNbBits] = pos;
1740 }
1741
1742 while (totalCost > 0)
1743 {
1744 U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1;
1745 for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
1746 {
1747 U32 highPos = rankLast[nBitsToDecrease];
1748 U32 lowPos = rankLast[nBitsToDecrease-1];
1749 if (highPos == noOne) continue;
1750 if (lowPos == noOne) break;
1751 {
1752 U32 highTotal = huffNode[highPos].count;
1753 U32 lowTotal = 2 * huffNode[lowPos].count;
1754 if (highTotal <= lowTotal) break;
1755 }
1756 }
1757 while (rankLast[nBitsToDecrease] == noOne)
1758 nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest
1759 totalCost -= 1 << (nBitsToDecrease-1);
1760 if (rankLast[nBitsToDecrease-1] == noOne)
1761 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt
1762 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
1763 rankLast[nBitsToDecrease]--;
1764 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
1765 rankLast[nBitsToDecrease] = noOne; // rank list emptied
1766 }
1767 while (totalCost < 0) // Sometimes, cost correction overshoot
1768 {
1769 if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */
1770 {
1771 while (huffNode[n].nbBits == maxNbBits) n--;
1772 huffNode[n+1].nbBits--;
1773 rankLast[1] = n+1;
1774 totalCost++;
1775 continue;
1776 }
1777 huffNode[ rankLast[1] + 1 ].nbBits--;
1778 rankLast[1]++;
1779 totalCost ++;
1780 }
1781 }
1782 }
1783
1784 return maxNbBits;
1785}
1786
1787
1788typedef struct {
1789 U32 base;
1790 U32 current;
1791} rankPos;
1792
1793static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
1794{
1795 rankPos rank[32];
1796 U32 n;
1797
1798 memset(rank, 0, sizeof(rank));
1799 for (n=0; n<=maxSymbolValue; n++)
1800 {
1801 U32 r = FSE_highbit32(count[n] + 1);
1802 rank[r].base ++;
1803 }
1804 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
1805 for (n=0; n<32; n++) rank[n].current = rank[n].base;
1806 for (n=0; n<=maxSymbolValue; n++)
1807 {
1808 U32 c = count[n];
1809 U32 r = FSE_highbit32(c+1) + 1;
1810 U32 pos = rank[r].current++;
1811 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
1812 huffNode[pos].count = c;
1813 huffNode[pos].byte = (BYTE)n;
1814 }
1815}
1816
1817
1818#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
1819size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
1820{
1821 nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
1822 nodeElt* huffNode = huffNode0 + 1;
1823 U32 n, nonNullRank;
1824 int lowS, lowN;
1825 U16 nodeNb = STARTNODE;
1826 U32 nodeRoot;
1827
1828 // check
1829 if (maxSymbolValue > 255) return (size_t)-FSE_ERROR_GENERIC;
1830 memset(huffNode0, 0, sizeof(huffNode0));
1831
1832 // sort, decreasing order
1833 HUF_sort(huffNode, count, maxSymbolValue);
1834
1835 // init for parents
1836 nonNullRank = maxSymbolValue;
1837 while(huffNode[nonNullRank].count == 0) nonNullRank--;
1838 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
1839 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
1840 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
1841 nodeNb++; lowS-=2;
1842 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
1843 huffNode0[0].count = (U32)(1U<<31);
1844
1845 // create parents
1846 while (nodeNb <= nodeRoot)
1847 {
1848 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1849 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
1850 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
1851 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
1852 nodeNb++;
1853 }
1854
1855 // distribute weights (unlimited tree height)
1856 huffNode[nodeRoot].nbBits = 0;
1857 for (n=nodeRoot-1; n>=STARTNODE; n--)
1858 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1859 for (n=0; n<=nonNullRank; n++)
1860 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1861
1862 // enforce maxTableLog
1863 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
1864
1865 // fill result into tree (val, nbBits)
1866 {
1867 U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0};
1868 U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1];
1869 if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check
1870 for (n=0; n<=nonNullRank; n++)
1871 nbPerRank[huffNode[n].nbBits]++;
1872 {
1873 // determine stating value per rank
1874 U16 min = 0;
1875 for (n=maxNbBits; n>0; n--)
1876 {
1877 valPerRank[n] = min; // get starting value within each rank
1878 min += nbPerRank[n];
1879 min >>= 1;
Yann Collet4856a002015-01-24 01:58:16 +01001880 }
1881 }
Yann Collete8c6bb12015-07-26 00:23:57 +01001882 for (n=0; n<=maxSymbolValue; n++)
1883 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order
1884 for (n=0; n<=maxSymbolValue; n++)
1885 tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order
Yann Collet4856a002015-01-24 01:58:16 +01001886 }
1887
Yann Collete8c6bb12015-07-26 00:23:57 +01001888 return maxNbBits;
Yann Collet4856a002015-01-24 01:58:16 +01001889}
1890
1891
Yann Collete8c6bb12015-07-26 00:23:57 +01001892static size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable)
Yann Collet4856a002015-01-24 01:58:16 +01001893{
Yann Collete8c6bb12015-07-26 00:23:57 +01001894 const BYTE* ip = (const BYTE*) src;
1895 BYTE* const ostart = (BYTE*)dst;
1896 BYTE* op = (BYTE*) ostart;
1897 U16* jumpTable = (U16*) dst;
1898 size_t n, streamSize;
1899 FSE_CStream_t bitC;
Yann Collet4856a002015-01-24 01:58:16 +01001900
Yann Collete8c6bb12015-07-26 00:23:57 +01001901 /* init */
1902 (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
1903 op += 6; /* jump Table -- could be optimized by delta / deviation */
1904 FSE_initCStream(&bitC, op);
Yann Collet4856a002015-01-24 01:58:16 +01001905
Yann Collete8c6bb12015-07-26 00:23:57 +01001906#define FSE_FLUSHBITS_32(stream) \
1907 if (FSE_32bits()) FSE_flushBits(stream)
Yann Collet4856a002015-01-24 01:58:16 +01001908
Yann Collete8c6bb12015-07-26 00:23:57 +01001909 n = srcSize & ~15; // mod 16
1910 switch (srcSize & 15)
Yann Collet4856a002015-01-24 01:58:16 +01001911 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001912 case 15: FSE_addBitsFast(&bitC, CTable[ip[n+14]].val, CTable[ip[n+14]].nbBits);
1913 FSE_FLUSHBITS_32(&bitC);
1914 case 14: FSE_addBitsFast(&bitC, CTable[ip[n+13]].val, CTable[ip[n+13]].nbBits);
1915 FSE_FLUSHBITS_32(&bitC);
1916 case 13: FSE_addBitsFast(&bitC, CTable[ip[n+12]].val, CTable[ip[n+12]].nbBits);
1917 FSE_FLUSHBITS_32(&bitC);
1918 case 12: FSE_addBitsFast(&bitC, CTable[ip[n+11]].val, CTable[ip[n+11]].nbBits);
1919 FSE_flushBits(&bitC);
1920 case 11: FSE_addBitsFast(&bitC, CTable[ip[n+10]].val, CTable[ip[n+10]].nbBits);
1921 FSE_FLUSHBITS_32(&bitC);
1922 case 10: FSE_addBitsFast(&bitC, CTable[ip[n+9]].val, CTable[ip[n+9]].nbBits);
1923 FSE_FLUSHBITS_32(&bitC);
1924 case 9 : FSE_addBitsFast(&bitC, CTable[ip[n+8]].val, CTable[ip[n+8]].nbBits);
1925 FSE_FLUSHBITS_32(&bitC);
1926 case 8 : FSE_addBitsFast(&bitC, CTable[ip[n+7]].val, CTable[ip[n+7]].nbBits);
1927 FSE_flushBits(&bitC);
1928 case 7 : FSE_addBitsFast(&bitC, CTable[ip[n+6]].val, CTable[ip[n+6]].nbBits);
1929 FSE_FLUSHBITS_32(&bitC);
1930 case 6 : FSE_addBitsFast(&bitC, CTable[ip[n+5]].val, CTable[ip[n+5]].nbBits);
1931 FSE_FLUSHBITS_32(&bitC);
1932 case 5 : FSE_addBitsFast(&bitC, CTable[ip[n+4]].val, CTable[ip[n+4]].nbBits);
1933 FSE_FLUSHBITS_32(&bitC);
1934 case 4 : FSE_addBitsFast(&bitC, CTable[ip[n+3]].val, CTable[ip[n+3]].nbBits);
1935 FSE_flushBits(&bitC);
1936 case 3 : FSE_addBitsFast(&bitC, CTable[ip[n+2]].val, CTable[ip[n+2]].nbBits);
1937 FSE_FLUSHBITS_32(&bitC);
1938 case 2 : FSE_addBitsFast(&bitC, CTable[ip[n+1]].val, CTable[ip[n+1]].nbBits);
1939 FSE_FLUSHBITS_32(&bitC);
1940 case 1 : FSE_addBitsFast(&bitC, CTable[ip[n+0]].val, CTable[ip[n+0]].nbBits);
1941 FSE_flushBits(&bitC);
1942 case 0 :
1943 default: ;
Yann Collet4856a002015-01-24 01:58:16 +01001944 }
1945
Yann Collete8c6bb12015-07-26 00:23:57 +01001946 for (; n>0; n-=16)
Yann Collet4856a002015-01-24 01:58:16 +01001947 {
Yann Collete8c6bb12015-07-26 00:23:57 +01001948 FSE_addBitsFast(&bitC, CTable[ip[n- 4]].val, CTable[ip[n- 4]].nbBits);
1949 FSE_FLUSHBITS_32(&bitC);
1950 FSE_addBitsFast(&bitC, CTable[ip[n- 8]].val, CTable[ip[n- 8]].nbBits);
1951 FSE_FLUSHBITS_32(&bitC);
1952 FSE_addBitsFast(&bitC, CTable[ip[n-12]].val, CTable[ip[n-12]].nbBits);
1953 FSE_FLUSHBITS_32(&bitC);
1954 FSE_addBitsFast(&bitC, CTable[ip[n-16]].val, CTable[ip[n-16]].nbBits);
1955 FSE_flushBits(&bitC);
1956 }
1957 streamSize = FSE_closeCStream(&bitC);
1958 jumpTable[0] = (U16)streamSize;
1959 op += streamSize;
1960
1961 FSE_initCStream(&bitC, op);
1962 n = srcSize & ~15; // mod 16
1963 for (; n>0; n-=16)
1964 {
1965 FSE_addBitsFast(&bitC, CTable[ip[n- 3]].val, CTable[ip[n- 3]].nbBits);
1966 FSE_FLUSHBITS_32(&bitC);
1967 FSE_addBitsFast(&bitC, CTable[ip[n- 7]].val, CTable[ip[n- 7]].nbBits);
1968 FSE_FLUSHBITS_32(&bitC);
1969 FSE_addBitsFast(&bitC, CTable[ip[n-11]].val, CTable[ip[n-11]].nbBits);
1970 FSE_FLUSHBITS_32(&bitC);
1971 FSE_addBitsFast(&bitC, CTable[ip[n-15]].val, CTable[ip[n-15]].nbBits);
1972 FSE_flushBits(&bitC);
1973 }
1974 streamSize = FSE_closeCStream(&bitC);
1975 jumpTable[1] = (U16)streamSize;
1976 op += streamSize;
1977
1978 FSE_initCStream(&bitC, op);
1979 n = srcSize & ~15; // mod 16
1980 for (; n>0; n-=16)
1981 {
1982 FSE_addBitsFast(&bitC, CTable[ip[n- 2]].val, CTable[ip[n- 2]].nbBits);
1983 FSE_FLUSHBITS_32(&bitC);
1984 FSE_addBitsFast(&bitC, CTable[ip[n- 6]].val, CTable[ip[n- 6]].nbBits);
1985 FSE_FLUSHBITS_32(&bitC);
1986 FSE_addBitsFast(&bitC, CTable[ip[n-10]].val, CTable[ip[n-10]].nbBits);
1987 FSE_FLUSHBITS_32(&bitC);
1988 FSE_addBitsFast(&bitC, CTable[ip[n-14]].val, CTable[ip[n-14]].nbBits);
1989 FSE_flushBits(&bitC);
1990 }
1991 streamSize = FSE_closeCStream(&bitC);
1992 jumpTable[2] = (U16)streamSize;
1993 op += streamSize;
1994
1995 FSE_initCStream(&bitC, op);
1996 n = srcSize & ~15; // mod 16
1997 for (; n>0; n-=16)
1998 {
1999 FSE_addBitsFast(&bitC, CTable[ip[n- 1]].val, CTable[ip[n- 1]].nbBits);
2000 FSE_FLUSHBITS_32(&bitC);
2001 FSE_addBitsFast(&bitC, CTable[ip[n- 5]].val, CTable[ip[n- 5]].nbBits);
2002 FSE_FLUSHBITS_32(&bitC);
2003 FSE_addBitsFast(&bitC, CTable[ip[n- 9]].val, CTable[ip[n- 9]].nbBits);
2004 FSE_FLUSHBITS_32(&bitC);
2005 FSE_addBitsFast(&bitC, CTable[ip[n-13]].val, CTable[ip[n-13]].nbBits);
2006 FSE_flushBits(&bitC);
2007 }
2008 streamSize = FSE_closeCStream(&bitC);
2009 op += streamSize;
2010
2011 return op-ostart;
2012}
2013
2014
2015size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog)
2016{
2017 BYTE* const ostart = (BYTE*)dst;
2018 BYTE* op = ostart;
2019 BYTE* const oend = ostart + dstSize;
2020
2021 U32 count[HUF_MAX_SYMBOL_VALUE+1];
2022 HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
2023 size_t errorCode;
2024
2025 /* early out */
2026 if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
2027 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
2028 if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
2029 if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
2030
2031 /* Scan input and build symbol stats */
2032 errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
2033 if (FSE_isError(errorCode)) return errorCode;
2034 if (errorCode == srcSize) return 1;
2035 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
2036
2037 /* Build Huffman Tree */
2038 errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
2039 if (FSE_isError(errorCode)) return errorCode;
2040 huffLog = (U32)errorCode;
2041
2042 /* Write table description header */
2043 errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */
2044 if (FSE_isError(errorCode)) return errorCode;
2045 op += errorCode;
2046
2047 /* Compress */
2048 op += HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);
2049
2050 /* check compressibility */
2051 if ((size_t)(op-ostart) >= srcSize-1)
2052 return 0;
2053
2054 return op-ostart;
2055}
2056
2057size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2058{
2059 return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
2060}
2061
2062
2063/*********************************************************
2064* Huff0 : Huffman block decompression
2065*********************************************************/
2066typedef struct {
2067 BYTE byte;
2068 BYTE nbBits;
2069} HUF_DElt;
2070
2071size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
2072{
2073 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
2074 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1] = {0};
2075 U32 weightTotal = 0;
2076 U32 maxBits;
2077 const BYTE* ip = (const BYTE*) src;
2078 size_t iSize = ip[0];
2079 size_t oSize;
2080 U32 n;
2081 U32 nextRankStart;
2082 HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
2083
2084 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); // if compilation fails here, assertion is false
2085 if (iSize >= 128) return (size_t)-FSE_ERROR_GENERIC; // special case, not implemented
2086 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2087
2088 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); // max 255 values stored, last is implied
2089 if (FSE_isError(oSize)) return oSize;
2090
2091 // stats on weights
2092 for (n=0; n<oSize; n++)
2093 {
2094 rankVal[huffWeight[n]]++;
2095 weightTotal += (1 << huffWeight[n]) >> 1;
Yann Collet4856a002015-01-24 01:58:16 +01002096 }
2097
Yann Collete8c6bb12015-07-26 00:23:57 +01002098 // get last symbol weight(implied)
2099 maxBits = FSE_highbit32(weightTotal) + 1;
2100 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_GENERIC; // DTable is too small
2101 DTable[0] = (U16)maxBits;
2102 {
2103 U32 total = 1 << maxBits;
2104 U32 rest = total - weightTotal;
2105 U32 verif = 1 << FSE_highbit32(rest);
2106 if (verif != rest) return (size_t)-FSE_ERROR_GENERIC; // last value must be a clean power of 2
2107 huffWeight[oSize] = (BYTE)(FSE_highbit32(rest) + 1);
2108 rankVal[huffWeight[oSize]]++;
2109 }
Yann Collet4856a002015-01-24 01:58:16 +01002110
Yann Collete8c6bb12015-07-26 00:23:57 +01002111 // Prepare ranks
2112 nextRankStart = 0;
2113 for (n=1; n<=maxBits; n++)
2114 {
2115 U32 current = nextRankStart;
2116 nextRankStart += (rankVal[n] << (n-1));
2117 rankVal[n] = current;
2118 }
2119
2120 // fill table
2121 for (n=0; n<=oSize; n++)
Yann Collet4856a002015-01-24 01:58:16 +01002122 {
2123 U32 i;
Yann Collete8c6bb12015-07-26 00:23:57 +01002124 const U32 w = huffWeight[n];
2125 const U32 length = (1 << w) >> 1;
2126 HUF_DElt D;
2127 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
2128 for (i = rankVal[w]; i < rankVal[w] + length; i++)
2129 dt[i] = D;
2130 rankVal[w] += length;
Yann Collet4856a002015-01-24 01:58:16 +01002131 }
2132
Yann Collete8c6bb12015-07-26 00:23:57 +01002133 return iSize+1;
Yann Collet4856a002015-01-24 01:58:16 +01002134}
Yann Collete8c6bb12015-07-26 00:23:57 +01002135
2136/*
2137#define HUF_DECODE_SYMBOL(n, Dstream) \
2138 val = FSE_lookBitsFast(&Dstream, dtLog); \
2139 c = dt[val].byte; \
2140 FSE_skipBits(&Dstream, dt[val].nbBits); \
2141 op[n] = c;
2142*/
2143
2144static void HUF_decodeSymbol(BYTE* ptr, FSE_DStream_t* Dstream, const HUF_DElt* dt, U32 dtLog)
2145{
2146 size_t val = FSE_lookBitsFast(Dstream, dtLog);
2147 BYTE c = dt[val].byte;
2148 FSE_skipBits(Dstream, dt[val].nbBits);
2149 *ptr = c;
2150}
2151
2152static size_t HUF_decompress_usingDTable(
2153 void* dst, size_t maxDstSize,
2154 const void* cSrc, size_t cSrcSize,
2155 const U16* DTable)
2156{
2157 BYTE* const ostart = (BYTE*) dst;
2158 BYTE* op = ostart;
2159 BYTE* const omax = op + maxDstSize;
2160 BYTE* const olimit = omax-15;
2161
2162 const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
2163 const U32 dtLog = DTable[0];
2164 size_t errorCode;
2165 U32 reloadStatus;
2166
2167 /* Init */
2168
2169 const U16* jumpTable = (const U16*)cSrc;
2170 const size_t length1 = jumpTable[0];
2171 const size_t length2 = jumpTable[1];
2172 const size_t length3 = jumpTable[2];
2173 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
2174 const char* const start1 = (const char*)(cSrc) + 6;
2175 const char* const start2 = start1 + length1;
2176 const char* const start3 = start2 + length2;
2177 const char* const start4 = start3 + length3;
2178 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
2179
2180 errorCode = FSE_initDStream(&bitD1, start1, length1);
2181 if (FSE_isError(errorCode)) return errorCode;
2182 errorCode = FSE_initDStream(&bitD2, start2, length2);
2183 if (FSE_isError(errorCode)) return errorCode;
2184 errorCode = FSE_initDStream(&bitD3, start3, length3);
2185 if (FSE_isError(errorCode)) return errorCode;
2186 errorCode = FSE_initDStream(&bitD4, start4, length4);
2187 if (FSE_isError(errorCode)) return errorCode;
2188
2189 reloadStatus=FSE_reloadDStream(&bitD2);
2190
2191 /* 16 symbols per loop */
2192 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
2193 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
2194 {
2195#define HUF_DECODE_SYMBOL(n, Dstream) \
2196 HUF_decodeSymbol(op+n, &Dstream, dt, dtLog); \
2197 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
2198
2199 HUF_DECODE_SYMBOL( 0, bitD1);
2200 HUF_DECODE_SYMBOL( 1, bitD2);
2201 HUF_DECODE_SYMBOL( 2, bitD3);
2202 HUF_DECODE_SYMBOL( 3, bitD4);
2203 HUF_DECODE_SYMBOL( 4, bitD1);
2204 HUF_DECODE_SYMBOL( 5, bitD2);
2205 HUF_DECODE_SYMBOL( 6, bitD3);
2206 HUF_DECODE_SYMBOL( 7, bitD4);
2207 HUF_DECODE_SYMBOL( 8, bitD1);
2208 HUF_DECODE_SYMBOL( 9, bitD2);
2209 HUF_DECODE_SYMBOL(10, bitD3);
2210 HUF_DECODE_SYMBOL(11, bitD4);
2211 HUF_DECODE_SYMBOL(12, bitD1);
2212 HUF_DECODE_SYMBOL(13, bitD2);
2213 HUF_DECODE_SYMBOL(14, bitD3);
2214 HUF_DECODE_SYMBOL(15, bitD4);
2215 }
2216
2217 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be 0 (unfinished) */
2218 return (size_t)-FSE_ERROR_corruptionDetected;
2219
2220 /* tail */
2221 {
2222 // bitTail = bitD1; // *much* slower : -20% !??!
2223 FSE_DStream_t bitTail;
2224 bitTail.ptr = bitD1.ptr;
2225 bitTail.bitsConsumed = bitD1.bitsConsumed;
2226 bitTail.bitContainer = bitD1.bitContainer; // required in case FSE_DStream_partiallyFilled
2227 bitTail.start = start1;
2228 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
2229 {
2230 HUF_DECODE_SYMBOL(0, bitTail);
2231 }
2232
2233 if (FSE_endOfDStream(&bitTail))
2234 return op-ostart;
2235 }
2236
2237 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
2238
2239 return (size_t)-FSE_ERROR_corruptionDetected;
2240}
2241
2242
2243size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
2244{
2245 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
2246 const BYTE* ip = (const BYTE*) cSrc;
2247 size_t errorCode;
2248
2249 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
2250 if (FSE_isError(errorCode)) return errorCode;
2251 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2252 ip += errorCode;
2253 cSrcSize -= errorCode;
2254
2255 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
2256}
2257
2258
2259#endif /* FSE_COMMONDEFS_ONLY */