blob: 798ea266d8294bb2310ec6d360255318819effb3 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35#ifndef FSE_COMMONDEFS_ONLY
36
37/****************************************************************
38* Tuning parameters
39****************************************************************/
40/* MEMORY_USAGE :
41* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
42* Increasing memory usage improves compression ratio
43* Reduced memory usage can improve speed, due to cache effect
44* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
45#define FSE_MAX_MEMORY_USAGE 14
46#define FSE_DEFAULT_MEMORY_USAGE 13
47
48/* FSE_MAX_SYMBOL_VALUE :
49* Maximum symbol value authorized.
50* Required for proper stack allocation */
51#define FSE_MAX_SYMBOL_VALUE 255
52
53
54/****************************************************************
55* Generic function type & suffix (C template emulation)
56****************************************************************/
57#define FSE_FUNCTION_TYPE BYTE
58#define FSE_FUNCTION_EXTENSION
59
60#endif /* !FSE_COMMONDEFS_ONLY */
61
62
63/****************************************************************
64* Compiler specifics
65****************************************************************/
66#ifdef _MSC_VER /* Visual Studio */
67# define FORCE_INLINE static __forceinline
68# include <intrin.h> /* For Visual 2005 */
69# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
70# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
71#else
72# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
73# ifdef __GNUC__
74# define FORCE_INLINE static inline __attribute__((always_inline))
75# else
76# define FORCE_INLINE static inline
77# endif
78#endif
79
80
81/****************************************************************
82* Includes
83****************************************************************/
84#include <stdlib.h> /* malloc, free, qsort */
85#include <string.h> /* memcpy, memset */
86#include <stdio.h> /* printf (debug) */
87#include "fse_static.h"
88
89
90/****************************************************************
91* Basic Types
92*****************************************************************/
93#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
94# include <stdint.h>
95typedef uint8_t BYTE;
96typedef uint16_t U16;
97typedef int16_t S16;
98typedef uint32_t U32;
99typedef int32_t S32;
100typedef uint64_t U64;
101typedef int64_t S64;
102#else
103typedef unsigned char BYTE;
104typedef unsigned short U16;
105typedef signed short S16;
106typedef unsigned int U32;
107typedef signed int S32;
108typedef unsigned long long U64;
109typedef signed long long S64;
110#endif
111
112
113/****************************************************************
114* Memory I/O
115*****************************************************************/
116static unsigned FSE_isLittleEndian(void)
117{
118 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
119 return one.c[0];
120}
121
122static U32 FSE_read32(const void* memPtr)
123{
124 U32 val32;
125 memcpy(&val32, memPtr, 4);
126 return val32;
127}
128
129static U32 FSE_readLE32(const void* memPtr)
130{
131 if (FSE_isLittleEndian())
132 return FSE_read32(memPtr);
133 else
134 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100135 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100136 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
137 }
138}
139
140static void FSE_writeLE32(void* memPtr, U32 val32)
141{
142 if (FSE_isLittleEndian())
143 {
144 memcpy(memPtr, &val32, 4);
145 }
146 else
147 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100148 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100149 p[0] = (BYTE)val32;
150 p[1] = (BYTE)(val32>>8);
151 p[2] = (BYTE)(val32>>16);
152 p[3] = (BYTE)(val32>>24);
153 }
154}
155
156static U64 FSE_read64(const void* memPtr)
157{
158 U64 val64;
159 memcpy(&val64, memPtr, 8);
160 return val64;
161}
162
163static U64 FSE_readLE64(const void* memPtr)
164{
165 if (FSE_isLittleEndian())
166 return FSE_read64(memPtr);
167 else
168 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100169 const BYTE* p = (const BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100170 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
171 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
172 }
173}
174
175static void FSE_writeLE64(void* memPtr, U64 val64)
176{
177 if (FSE_isLittleEndian())
178 {
179 memcpy(memPtr, &val64, 8);
180 }
181 else
182 {
Yann Collet1cc58de2015-01-29 07:13:54 +0100183 BYTE* p = (BYTE*)memPtr;
Yann Collet4856a002015-01-24 01:58:16 +0100184 p[0] = (BYTE)val64;
185 p[1] = (BYTE)(val64>>8);
186 p[2] = (BYTE)(val64>>16);
187 p[3] = (BYTE)(val64>>24);
188 p[4] = (BYTE)(val64>>32);
189 p[5] = (BYTE)(val64>>40);
190 p[6] = (BYTE)(val64>>48);
191 p[7] = (BYTE)(val64>>56);
192 }
193}
194
195static size_t FSE_readLEST(const void* memPtr)
196{
197 if (sizeof(size_t)==4)
Yann Colletfb814172015-01-25 13:19:12 +0100198 return (size_t)FSE_readLE32(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100199 else
Yann Colletfb814172015-01-25 13:19:12 +0100200 return (size_t)FSE_readLE64(memPtr);
Yann Collet4856a002015-01-24 01:58:16 +0100201}
202
203static void FSE_writeLEST(void* memPtr, size_t val)
204{
205 if (sizeof(size_t)==4)
206 FSE_writeLE32(memPtr, (U32)val);
207 else
208 FSE_writeLE64(memPtr, (U64)val);
209}
210
211
212/****************************************************************
213* Constants
214*****************************************************************/
215#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
216#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
217#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
218#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
219#define FSE_MIN_TABLELOG 5
220
221#define FSE_TABLELOG_ABSOLUTE_MAX 15
222#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
223#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
224#endif
225
226
227/****************************************************************
228* Error Management
229****************************************************************/
230#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
231
232
233/****************************************************************
234* Complex types
235****************************************************************/
236typedef struct
237{
238 int deltaFindState;
239 U16 maxState;
240 BYTE minBitsOut;
241 /* one byte padding */
242} FSE_symbolCompressionTransform;
243
244typedef struct
245{
246 U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; /* compatible with FSE_compressU16() */
247} CTable_max_t;
248
249
250/****************************************************************
251* Internal functions
252****************************************************************/
253FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
254{
255# if defined(_MSC_VER) /* Visual */
256 unsigned long r;
257 _BitScanReverse ( &r, val );
258 return (unsigned) r;
259# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
260 return 31 - __builtin_clz (val);
261# else /* Software version */
262 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
263 U32 v = val;
264 unsigned r;
265 v |= v >> 1;
266 v |= v >> 2;
267 v |= v >> 4;
268 v |= v >> 8;
269 v |= v >> 16;
270 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
271 return r;
272# endif
273}
274
275
276#ifndef FSE_COMMONDEFS_ONLY
277
278unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
279
280#define FSE_GENERATE_STRING(STRING) #STRING,
281static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
282
283const char* FSE_getErrorName(size_t code)
284{
285 static const char* codeError = "Unspecified error code";
286 if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
287 return codeError;
288}
289
290static short FSE_abs(short a)
291{
292 return a<0? -a : a;
293}
294
295
296/****************************************************************
297* Header bitstream management
298****************************************************************/
299size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)
300{
301 size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
302 return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
303}
304
305static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
306 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
307 unsigned safeWrite)
308{
309 BYTE* const ostart = (BYTE*) header;
310 BYTE* out = ostart;
311 BYTE* const oend = ostart + headerBufferSize;
312 int nbBits;
313 const int tableSize = 1 << tableLog;
314 int remaining;
315 int threshold;
316 U32 bitStream;
317 int bitCount;
318 unsigned charnum = 0;
319 int previous0 = 0;
320
321 bitStream = 0;
322 bitCount = 0;
323 /* Table Size */
324 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
325 bitCount += 4;
326
327 /* Init */
328 remaining = tableSize+1; /* +1 for extra accuracy */
329 threshold = tableSize;
330 nbBits = tableLog+1;
331
332 while (remaining>1) /* stops at 1 */
333 {
334 if (previous0)
335 {
336 unsigned start = charnum;
337 while (!normalizedCounter[charnum]) charnum++;
338 while (charnum >= start+24)
339 {
340 start+=24;
341 bitStream += 0xFFFF<<bitCount;
342 if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
343 out[0] = (BYTE)bitStream;
344 out[1] = (BYTE)(bitStream>>8);
345 out+=2;
346 bitStream>>=16;
347 }
348 while (charnum >= start+3)
349 {
350 start+=3;
351 bitStream += 3 << bitCount;
352 bitCount += 2;
353 }
354 bitStream += (charnum-start) << bitCount;
355 bitCount += 2;
356 if (bitCount>16)
357 {
358 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
359 out[0] = (BYTE)bitStream;
360 out[1] = (BYTE)(bitStream>>8);
361 out += 2;
362 bitStream >>= 16;
363 bitCount -= 16;
364 }
365 }
366 {
367 short count = normalizedCounter[charnum++];
368 const short max = (short)((2*threshold-1)-remaining);
369 remaining -= FSE_abs(count);
370 if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;
371 count++; /* +1 for extra accuracy */
372 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
373 bitStream += count << bitCount;
374 bitCount += nbBits;
375 bitCount -= (count<max);
376 previous0 = (count==1);
377 while (remaining<threshold) nbBits--, threshold>>=1;
378 }
379 if (bitCount>16)
380 {
381 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
382 out[0] = (BYTE)bitStream;
383 out[1] = (BYTE)(bitStream>>8);
384 out += 2;
385 bitStream >>= 16;
386 bitCount -= 16;
387 }
388 }
389
390 /* flush remaining bitStream */
391 if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
392 out[0] = (BYTE)bitStream;
393 out[1] = (BYTE)(bitStream>>8);
394 out+= (bitCount+7) /8;
395
396 if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC; /* Too many symbols written (a bit too late?) */
397
398 return (out-ostart);
399}
400
401
402size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
403{
404 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
405 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
406
407 if (headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog))
408 return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
409
410 return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
411}
412
413
414size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
415 const void* headerBuffer, size_t hbSize)
416{
417 const BYTE* const istart = (const BYTE*) headerBuffer;
418 const BYTE* ip = istart;
419 int nbBits;
420 int remaining;
421 int threshold;
422 U32 bitStream;
423 int bitCount;
424 unsigned charnum = 0;
425 int previous0 = 0;
426
427 bitStream = FSE_readLE32(ip);
428 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
429 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
430 bitStream >>= 4;
431 bitCount = 4;
432 *tableLogPtr = nbBits;
433 remaining = (1<<nbBits)+1;
434 threshold = 1<<nbBits;
435 nbBits++;
436
437 while ((remaining>1) && (charnum<=*maxSVPtr))
438 {
439 if (previous0)
440 {
441 unsigned n0 = charnum;
442 while ((bitStream & 0xFFFF) == 0xFFFF)
443 {
444 n0+=24;
445 ip+=2;
446 bitStream = FSE_readLE32(ip) >> bitCount;
447 }
448 while ((bitStream & 3) == 3)
449 {
450 n0+=3;
451 bitStream>>=2;
452 bitCount+=2;
453 }
454 n0 += bitStream & 3;
455 bitCount += 2;
456 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;
457 while (charnum < n0) normalizedCounter[charnum++] = 0;
458 ip += bitCount>>3;
459 bitCount &= 7;
460 bitStream = FSE_readLE32(ip) >> bitCount;
461 }
462 {
463 const short max = (short)((2*threshold-1)-remaining);
464 short count;
465
466 if ((bitStream & (threshold-1)) < (U32)max)
467 {
468 count = (short)(bitStream & (threshold-1));
469 bitCount += nbBits-1;
470 }
471 else
472 {
473 count = (short)(bitStream & (2*threshold-1));
474 if (count >= threshold) count -= max;
475 bitCount += nbBits;
476 }
477
478 count--; /* extra accuracy */
479 remaining -= FSE_abs(count);
480 normalizedCounter[charnum++] = count;
481 previous0 = !count;
482 while (remaining < threshold)
483 {
484 nbBits--;
485 threshold >>= 1;
486 }
487
488 ip += bitCount>>3;
489 bitCount &= 7;
490 bitStream = FSE_readLE32(ip) >> bitCount;
491 }
492 }
493 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
494 *maxSVPtr = charnum-1;
495
496 ip += bitCount>0;
497 if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* arguably a bit late , tbd */
498 return ip-istart;
499}
500
501
502/****************************************************************
503* FSE Compression Code
504****************************************************************/
505/*
506CTable is a variable size structure which contains :
507 U16 tableLog;
508 U16 maxSymbolValue;
509 U16 nextStateNumber[1 << tableLog]; // This size is variable
510 FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
511Allocation is manual, since C standard does not support variable-size structures.
512*/
513
514size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
515{
516 size_t size;
517 FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
518 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
519 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
520 return size;
521}
522
523void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
524{
525 size_t size;
526 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
527 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
528 return malloc(size);
529}
530
531void FSE_freeCTable (void* CTable)
532{
533 free(CTable);
534}
535
Yann Collet4856a002015-01-24 01:58:16 +0100536
537unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
538{
539 U32 tableLog = maxTableLog;
540 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
541 if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
542 if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */
543 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
544 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
545 return tableLog;
546}
547
548
549typedef struct
550{
551 U32 id;
552 U32 count;
553} rank_t;
554
555int FSE_compareRankT(const void* r1, const void* r2)
556{
Yann Collet1cc58de2015-01-29 07:13:54 +0100557 const rank_t* R1 = (const rank_t*)r1;
558 const rank_t* R2 = (const rank_t*)r2;
Yann Collet4856a002015-01-24 01:58:16 +0100559
560 return 2 * (R1->count < R2->count) - 1;
561}
562
Yann Colleta3c75ba2015-02-21 03:31:59 +0100563
564#if 0
Yann Collet565b81d2015-01-29 06:51:30 +0100565static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
566{
Yann Collet2ddf7e92015-02-08 20:26:47 +0100567 rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
Yann Collet565b81d2015-01-29 06:51:30 +0100568 U32 s;
569
570 /* Init */
571 for (s=0; s<=maxSymbolValue; s++)
572 {
573 rank[s].id = s;
574 rank[s].count = count[s];
575 if (norm[s] <= 1) rank[s].count = 0;
576 }
Yann Collet2ddf7e92015-02-08 20:26:47 +0100577 rank[maxSymbolValue+1].id = 0;
578 rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
Yann Collet565b81d2015-01-29 06:51:30 +0100579
580 /* Sort according to count */
581 qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
582
583 while(pointsToRemove)
584 {
585 int newRank = 1;
586 rank_t savedR;
587 if (norm[rank[0].id] == 1)
588 return (size_t)-FSE_ERROR_GENERIC;
589 norm[rank[0].id]--;
590 pointsToRemove--;
591 rank[0].count -= (rank[0].count + 6) >> 3;
592 if (norm[rank[0].id] == 1)
593 rank[0].count=0;
594 savedR = rank[0];
595 while (rank[newRank].count > savedR.count)
596 {
597 rank[newRank-1] = rank[newRank];
598 newRank++;
599 }
600 rank[newRank-1] = savedR;
601 }
602
603 return 0;
604}
Yann Collet565b81d2015-01-29 06:51:30 +0100605
Yann Colleta3c75ba2015-02-21 03:31:59 +0100606#else
607
Yann Collet26aa1ec2015-02-24 09:05:58 +0100608/* Secondary normalization method.
609 To be used when primary method fails. */
610
611static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
Yann Colleta3c75ba2015-02-21 03:31:59 +0100612{
613 U32 s;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100614 U32 distributed = 0;
615 U32 ToDistribute;
616
617 /* Init */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100618 U32 lowThreshold = (U32)(total >> tableLog);
619 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
620
621 for (s=0; s<=maxSymbolValue; s++)
622 {
623 if (count[s] == 0)
624 {
625 norm[s]=0;
626 continue;
627 }
628 if (count[s] <= lowThreshold)
629 {
630 norm[s] = -1;
631 distributed++;
632 total -= count[s];
633 continue;
634 }
635 if (count[s] <= lowOne)
636 {
637 norm[s] = 1;
638 distributed++;
639 total -= count[s];
640 continue;
641 }
642 norm[s]=-2;
643 }
644 ToDistribute = (1 << tableLog) - distributed;
645
646 if ((total / ToDistribute) > lowOne)
647 {
648 /* risk of rounding to zero */
649 lowOne = (U32)((total * 3) / (ToDistribute * 2));
650 for (s=0; s<=maxSymbolValue; s++)
651 {
652 if ((norm[s] == -2) && (count[s] <= lowOne))
653 {
654 norm[s] = 1;
655 distributed++;
656 total -= count[s];
657 continue;
658 }
659 }
660 ToDistribute = (1 << tableLog) - distributed;
661 }
662
663 if (distributed == maxSymbolValue+1)
664 {
Yann Collet26aa1ec2015-02-24 09:05:58 +0100665 /* all values are pretty poor;
666 probably incompressible data (should have already been detected);
667 find max, then give all remaining points to max */
Yann Colleta3c75ba2015-02-21 03:31:59 +0100668 U32 maxV = 0, maxC =0;
669 for (s=0; s<=maxSymbolValue; s++)
670 if (count[s] > maxC) maxV=s, maxC=count[s];
671 norm[maxV] += ToDistribute;
672 return 0;
673 }
674
675 {
676 U64 const vStepLog = 62 - tableLog;
677 U64 const mid = (1ULL << (vStepLog-1)) - 1;
678 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
679 U64 tmpTotal = mid;
680 for (s=0; s<=maxSymbolValue; s++)
681 {
682 if (norm[s]==-2)
683 {
684 U64 end = tmpTotal + (count[s] * rStep);
685 U32 sStart = (U32)(tmpTotal >> vStepLog);
686 U32 sEnd = (U32)(end >> vStepLog);
687 U32 weight = sEnd - sStart;
688 if (weight < 1)
689 return (size_t)-FSE_ERROR_GENERIC;
690 norm[s] = weight;
691 tmpTotal = end;
692 }
693 }
694 }
695
696 return 0;
697}
698#endif
699
Yann Collet4856a002015-01-24 01:58:16 +0100700
701size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
702 const unsigned* count, size_t total,
703 unsigned maxSymbolValue)
704{
705 /* Sanity checks */
706 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
707 if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
708 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
709 if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
710
711 {
712 U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
713 U64 const scale = 62 - tableLog;
714 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
715 U64 const vStep = 1ULL<<(scale-20);
716 int stillToDistribute = 1<<tableLog;
717 unsigned s;
718 unsigned largest=0;
719 short largestP=0;
720 U32 lowThreshold = (U32)(total >> tableLog);
721
722 for (s=0; s<=maxSymbolValue; s++)
723 {
724 if (count[s] == total) return 0;
725 if (count[s] == 0)
726 {
727 normalizedCounter[s]=0;
728 continue;
729 }
730 if (count[s] <= lowThreshold)
731 {
732 normalizedCounter[s] = -1;
733 stillToDistribute--;
734 }
735 else
736 {
737 short proba = (short)((count[s]*step) >> scale);
738 if (proba<8)
739 {
Yann Collet2ddf7e92015-02-08 20:26:47 +0100740 U64 restToBeat = vStep * rtbTable[proba];
Yann Collet4856a002015-01-24 01:58:16 +0100741 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
742 }
743 if (proba > largestP)
744 {
745 largestP=proba;
746 largest=s;
747 }
748 normalizedCounter[s] = proba;
749 stillToDistribute -= proba;
750 }
751 }
Yann Collet4856a002015-01-24 01:58:16 +0100752 if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
753 {
Yann Collet26aa1ec2015-02-24 09:05:58 +0100754 /* corner case, need another normalization method */
755 size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
Yann Collet565b81d2015-01-29 06:51:30 +0100756 if (FSE_isError(errorCode)) return errorCode;
Yann Collet4856a002015-01-24 01:58:16 +0100757 }
758 else normalizedCounter[largest] += (short)stillToDistribute;
759 }
760
761#if 0
762 { /* Print Table (debug) */
Yann Collet565b81d2015-01-29 06:51:30 +0100763 U32 s;
764 U32 nTotal = 0;
Yann Collet4856a002015-01-24 01:58:16 +0100765 for (s=0; s<=maxSymbolValue; s++)
766 printf("%3i: %4i \n", s, normalizedCounter[s]);
Yann Collet565b81d2015-01-29 06:51:30 +0100767 for (s=0; s<=maxSymbolValue; s++)
768 nTotal += abs(normalizedCounter[s]);
769 if (nTotal != (1U<<tableLog))
770 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
Yann Collet4856a002015-01-24 01:58:16 +0100771 getchar();
772 }
773#endif
774
775 return tableLog;
776}
777
778
779/* fake CTable, for raw (uncompressed) input */
780size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
781{
782 const unsigned tableSize = 1 << nbBits;
783 const unsigned tableMask = tableSize - 1;
784 const unsigned maxSymbolValue = tableMask;
785 U16* tableU16 = ( (U16*) CTable) + 2;
786 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (tableSize>>1));
787 unsigned s;
788
789 /* Sanity checks */
790 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
791 if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
792
793 /* header */
794 tableU16[-2] = (U16) nbBits;
795 tableU16[-1] = (U16) maxSymbolValue;
796
797 /* Build table */
798 for (s=0; s<tableSize; s++)
799 tableU16[s] = (U16)(tableSize + s);
800
801 /* Build Symbol Transformation Table */
802 for (s=0; s<=maxSymbolValue; s++)
803 {
804 symbolTT[s].minBitsOut = (BYTE)nbBits;
805 symbolTT[s].deltaFindState = s-1;
806 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
807 }
808
809 return 0;
810}
811
812
813/* fake CTable, for rle (100% always same symbol) input */
814size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
815{
816 const unsigned tableSize = 1;
817 U16* tableU16 = ( (U16*) CTable) + 2;
818 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)CTable + 2);
819
820 /* safety checks */
821 if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be 4 bytes aligned */
822
823 /* header */
824 tableU16[-2] = (U16) 0;
825 tableU16[-1] = (U16) symbolValue;
826
827 /* Build table */
828 tableU16[0] = 0;
829 tableU16[1] = 0; /* just in case */
830
831 /* Build Symbol Transformation Table */
832 {
833 symbolTT[symbolValue].minBitsOut = 0;
834 symbolTT[symbolValue].deltaFindState = 0;
835 symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
836 }
837
838 return 0;
839}
840
841
842void FSE_initCStream(FSE_CStream_t* bitC, void* start)
843{
844 bitC->bitContainer = 0;
845 bitC->bitPos = 0; /* reserved for unusedBits */
846 bitC->startPtr = (char*)start;
847 bitC->ptr = bitC->startPtr;
848}
849
850void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)
851{
852 const U32 tableLog = ( (U16*) CTable) [0];
853 statePtr->value = (ptrdiff_t)1<<tableLog;
854 statePtr->stateTable = ((const U16*) CTable) + 2;
855 statePtr->symbolTT = (const U32*)CTable + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
856 statePtr->stateLog = tableLog;
857}
858
859void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
860{
861 static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
862 bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
863 bitC->bitPos += nbBits;
864}
865
866void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
867{
868 const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
869 const U16* const stateTable = (const U16*) statePtr->stateTable;
870 int nbBitsOut = symbolTT[symbol].minBitsOut;
871 nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);
872 FSE_addBits(bitC, statePtr->value, nbBitsOut);
873 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];
874}
875
876void FSE_flushBits(FSE_CStream_t* bitC)
877{
878 size_t nbBytes = bitC->bitPos >> 3;
879 FSE_writeLEST(bitC->ptr, bitC->bitContainer);
880 bitC->bitPos &= 7;
881 bitC->ptr += nbBytes;
882 bitC->bitContainer >>= nbBytes*8;
883}
884
885void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
886{
887 FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
888 FSE_flushBits(bitC);
889}
890
891
892size_t FSE_closeCStream(FSE_CStream_t* bitC)
893{
894 char* endPtr;
895
896 FSE_addBits(bitC, 1, 1);
897 FSE_flushBits(bitC);
898
899 endPtr = bitC->ptr;
900 endPtr += bitC->bitPos > 0;
901
902 return (endPtr - bitC->startPtr);
903}
904
905
906size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
907 const void* src, size_t srcSize,
908 const void* CTable)
909{
910 const BYTE* const istart = (const BYTE*) src;
911 const BYTE* ip;
912 const BYTE* const iend = istart + srcSize;
913
914 FSE_CStream_t bitC;
915 FSE_CState_t CState1, CState2;
916
917
918 /* init */
919 (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
920 FSE_initCStream(&bitC, dst);
921 FSE_initCState(&CState1, CTable);
922 CState2 = CState1;
923
924 ip=iend;
925
926 /* join to even */
927 if (srcSize & 1)
928 {
929 FSE_encodeByte(&bitC, &CState1, *--ip);
930 FSE_flushBits(&bitC);
931 }
932
933 /* join to mod 4 */
934 if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
935 {
936 FSE_encodeByte(&bitC, &CState2, *--ip);
937 FSE_encodeByte(&bitC, &CState1, *--ip);
938 FSE_flushBits(&bitC);
939 }
940
941 /* 2 or 4 encoding per loop */
942 while (ip>istart)
943 {
944 FSE_encodeByte(&bitC, &CState2, *--ip);
945
946 if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
947 FSE_flushBits(&bitC);
948
949 FSE_encodeByte(&bitC, &CState1, *--ip);
950
951 if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
952 {
953 FSE_encodeByte(&bitC, &CState2, *--ip);
954 FSE_encodeByte(&bitC, &CState1, *--ip);
955 }
956
957 FSE_flushBits(&bitC);
958 }
959
960 FSE_flushCState(&bitC, &CState2);
961 FSE_flushCState(&bitC, &CState1);
962 return FSE_closeCStream(&bitC);
963}
964
965
Yann Collet4856a002015-01-24 01:58:16 +0100966size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
967
968
969size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
970{
971 const BYTE* const istart = (const BYTE*) src;
972 const BYTE* ip = istart;
973
974 BYTE* const ostart = (BYTE*) dst;
975 BYTE* op = ostart;
976 BYTE* const oend = ostart + dstSize;
977
978 U32 count[FSE_MAX_SYMBOL_VALUE+1];
979 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
980 CTable_max_t CTable;
981 size_t errorCode;
982
983 /* early out */
984 if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
985 if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
986 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
987 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
988
989 /* Scan input and build symbol stats */
990 errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
991 if (FSE_isError(errorCode)) return errorCode;
Yann Colleta3c75ba2015-02-21 03:31:59 +0100992 if (errorCode == srcSize) return 1;
993 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
Yann Collet4856a002015-01-24 01:58:16 +0100994
995 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
996 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
997 if (FSE_isError(errorCode)) return errorCode;
998
999 /* Write table description header */
1000 errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
1001 if (FSE_isError(errorCode)) return errorCode;
1002 op += errorCode;
1003
1004 /* Compress */
1005 errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
1006 if (FSE_isError(errorCode)) return errorCode;
1007 op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);
1008
1009 /* check compressibility */
1010 if ( (size_t)(op-ostart) >= srcSize-1 )
1011 return 0;
1012
1013 return op-ostart;
1014}
1015
1016
1017size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
1018{
1019 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
1020}
1021
1022
1023/*********************************************************
1024* Decompression (Byte symbols)
1025*********************************************************/
1026typedef struct
1027{
1028 U16 newState;
1029 BYTE symbol;
1030 BYTE nbBits;
1031} FSE_decode_t; /* size == U32 */
1032
1033/* Specific corner case : RLE compression */
1034size_t FSE_decompressRLE(void* dst, size_t originalSize,
1035 const void* cSrc, size_t cSrcSize)
1036{
1037 if (cSrcSize != 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1038 memset(dst, *(BYTE*)cSrc, originalSize);
1039 return originalSize;
1040}
1041
1042
1043size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
1044{
Yann Collet1cc58de2015-01-29 07:13:54 +01001045 U32* const base32 = (U32*)DTable;
Yann Collet4856a002015-01-24 01:58:16 +01001046 FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
1047
1048 /* Sanity check */
1049 if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
1050
1051 base32[0] = 0;
1052
1053 cell->newState = 0;
1054 cell->symbol = symbolValue;
1055 cell->nbBits = 0;
1056
1057 return 0;
1058}
1059
1060
1061size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
1062{
Yann Collet1cc58de2015-01-29 07:13:54 +01001063 U32* const base32 = (U32*)DTable;
Yann Collet4856a002015-01-24 01:58:16 +01001064 FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
1065 const unsigned tableSize = 1 << nbBits;
1066 const unsigned tableMask = tableSize - 1;
1067 const unsigned maxSymbolValue = tableMask;
1068 unsigned s;
1069
1070 /* Sanity checks */
1071 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
1072 if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
1073
1074 /* Build Decoding Table */
1075 base32[0] = nbBits;
1076 for (s=0; s<=maxSymbolValue; s++)
1077 {
1078 dinfo[s].newState = 0;
1079 dinfo[s].symbol = (BYTE)s;
1080 dinfo[s].nbBits = (BYTE)nbBits;
1081 }
1082
1083 return 0;
1084}
1085
1086
1087/* FSE_initDStream
1088 * Initialize a FSE_DStream_t.
1089 * srcBuffer must point at the beginning of an FSE block.
1090 * The function result is the size of the FSE_block (== srcSize).
1091 * If srcSize is too small, the function will return an errorCode;
1092 */
1093size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1094{
1095 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1096
1097 if (srcSize >= sizeof(bitD_t))
1098 {
1099 U32 contain32;
1100 bitD->start = (char*)srcBuffer;
1101 bitD->ptr = (char*)srcBuffer + srcSize - sizeof(bitD_t);
1102 bitD->bitContainer = FSE_readLEST(bitD->ptr);
1103 contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1104 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1105 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1106 }
1107 else
1108 {
1109 U32 contain32;
1110 bitD->start = (char*)srcBuffer;
1111 bitD->ptr = bitD->start;
1112 bitD->bitContainer = *(BYTE*)(bitD->start);
1113 switch(srcSize)
1114 {
1115 case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
1116 case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
1117 case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
1118 case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
1119 case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
1120 case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) << 8;
1121 default:;
1122 }
1123 contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1124 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1125 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1126 bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;
1127 }
1128
1129 return srcSize;
1130}
1131
1132
1133/* FSE_readBits
1134 * Read next n bits from the bitContainer.
1135 * Use the fast variant *only* if n > 0.
1136 * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
1137 * return : value extracted.
1138 */
1139bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1140{
1141 bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);
1142 bitD->bitsConsumed += nbBits;
1143 return value;
1144}
1145
1146bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 */
1147{
1148 bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);
1149 bitD->bitsConsumed += nbBits;
1150 return value;
1151}
1152
1153unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1154{
1155 if (bitD->ptr >= bitD->start + sizeof(bitD_t))
1156 {
1157 bitD->ptr -= bitD->bitsConsumed >> 3;
1158 bitD->bitsConsumed &= 7;
1159 bitD->bitContainer = FSE_readLEST(bitD->ptr);
1160 return 0;
1161 }
1162 if (bitD->ptr == bitD->start)
1163 {
1164 if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
1165 if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
1166 return 3;
1167 }
1168 {
1169 U32 nbBytes = bitD->bitsConsumed >> 3;
1170 if (bitD->ptr - nbBytes < bitD->start)
1171 nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
1172 bitD->ptr -= nbBytes;
1173 bitD->bitsConsumed -= nbBytes*8;
1174 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
1175 return (bitD->ptr == bitD->start);
1176 }
1177}
1178
1179
1180void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)
1181{
Yann Collet1cc58de2015-01-29 07:13:54 +01001182 const U32* const base32 = (const U32*)DTable;
Yann Collet4856a002015-01-24 01:58:16 +01001183 DStatePtr->state = FSE_readBits(bitD, base32[0]);
1184 FSE_reloadDStream(bitD);
1185 DStatePtr->table = base32 + 1;
1186}
1187
1188BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1189{
1190 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1191 const U32 nbBits = DInfo.nbBits;
1192 BYTE symbol = DInfo.symbol;
1193 bitD_t lowBits = FSE_readBits(bitD, nbBits);
1194
1195 DStatePtr->state = DInfo.newState + lowBits;
1196 return symbol;
1197}
1198
1199BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1200{
1201 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1202 const U32 nbBits = DInfo.nbBits;
1203 BYTE symbol = DInfo.symbol;
1204 bitD_t lowBits = FSE_readBitsFast(bitD, nbBits);
1205
1206 DStatePtr->state = DInfo.newState + lowBits;
1207 return symbol;
1208}
1209
1210/* FSE_endOfDStream
1211 Tells if bitD has reached end of bitStream or not */
1212
1213unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1214{
1215 return FSE_reloadDStream((FSE_DStream_t*)bitD)==2;
1216}
1217
1218unsigned FSE_endOfDState(const FSE_DState_t* statePtr)
1219{
1220 return statePtr->state == 0;
1221}
1222
1223
1224FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1225 void* dst, size_t maxDstSize,
1226 const void* cSrc, size_t cSrcSize,
1227 const void* DTable, unsigned fast)
1228{
1229 BYTE* const ostart = (BYTE*) dst;
1230 BYTE* op = ostart;
1231 BYTE* const omax = op + maxDstSize;
1232 BYTE* const olimit = omax-3;
1233
1234 FSE_DStream_t bitD;
1235 FSE_DState_t state1, state2;
1236 size_t errorCode;
1237
1238 /* Init */
1239 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1240 if (FSE_isError(errorCode)) return errorCode;
1241
1242 FSE_initDState(&state1, &bitD, DTable);
1243 FSE_initDState(&state2, &bitD, DTable);
1244
1245
1246 /* 2 symbols per loop */
1247 while (!FSE_reloadDStream(&bitD) && (op<olimit))
1248 {
1249 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1250
1251 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8) /* This test must be static */
1252 FSE_reloadDStream(&bitD);
1253
1254 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1255
1256 if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8) /* This test must be static */
1257 {
1258 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1259 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1260 }
1261 }
1262
1263 /* tail */
1264 while (1)
1265 {
1266 if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1267 break;
1268
1269 *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1270
1271 if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1272 break;
1273
1274 *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1275 }
1276
1277 /* end ? */
1278 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1279 return op-ostart;
1280
1281 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1282
1283 return (size_t)-FSE_ERROR_corruptionDetected;
1284}
1285
1286
1287size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1288 const void* cSrc, size_t cSrcSize,
1289 const void* DTable, size_t fastMode)
1290{
1291 /* select fast mode (static) */
1292 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1);
1293 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);
1294}
1295
1296
1297size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1298{
1299 const BYTE* const istart = (const BYTE*)cSrc;
1300 const BYTE* ip = istart;
1301 short counting[FSE_MAX_SYMBOL_VALUE+1];
Yann Collet2ddf7e92015-02-08 20:26:47 +01001302 FSE_decode_t DTable[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
Yann Collet4856a002015-01-24 01:58:16 +01001303 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1304 unsigned tableLog;
1305 size_t errorCode, fastMode;
1306
1307 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1308
1309 /* normal FSE decoding mode */
1310 errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1311 if (FSE_isError(errorCode)) return errorCode;
1312 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1313 ip += errorCode;
1314 cSrcSize -= errorCode;
1315
1316 fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);
1317 if (FSE_isError(fastMode)) return fastMode;
1318
1319 /* always return, even if it is an error code */
1320 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
1321}
1322
1323
1324#endif /* FSE_COMMONDEFS_ONLY */
1325
1326/*
1327 2nd part of the file
1328 designed to be included
1329 for type-specific functions (template equivalent in C)
1330 Objective is to write such functions only once, for better maintenance
1331*/
1332
1333/* safety checks */
1334#ifndef FSE_FUNCTION_EXTENSION
1335# error "FSE_FUNCTION_EXTENSION must be defined"
1336#endif
1337#ifndef FSE_FUNCTION_TYPE
1338# error "FSE_FUNCTION_TYPE must be defined"
1339#endif
1340
1341/* Function names */
1342#define FSE_CAT(X,Y) X##Y
1343#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
1344#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
1345
1346
1347/* Function templates */
1348size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
1349{
1350 const FSE_FUNCTION_TYPE* ip = source;
1351 const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
1352 unsigned maxSymbolValue = *maxSymbolValuePtr;
1353 unsigned max=0;
1354 int s;
1355
1356 U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1357 U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1358 U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1359 U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1360
1361 /* safety checks */
1362 if (!sourceSize)
1363 {
1364 memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
1365 *maxSymbolValuePtr = 0;
1366 return 0;
1367 }
1368 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
1369 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
1370
1371 if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
1372 {
1373 /* check input values, to avoid count table overflow */
1374 while (ip < iend-3)
1375 {
1376 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
1377 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
1378 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
1379 if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
1380 }
1381 }
1382 else
1383 {
1384 U32 cached = FSE_read32(ip); ip += 4;
1385 while (ip < iend-15)
1386 {
1387 U32 c = cached; cached = FSE_read32(ip); ip += 4;
1388 Counting1[(BYTE) c ]++;
1389 Counting2[(BYTE)(c>>8) ]++;
1390 Counting3[(BYTE)(c>>16)]++;
1391 Counting4[ c>>24 ]++;
1392 c = cached; cached = FSE_read32(ip); ip += 4;
1393 Counting1[(BYTE) c ]++;
1394 Counting2[(BYTE)(c>>8) ]++;
1395 Counting3[(BYTE)(c>>16)]++;
1396 Counting4[ c>>24 ]++;
1397 c = cached; cached = FSE_read32(ip); ip += 4;
1398 Counting1[(BYTE) c ]++;
1399 Counting2[(BYTE)(c>>8) ]++;
1400 Counting3[(BYTE)(c>>16)]++;
1401 Counting4[ c>>24 ]++;
1402 c = cached; cached = FSE_read32(ip); ip += 4;
1403 Counting1[(BYTE) c ]++;
1404 Counting2[(BYTE)(c>>8) ]++;
1405 Counting3[(BYTE)(c>>16)]++;
1406 Counting4[ c>>24 ]++;
1407 }
1408 ip-=4;
1409 }
1410
1411 /* finish last symbols */
1412 while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
1413
1414 for (s=0; s<=(int)maxSymbolValue; s++)
1415 {
1416 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
1417 if (count[s] > max) max = count[s];
1418 }
1419
1420 while (!count[maxSymbolValue]) maxSymbolValue--;
1421 *maxSymbolValuePtr = maxSymbolValue;
1422 return (int)max;
1423}
1424
1425/* hidden fast variant (unsafe) */
1426size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
1427{
1428 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
1429}
1430
1431size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
1432{
1433 if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
1434 {
1435 *maxSymbolValuePtr = 255;
1436 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
1437 }
1438 return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 1);
1439}
1440
1441
1442static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
1443
1444size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
1445(void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1446{
1447 const unsigned tableSize = 1 << tableLog;
1448 const unsigned tableMask = tableSize - 1;
1449 U16* tableU16 = ( (U16*) CTable) + 2;
1450 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );
1451 const unsigned step = FSE_tableStep(tableSize);
1452 unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
1453 U32 position = 0;
1454 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];
1455 U32 highThreshold = tableSize-1;
1456 unsigned symbol;
1457 unsigned i;
1458
1459 /* safety checks */
1460 if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
1461
1462 /* header */
1463 tableU16[-2] = (U16) tableLog;
1464 tableU16[-1] = (U16) maxSymbolValue;
1465
1466 /* For explanations on how to distribute symbol values over the table :
1467 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
1468
1469 /* symbol start positions */
1470 cumul[0] = 0;
1471 for (i=1; i<=maxSymbolValue+1; i++)
1472 {
1473 if (normalizedCounter[i-1]==-1) /* Low prob symbol */
1474 {
1475 cumul[i] = cumul[i-1] + 1;
1476 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
1477 }
1478 else
1479 cumul[i] = cumul[i-1] + normalizedCounter[i-1];
1480 }
1481 cumul[maxSymbolValue+1] = tableSize+1;
1482
1483 /* Spread symbols */
1484 for (symbol=0; symbol<=maxSymbolValue; symbol++)
1485 {
1486 int nbOccurences;
1487 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
1488 {
1489 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1490 position = (position + step) & tableMask;
1491 while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
1492 }
1493 }
1494
1495 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
1496
1497 /* Build table */
1498 for (i=0; i<tableSize; i++)
1499 {
1500 FSE_FUNCTION_TYPE s = tableSymbol[i];
1501 tableU16[cumul[s]++] = (U16) (tableSize+i); // Table U16 : sorted by symbol order; gives next state value
1502 }
1503
1504 // Build Symbol Transformation Table
1505 {
1506 unsigned s;
1507 unsigned total = 0;
1508 for (s=0; s<=maxSymbolValue; s++)
1509 {
1510 switch (normalizedCounter[s])
1511 {
1512 case 0:
1513 break;
1514 case -1:
1515 case 1:
1516 symbolTT[s].minBitsOut = (BYTE)tableLog;
1517 symbolTT[s].deltaFindState = total - 1;
1518 total ++;
1519 symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1520 break;
1521 default :
1522 symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
1523 symbolTT[s].deltaFindState = total - normalizedCounter[s];
1524 total += normalizedCounter[s];
1525 symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
1526 }
1527 }
1528 }
1529
1530 return 0;
1531}
1532
1533
1534#define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
1535
1536void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
1537{
1538 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
1539 return malloc( ((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE) );
1540}
1541
1542void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
1543{
1544 free(DTable);
1545}
1546
1547
1548size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
1549(void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1550{
Yann Collet1cc58de2015-01-29 07:13:54 +01001551 U32* const base32 = (U32*)DTable;
Yann Collet4856a002015-01-24 01:58:16 +01001552 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
1553 const U32 tableSize = 1 << tableLog;
1554 const U32 tableMask = tableSize-1;
1555 const U32 step = FSE_tableStep(tableSize);
1556 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
1557 U32 position = 0;
1558 U32 highThreshold = tableSize-1;
1559 const S16 largeLimit= 1 << (tableLog-1);
1560 U32 noLarge = 1;
1561 U32 s;
1562
1563 /* Sanity Checks */
1564 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
1565 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
1566
1567 /* Init, lay down lowprob symbols */
1568 base32[0] = tableLog;
1569 for (s=0; s<=maxSymbolValue; s++)
1570 {
1571 if (normalizedCounter[s]==-1)
1572 {
1573 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
1574 symbolNext[s] = 1;
1575 }
1576 else
1577 {
1578 if (normalizedCounter[s] >= largeLimit) noLarge=0;
1579 symbolNext[s] = normalizedCounter[s];
1580 }
1581 }
1582
1583 /* Spread symbols */
1584 for (s=0; s<=maxSymbolValue; s++)
1585 {
1586 int i;
1587 for (i=0; i<normalizedCounter[s]; i++)
1588 {
1589 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
1590 position = (position + step) & tableMask;
1591 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
1592 }
1593 }
1594
1595 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
1596
1597 /* Build Decoding table */
1598 {
1599 U32 i;
1600 for (i=0; i<tableSize; i++)
1601 {
1602 FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
1603 U16 nextState = symbolNext[symbol]++;
1604 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
1605 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
1606 }
1607 }
1608
1609 return noLarge;
1610}