blob: b37a082fee2c4f00eec933919bce3b9c6904ba96 [file] [log] [blame]
/*
   Common functions of New Generation Entropy library
   Copyright (C) 2016, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    You can contact the author at :
    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
    - Public forum : https://groups.google.com/forum/#!forum/lz4c
*************************************************************************** */
34
/* *************************************
*  Dependencies
***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
Yann Collet1f2c95c2017-03-05 21:07:20 -080046/*=== Version ===*/
Yann Collet45960372017-02-15 12:00:03 -080047unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48
49
/*=== Error Management ===*/

/*! FSE_isError() :
    Thin wrapper : forwards `code` to ERR_isError() and returns its result unchanged.
    Presumably non-zero when `code` is an error code (see error_private.h to confirm).
*/
unsigned FSE_isError(size_t code)
{
    return ERR_isError(code);
}
/*! FSE_getErrorName() :
    Thin wrapper : forwards `code` to ERR_getErrorName() and returns the string it provides.
*/
const char* FSE_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
53
/*! HUF_isError() :
    Thin wrapper : forwards `code` to ERR_isError() and returns its result unchanged.
    Presumably non-zero when `code` is an error code (see error_private.h to confirm).
*/
unsigned HUF_isError(size_t code)
{
    return ERR_isError(code);
}
/*! HUF_getErrorName() :
    Thin wrapper : forwards `code` to ERR_getErrorName() and returns the string it provides.
*/
const char* HUF_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
56
57
58/*-**************************************************************
59* FSE NCount encoding-decoding
60****************************************************************/
inikep63ecd742016-05-13 11:27:56 +020061size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62 const void* headerBuffer, size_t hbSize)
63{
64 const BYTE* const istart = (const BYTE*) headerBuffer;
65 const BYTE* const iend = istart + hbSize;
66 const BYTE* ip = istart;
67 int nbBits;
68 int remaining;
69 int threshold;
70 U32 bitStream;
71 int bitCount;
72 unsigned charnum = 0;
73 int previous0 = 0;
74
75 if (hbSize < 4) return ERROR(srcSize_wrong);
76 bitStream = MEM_readLE32(ip);
77 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
78 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
79 bitStream >>= 4;
80 bitCount = 4;
81 *tableLogPtr = nbBits;
82 remaining = (1<<nbBits)+1;
83 threshold = 1<<nbBits;
84 nbBits++;
85
Yann Collet38b75dd2016-07-24 15:35:59 +020086 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +020087 if (previous0) {
88 unsigned n0 = charnum;
89 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020090 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +020091 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020092 ip += 2;
inikep63ecd742016-05-13 11:27:56 +020093 bitStream = MEM_readLE32(ip) >> bitCount;
94 } else {
95 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +020096 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +020097 } }
98 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020099 n0 += 3;
100 bitStream >>= 2;
101 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200102 }
103 n0 += bitStream & 3;
104 bitCount += 2;
105 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
106 while (charnum < n0) normalizedCounter[charnum++] = 0;
107 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
108 ip += bitCount>>3;
109 bitCount &= 7;
110 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200111 } else {
inikep63ecd742016-05-13 11:27:56 +0200112 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200113 } }
Yann Collet45960372017-02-15 12:00:03 -0800114 { int const max = (2*threshold-1) - remaining;
115 int count;
inikep63ecd742016-05-13 11:27:56 +0200116
117 if ((bitStream & (threshold-1)) < (U32)max) {
Yann Collet45960372017-02-15 12:00:03 -0800118 count = bitStream & (threshold-1);
119 bitCount += nbBits-1;
inikep63ecd742016-05-13 11:27:56 +0200120 } else {
Yann Collet45960372017-02-15 12:00:03 -0800121 count = bitStream & (2*threshold-1);
inikep63ecd742016-05-13 11:27:56 +0200122 if (count >= threshold) count -= max;
Yann Collet45960372017-02-15 12:00:03 -0800123 bitCount += nbBits;
inikep63ecd742016-05-13 11:27:56 +0200124 }
125
126 count--; /* extra accuracy */
Yann Collet45960372017-02-15 12:00:03 -0800127 remaining -= count < 0 ? -count : count; /* -1 means +1 */
128 normalizedCounter[charnum++] = (short)count;
inikep63ecd742016-05-13 11:27:56 +0200129 previous0 = !count;
130 while (remaining < threshold) {
131 nbBits--;
132 threshold >>= 1;
133 }
134
135 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
136 ip += bitCount>>3;
137 bitCount &= 7;
138 } else {
139 bitCount -= (int)(8 * (iend - 4 - ip));
140 ip = iend - 4;
141 }
142 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200143 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
144 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200145 if (bitCount > 32) return ERROR(corruption_detected);
inikep63ecd742016-05-13 11:27:56 +0200146 *maxSVPtr = charnum-1;
147
148 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200149 return ip-istart;
150}
Yann Colleta91ca622016-06-05 01:33:55 +0200151
152
153/*! HUF_readStats() :
154 Read compact Huffman tree, saved by HUF_writeCTable().
155 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800156 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200157 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200158 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200159*/
160size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
161 U32* nbSymbolsPtr, U32* tableLogPtr,
162 const void* src, size_t srcSize)
163{
164 U32 weightTotal;
165 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700166 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200167 size_t oSize;
168
Nick Terrellccfcc642016-10-17 11:28:02 -0700169 if (!srcSize) return ERROR(srcSize_wrong);
170 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200171 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200172
Yann Collet7ed5e332016-07-24 14:26:11 +0200173 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200174 oSize = iSize - 127;
175 iSize = ((oSize+1)/2);
176 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
177 if (oSize >= hwSize) return ERROR(corruption_detected);
178 ip += 1;
179 { U32 n;
180 for (n=0; n<oSize; n+=2) {
181 huffWeight[n] = ip[n/2] >> 4;
182 huffWeight[n+1] = ip[n/2] & 15;
183 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200184 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800185 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200186 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800187 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200188 if (FSE_isError(oSize)) return oSize;
189 }
190
191 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800192 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200193 weightTotal = 0;
194 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800195 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200196 rankStats[huffWeight[n]]++;
197 weightTotal += (1 << huffWeight[n]) >> 1;
198 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700199 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200200
201 /* get last non-null symbol weight (implied, total must be 2^n) */
202 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800203 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200204 *tableLogPtr = tableLog;
205 /* determine last weight */
206 { U32 const total = 1 << tableLog;
207 U32 const rest = total - weightTotal;
208 U32 const verif = 1 << BIT_highbit32(rest);
209 U32 const lastWeight = BIT_highbit32(rest) + 1;
210 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
211 huffWeight[oSize] = (BYTE)lastWeight;
212 rankStats[lastWeight]++;
213 } }
214
215 /* check tree construction validity */
216 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
217
218 /* results */
219 *nbSymbolsPtr = (U32)(oSize+1);
220 return iSize+1;
221}