blob: b12944e1de93ad10a0349fd9075bb0258128697c [file] [log] [blame]
inikep63ecd742016-05-13 11:27:56 +02001/*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33*************************************************************************** */
34
35/* *************************************
36* Dependencies
37***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
Yann Collet1f2c95c2017-03-05 21:07:20 -080046/*=== Version ===*/
Yann Collet45960372017-02-15 12:00:03 -080047unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48
49
Yann Collet1f2c95c2017-03-05 21:07:20 -080050/*=== Error Management ===*/
inikep63ecd742016-05-13 11:27:56 +020051unsigned FSE_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020052const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
53
inikep63ecd742016-05-13 11:27:56 +020054unsigned HUF_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020055const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
56
57
58/*-**************************************************************
59* FSE NCount encoding-decoding
60****************************************************************/
inikep63ecd742016-05-13 11:27:56 +020061size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62 const void* headerBuffer, size_t hbSize)
63{
64 const BYTE* const istart = (const BYTE*) headerBuffer;
65 const BYTE* const iend = istart + hbSize;
66 const BYTE* ip = istart;
67 int nbBits;
68 int remaining;
69 int threshold;
70 U32 bitStream;
71 int bitCount;
72 unsigned charnum = 0;
73 int previous0 = 0;
74
Nick Terrella97e9a62018-05-23 12:16:00 -070075 if (hbSize < 4) {
Nick Terrellf2d09242018-05-23 14:58:58 -070076 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81 buffer, sizeof(buffer));
82 if (FSE_isError(countSize)) return countSize;
83 if (countSize > hbSize) return ERROR(corruption_detected);
84 return countSize;
85 } }
Nick Terrellc92dd112018-05-23 14:47:20 -070086 assert(hbSize >= 4);
Nick Terrella97e9a62018-05-23 12:16:00 -070087
Yann Colletff773bf2018-06-26 17:24:41 -070088 /* init */
89 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
inikep63ecd742016-05-13 11:27:56 +020090 bitStream = MEM_readLE32(ip);
91 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
92 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
93 bitStream >>= 4;
94 bitCount = 4;
95 *tableLogPtr = nbBits;
96 remaining = (1<<nbBits)+1;
97 threshold = 1<<nbBits;
98 nbBits++;
99
Yann Collet38b75dd2016-07-24 15:35:59 +0200100 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +0200101 if (previous0) {
102 unsigned n0 = charnum;
103 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200104 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +0200105 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200106 ip += 2;
inikep63ecd742016-05-13 11:27:56 +0200107 bitStream = MEM_readLE32(ip) >> bitCount;
108 } else {
109 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200110 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +0200111 } }
112 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200113 n0 += 3;
114 bitStream >>= 2;
115 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200116 }
117 n0 += bitStream & 3;
118 bitCount += 2;
119 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
120 while (charnum < n0) normalizedCounter[charnum++] = 0;
121 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
Nick Terrella97e9a62018-05-23 12:16:00 -0700122 assert((bitCount >> 3) <= 3); /* For first condition to work */
inikep63ecd742016-05-13 11:27:56 +0200123 ip += bitCount>>3;
124 bitCount &= 7;
125 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200126 } else {
inikep63ecd742016-05-13 11:27:56 +0200127 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200128 } }
Yann Collet45960372017-02-15 12:00:03 -0800129 { int const max = (2*threshold-1) - remaining;
130 int count;
inikep63ecd742016-05-13 11:27:56 +0200131
132 if ((bitStream & (threshold-1)) < (U32)max) {
Yann Collet45960372017-02-15 12:00:03 -0800133 count = bitStream & (threshold-1);
134 bitCount += nbBits-1;
inikep63ecd742016-05-13 11:27:56 +0200135 } else {
Yann Collet45960372017-02-15 12:00:03 -0800136 count = bitStream & (2*threshold-1);
inikep63ecd742016-05-13 11:27:56 +0200137 if (count >= threshold) count -= max;
Yann Collet45960372017-02-15 12:00:03 -0800138 bitCount += nbBits;
inikep63ecd742016-05-13 11:27:56 +0200139 }
140
141 count--; /* extra accuracy */
Yann Collet45960372017-02-15 12:00:03 -0800142 remaining -= count < 0 ? -count : count; /* -1 means +1 */
143 normalizedCounter[charnum++] = (short)count;
inikep63ecd742016-05-13 11:27:56 +0200144 previous0 = !count;
145 while (remaining < threshold) {
146 nbBits--;
147 threshold >>= 1;
148 }
149
150 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
151 ip += bitCount>>3;
152 bitCount &= 7;
153 } else {
154 bitCount -= (int)(8 * (iend - 4 - ip));
155 ip = iend - 4;
156 }
157 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200158 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
159 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200160 if (bitCount > 32) return ERROR(corruption_detected);
inikep63ecd742016-05-13 11:27:56 +0200161 *maxSVPtr = charnum-1;
162
163 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200164 return ip-istart;
165}
Yann Colleta91ca622016-06-05 01:33:55 +0200166
167
168/*! HUF_readStats() :
169 Read compact Huffman tree, saved by HUF_writeCTable().
170 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800171 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200172 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200173 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200174*/
175size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
176 U32* nbSymbolsPtr, U32* tableLogPtr,
177 const void* src, size_t srcSize)
178{
179 U32 weightTotal;
180 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700181 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200182 size_t oSize;
183
Nick Terrellccfcc642016-10-17 11:28:02 -0700184 if (!srcSize) return ERROR(srcSize_wrong);
185 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200186 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200187
Yann Collet7ed5e332016-07-24 14:26:11 +0200188 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200189 oSize = iSize - 127;
190 iSize = ((oSize+1)/2);
191 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
192 if (oSize >= hwSize) return ERROR(corruption_detected);
193 ip += 1;
194 { U32 n;
195 for (n=0; n<oSize; n+=2) {
196 huffWeight[n] = ip[n/2] >> 4;
197 huffWeight[n+1] = ip[n/2] & 15;
198 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200199 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800200 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200201 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800202 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200203 if (FSE_isError(oSize)) return oSize;
204 }
205
206 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800207 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200208 weightTotal = 0;
209 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800210 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200211 rankStats[huffWeight[n]]++;
212 weightTotal += (1 << huffWeight[n]) >> 1;
213 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700214 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200215
216 /* get last non-null symbol weight (implied, total must be 2^n) */
217 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800218 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200219 *tableLogPtr = tableLog;
220 /* determine last weight */
221 { U32 const total = 1 << tableLog;
222 U32 const rest = total - weightTotal;
223 U32 const verif = 1 << BIT_highbit32(rest);
224 U32 const lastWeight = BIT_highbit32(rest) + 1;
225 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
226 huffWeight[oSize] = (BYTE)lastWeight;
227 rankStats[lastWeight]++;
228 } }
229
230 /* check tree construction validity */
231 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
232
233 /* results */
234 *nbSymbolsPtr = (U32)(oSize+1);
235 return iSize+1;
236}