blob: 33fd04bd660607cd11b2a1f06d8a5cf65cc1ef66 [file] [log] [blame]
inikep63ecd742016-05-13 11:27:56 +02001/*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33*************************************************************************** */
34
35/* *************************************
36* Dependencies
37***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
Yann Collet1f2c95c2017-03-05 21:07:20 -080046/*=== Version ===*/
Yann Collet45960372017-02-15 12:00:03 -080047unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48
49
Yann Collet1f2c95c2017-03-05 21:07:20 -080050/*=== Error Management ===*/
inikep63ecd742016-05-13 11:27:56 +020051unsigned FSE_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020052const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
53
inikep63ecd742016-05-13 11:27:56 +020054unsigned HUF_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020055const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
56
57
58/*-**************************************************************
59* FSE NCount encoding-decoding
60****************************************************************/
inikep63ecd742016-05-13 11:27:56 +020061size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62 const void* headerBuffer, size_t hbSize)
63{
64 const BYTE* const istart = (const BYTE*) headerBuffer;
65 const BYTE* const iend = istart + hbSize;
66 const BYTE* ip = istart;
67 int nbBits;
68 int remaining;
69 int threshold;
70 U32 bitStream;
71 int bitCount;
72 unsigned charnum = 0;
73 int previous0 = 0;
74
Nick Terrella97e9a62018-05-23 12:16:00 -070075 if (hbSize < 4) {
Nick Terrellf2d09242018-05-23 14:58:58 -070076 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81 buffer, sizeof(buffer));
82 if (FSE_isError(countSize)) return countSize;
83 if (countSize > hbSize) return ERROR(corruption_detected);
84 return countSize;
85 } }
Nick Terrellc92dd112018-05-23 14:47:20 -070086 assert(hbSize >= 4);
Nick Terrella97e9a62018-05-23 12:16:00 -070087
inikep63ecd742016-05-13 11:27:56 +020088 bitStream = MEM_readLE32(ip);
89 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
90 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
91 bitStream >>= 4;
92 bitCount = 4;
93 *tableLogPtr = nbBits;
94 remaining = (1<<nbBits)+1;
95 threshold = 1<<nbBits;
96 nbBits++;
97
Yann Collet38b75dd2016-07-24 15:35:59 +020098 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +020099 if (previous0) {
100 unsigned n0 = charnum;
101 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200102 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +0200103 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200104 ip += 2;
inikep63ecd742016-05-13 11:27:56 +0200105 bitStream = MEM_readLE32(ip) >> bitCount;
106 } else {
107 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200108 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +0200109 } }
110 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200111 n0 += 3;
112 bitStream >>= 2;
113 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200114 }
115 n0 += bitStream & 3;
116 bitCount += 2;
117 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
118 while (charnum < n0) normalizedCounter[charnum++] = 0;
119 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
Nick Terrella97e9a62018-05-23 12:16:00 -0700120 assert((bitCount >> 3) <= 3); /* For first condition to work */
inikep63ecd742016-05-13 11:27:56 +0200121 ip += bitCount>>3;
122 bitCount &= 7;
123 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200124 } else {
inikep63ecd742016-05-13 11:27:56 +0200125 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200126 } }
Yann Collet45960372017-02-15 12:00:03 -0800127 { int const max = (2*threshold-1) - remaining;
128 int count;
inikep63ecd742016-05-13 11:27:56 +0200129
130 if ((bitStream & (threshold-1)) < (U32)max) {
Yann Collet45960372017-02-15 12:00:03 -0800131 count = bitStream & (threshold-1);
132 bitCount += nbBits-1;
inikep63ecd742016-05-13 11:27:56 +0200133 } else {
Yann Collet45960372017-02-15 12:00:03 -0800134 count = bitStream & (2*threshold-1);
inikep63ecd742016-05-13 11:27:56 +0200135 if (count >= threshold) count -= max;
Yann Collet45960372017-02-15 12:00:03 -0800136 bitCount += nbBits;
inikep63ecd742016-05-13 11:27:56 +0200137 }
138
139 count--; /* extra accuracy */
Yann Collet45960372017-02-15 12:00:03 -0800140 remaining -= count < 0 ? -count : count; /* -1 means +1 */
141 normalizedCounter[charnum++] = (short)count;
inikep63ecd742016-05-13 11:27:56 +0200142 previous0 = !count;
143 while (remaining < threshold) {
144 nbBits--;
145 threshold >>= 1;
146 }
147
148 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
149 ip += bitCount>>3;
150 bitCount &= 7;
151 } else {
152 bitCount -= (int)(8 * (iend - 4 - ip));
153 ip = iend - 4;
154 }
155 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200156 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
157 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200158 if (bitCount > 32) return ERROR(corruption_detected);
Yann Collet1a26ec62018-05-10 17:59:12 -0700159 /* zeroise the rest */
160 { unsigned symbNb = charnum;
161 for (symbNb=charnum; symbNb <= *maxSVPtr; symbNb++)
162 normalizedCounter[symbNb] = 0;
163 }
inikep63ecd742016-05-13 11:27:56 +0200164 *maxSVPtr = charnum-1;
165
166 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200167 return ip-istart;
168}
Yann Colleta91ca622016-06-05 01:33:55 +0200169
170
171/*! HUF_readStats() :
172 Read compact Huffman tree, saved by HUF_writeCTable().
173 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800174 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200175 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200176 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200177*/
178size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
179 U32* nbSymbolsPtr, U32* tableLogPtr,
180 const void* src, size_t srcSize)
181{
182 U32 weightTotal;
183 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700184 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200185 size_t oSize;
186
Nick Terrellccfcc642016-10-17 11:28:02 -0700187 if (!srcSize) return ERROR(srcSize_wrong);
188 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200189 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200190
Yann Collet7ed5e332016-07-24 14:26:11 +0200191 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200192 oSize = iSize - 127;
193 iSize = ((oSize+1)/2);
194 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
195 if (oSize >= hwSize) return ERROR(corruption_detected);
196 ip += 1;
197 { U32 n;
198 for (n=0; n<oSize; n+=2) {
199 huffWeight[n] = ip[n/2] >> 4;
200 huffWeight[n+1] = ip[n/2] & 15;
201 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200202 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800203 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200204 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800205 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200206 if (FSE_isError(oSize)) return oSize;
207 }
208
209 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800210 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200211 weightTotal = 0;
212 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800213 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200214 rankStats[huffWeight[n]]++;
215 weightTotal += (1 << huffWeight[n]) >> 1;
216 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700217 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200218
219 /* get last non-null symbol weight (implied, total must be 2^n) */
220 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800221 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200222 *tableLogPtr = tableLog;
223 /* determine last weight */
224 { U32 const total = 1 << tableLog;
225 U32 const rest = total - weightTotal;
226 U32 const verif = 1 << BIT_highbit32(rest);
227 U32 const lastWeight = BIT_highbit32(rest) + 1;
228 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
229 huffWeight[oSize] = (BYTE)lastWeight;
230 rankStats[lastWeight]++;
231 } }
232
233 /* check tree construction validity */
234 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
235
236 /* results */
237 *nbSymbolsPtr = (U32)(oSize+1);
238 return iSize+1;
239}