blob: a8d0b146becf2df12dd2fcda2e65841054cc444c [file] [log] [blame]
inikep63ecd742016-05-13 11:27:56 +02001/*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33*************************************************************************** */
34
35/* *************************************
36* Dependencies
37***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
Yann Collet1f2c95c2017-03-05 21:07:20 -080046/*=== Version ===*/
Yann Collet45960372017-02-15 12:00:03 -080047unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48
49
Yann Collet1f2c95c2017-03-05 21:07:20 -080050/*=== Error Management ===*/
inikep63ecd742016-05-13 11:27:56 +020051unsigned FSE_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020052const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
53
inikep63ecd742016-05-13 11:27:56 +020054unsigned HUF_isError(size_t code) { return ERR_isError(code); }
inikep63ecd742016-05-13 11:27:56 +020055const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
56
57
58/*-**************************************************************
59* FSE NCount encoding-decoding
60****************************************************************/
inikep63ecd742016-05-13 11:27:56 +020061size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62 const void* headerBuffer, size_t hbSize)
63{
64 const BYTE* const istart = (const BYTE*) headerBuffer;
65 const BYTE* const iend = istart + hbSize;
66 const BYTE* ip = istart;
67 int nbBits;
68 int remaining;
69 int threshold;
70 U32 bitStream;
71 int bitCount;
72 unsigned charnum = 0;
73 int previous0 = 0;
74
Nick Terrella97e9a62018-05-23 12:16:00 -070075 if (hbSize < 4) {
76 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 return FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer));
81 }
82
inikep63ecd742016-05-13 11:27:56 +020083 bitStream = MEM_readLE32(ip);
84 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
85 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
86 bitStream >>= 4;
87 bitCount = 4;
88 *tableLogPtr = nbBits;
89 remaining = (1<<nbBits)+1;
90 threshold = 1<<nbBits;
91 nbBits++;
92
Yann Collet38b75dd2016-07-24 15:35:59 +020093 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +020094 if (previous0) {
95 unsigned n0 = charnum;
96 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020097 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +020098 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020099 ip += 2;
inikep63ecd742016-05-13 11:27:56 +0200100 bitStream = MEM_readLE32(ip) >> bitCount;
101 } else {
102 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200103 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +0200104 } }
105 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200106 n0 += 3;
107 bitStream >>= 2;
108 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200109 }
110 n0 += bitStream & 3;
111 bitCount += 2;
112 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
113 while (charnum < n0) normalizedCounter[charnum++] = 0;
114 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
Nick Terrella97e9a62018-05-23 12:16:00 -0700115 assert((bitCount >> 3) <= 3); /* For first condition to work */
inikep63ecd742016-05-13 11:27:56 +0200116 ip += bitCount>>3;
117 bitCount &= 7;
118 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200119 } else {
inikep63ecd742016-05-13 11:27:56 +0200120 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200121 } }
Yann Collet45960372017-02-15 12:00:03 -0800122 { int const max = (2*threshold-1) - remaining;
123 int count;
inikep63ecd742016-05-13 11:27:56 +0200124
125 if ((bitStream & (threshold-1)) < (U32)max) {
Yann Collet45960372017-02-15 12:00:03 -0800126 count = bitStream & (threshold-1);
127 bitCount += nbBits-1;
inikep63ecd742016-05-13 11:27:56 +0200128 } else {
Yann Collet45960372017-02-15 12:00:03 -0800129 count = bitStream & (2*threshold-1);
inikep63ecd742016-05-13 11:27:56 +0200130 if (count >= threshold) count -= max;
Yann Collet45960372017-02-15 12:00:03 -0800131 bitCount += nbBits;
inikep63ecd742016-05-13 11:27:56 +0200132 }
133
134 count--; /* extra accuracy */
Yann Collet45960372017-02-15 12:00:03 -0800135 remaining -= count < 0 ? -count : count; /* -1 means +1 */
136 normalizedCounter[charnum++] = (short)count;
inikep63ecd742016-05-13 11:27:56 +0200137 previous0 = !count;
138 while (remaining < threshold) {
139 nbBits--;
140 threshold >>= 1;
141 }
142
143 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
144 ip += bitCount>>3;
145 bitCount &= 7;
146 } else {
147 bitCount -= (int)(8 * (iend - 4 - ip));
148 ip = iend - 4;
149 }
150 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200151 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
152 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200153 if (bitCount > 32) return ERROR(corruption_detected);
Yann Collet1a26ec62018-05-10 17:59:12 -0700154 /* zeroise the rest */
155 { unsigned symbNb = charnum;
156 for (symbNb=charnum; symbNb <= *maxSVPtr; symbNb++)
157 normalizedCounter[symbNb] = 0;
158 }
inikep63ecd742016-05-13 11:27:56 +0200159 *maxSVPtr = charnum-1;
160
161 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200162 return ip-istart;
163}
Yann Colleta91ca622016-06-05 01:33:55 +0200164
165
166/*! HUF_readStats() :
167 Read compact Huffman tree, saved by HUF_writeCTable().
168 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800169 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200170 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200171 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200172*/
173size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
174 U32* nbSymbolsPtr, U32* tableLogPtr,
175 const void* src, size_t srcSize)
176{
177 U32 weightTotal;
178 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700179 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200180 size_t oSize;
181
Nick Terrellccfcc642016-10-17 11:28:02 -0700182 if (!srcSize) return ERROR(srcSize_wrong);
183 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200184 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200185
Yann Collet7ed5e332016-07-24 14:26:11 +0200186 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200187 oSize = iSize - 127;
188 iSize = ((oSize+1)/2);
189 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
190 if (oSize >= hwSize) return ERROR(corruption_detected);
191 ip += 1;
192 { U32 n;
193 for (n=0; n<oSize; n+=2) {
194 huffWeight[n] = ip[n/2] >> 4;
195 huffWeight[n+1] = ip[n/2] & 15;
196 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200197 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800198 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200199 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800200 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200201 if (FSE_isError(oSize)) return oSize;
202 }
203
204 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800205 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200206 weightTotal = 0;
207 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800208 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200209 rankStats[huffWeight[n]]++;
210 weightTotal += (1 << huffWeight[n]) >> 1;
211 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700212 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200213
214 /* get last non-null symbol weight (implied, total must be 2^n) */
215 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800216 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200217 *tableLogPtr = tableLog;
218 /* determine last weight */
219 { U32 const total = 1 << tableLog;
220 U32 const rest = total - weightTotal;
221 U32 const verif = 1 << BIT_highbit32(rest);
222 U32 const lastWeight = BIT_highbit32(rest) + 1;
223 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
224 huffWeight[oSize] = (BYTE)lastWeight;
225 rankStats[lastWeight]++;
226 } }
227
228 /* check tree construction validity */
229 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
230
231 /* results */
232 *nbSymbolsPtr = (U32)(oSize+1);
233 return iSize+1;
234}