blob: 72bc398da380985e9c2d268c00b2b32e0d47fa45 [file] [log] [blame]
inikep63ecd742016-05-13 11:27:56 +02001/*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33*************************************************************************** */
34
35/* *************************************
36* Dependencies
37***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
46/*-****************************************
Yann Collet45960372017-02-15 12:00:03 -080047* Version
48******************************************/
49unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
50
51
52/*-****************************************
inikep63ecd742016-05-13 11:27:56 +020053* FSE Error Management
54******************************************/
55unsigned FSE_isError(size_t code) { return ERR_isError(code); }
56
57const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
58
59
60/* **************************************************************
61* HUF Error Management
62****************************************************************/
63unsigned HUF_isError(size_t code) { return ERR_isError(code); }
64
65const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
66
67
68/*-**************************************************************
69* FSE NCount encoding-decoding
70****************************************************************/
inikep63ecd742016-05-13 11:27:56 +020071size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
72 const void* headerBuffer, size_t hbSize)
73{
74 const BYTE* const istart = (const BYTE*) headerBuffer;
75 const BYTE* const iend = istart + hbSize;
76 const BYTE* ip = istart;
77 int nbBits;
78 int remaining;
79 int threshold;
80 U32 bitStream;
81 int bitCount;
82 unsigned charnum = 0;
83 int previous0 = 0;
84
85 if (hbSize < 4) return ERROR(srcSize_wrong);
86 bitStream = MEM_readLE32(ip);
87 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
88 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
89 bitStream >>= 4;
90 bitCount = 4;
91 *tableLogPtr = nbBits;
92 remaining = (1<<nbBits)+1;
93 threshold = 1<<nbBits;
94 nbBits++;
95
Yann Collet38b75dd2016-07-24 15:35:59 +020096 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +020097 if (previous0) {
98 unsigned n0 = charnum;
99 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200100 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +0200101 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200102 ip += 2;
inikep63ecd742016-05-13 11:27:56 +0200103 bitStream = MEM_readLE32(ip) >> bitCount;
104 } else {
105 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200106 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +0200107 } }
108 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200109 n0 += 3;
110 bitStream >>= 2;
111 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200112 }
113 n0 += bitStream & 3;
114 bitCount += 2;
115 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
116 while (charnum < n0) normalizedCounter[charnum++] = 0;
117 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
118 ip += bitCount>>3;
119 bitCount &= 7;
120 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200121 } else {
inikep63ecd742016-05-13 11:27:56 +0200122 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200123 } }
Yann Collet45960372017-02-15 12:00:03 -0800124 { int const max = (2*threshold-1) - remaining;
125 int count;
inikep63ecd742016-05-13 11:27:56 +0200126
127 if ((bitStream & (threshold-1)) < (U32)max) {
Yann Collet45960372017-02-15 12:00:03 -0800128 count = bitStream & (threshold-1);
129 bitCount += nbBits-1;
inikep63ecd742016-05-13 11:27:56 +0200130 } else {
Yann Collet45960372017-02-15 12:00:03 -0800131 count = bitStream & (2*threshold-1);
inikep63ecd742016-05-13 11:27:56 +0200132 if (count >= threshold) count -= max;
Yann Collet45960372017-02-15 12:00:03 -0800133 bitCount += nbBits;
inikep63ecd742016-05-13 11:27:56 +0200134 }
135
136 count--; /* extra accuracy */
Yann Collet45960372017-02-15 12:00:03 -0800137 remaining -= count < 0 ? -count : count; /* -1 means +1 */
138 normalizedCounter[charnum++] = (short)count;
inikep63ecd742016-05-13 11:27:56 +0200139 previous0 = !count;
140 while (remaining < threshold) {
141 nbBits--;
142 threshold >>= 1;
143 }
144
145 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
146 ip += bitCount>>3;
147 bitCount &= 7;
148 } else {
149 bitCount -= (int)(8 * (iend - 4 - ip));
150 ip = iend - 4;
151 }
152 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200153 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
154 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200155 if (bitCount > 32) return ERROR(corruption_detected);
inikep63ecd742016-05-13 11:27:56 +0200156 *maxSVPtr = charnum-1;
157
158 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200159 return ip-istart;
160}
Yann Colleta91ca622016-06-05 01:33:55 +0200161
162
163/*! HUF_readStats() :
164 Read compact Huffman tree, saved by HUF_writeCTable().
165 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800166 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200167 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200168 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200169*/
170size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
171 U32* nbSymbolsPtr, U32* tableLogPtr,
172 const void* src, size_t srcSize)
173{
174 U32 weightTotal;
175 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700176 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200177 size_t oSize;
178
Nick Terrellccfcc642016-10-17 11:28:02 -0700179 if (!srcSize) return ERROR(srcSize_wrong);
180 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200181 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200182
Yann Collet7ed5e332016-07-24 14:26:11 +0200183 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200184 oSize = iSize - 127;
185 iSize = ((oSize+1)/2);
186 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
187 if (oSize >= hwSize) return ERROR(corruption_detected);
188 ip += 1;
189 { U32 n;
190 for (n=0; n<oSize; n+=2) {
191 huffWeight[n] = ip[n/2] >> 4;
192 huffWeight[n+1] = ip[n/2] & 15;
193 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200194 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800195 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200196 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800197 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200198 if (FSE_isError(oSize)) return oSize;
199 }
200
201 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800202 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200203 weightTotal = 0;
204 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800205 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200206 rankStats[huffWeight[n]]++;
207 weightTotal += (1 << huffWeight[n]) >> 1;
208 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700209 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200210
211 /* get last non-null symbol weight (implied, total must be 2^n) */
212 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800213 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200214 *tableLogPtr = tableLog;
215 /* determine last weight */
216 { U32 const total = 1 << tableLog;
217 U32 const rest = total - weightTotal;
218 U32 const verif = 1 << BIT_highbit32(rest);
219 U32 const lastWeight = BIT_highbit32(rest) + 1;
220 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
221 huffWeight[oSize] = (BYTE)lastWeight;
222 rankStats[lastWeight]++;
223 } }
224
225 /* check tree construction validity */
226 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
227
228 /* results */
229 *nbSymbolsPtr = (U32)(oSize+1);
230 return iSize+1;
231}