blob: 83fd971546b3786eef4c51af6a96d52eb42280fa [file] [log] [blame]
inikep63ecd742016-05-13 11:27:56 +02001/*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33*************************************************************************** */
34
35/* *************************************
36* Dependencies
37***************************************/
inikep63ecd742016-05-13 11:27:56 +020038#include "mem.h"
Yann Colleta91ca622016-06-05 01:33:55 +020039#include "error_private.h" /* ERR_*, ERROR */
Yann Colletd0e2cd12016-06-05 00:58:01 +020040#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
Yann Collet38b75dd2016-07-24 15:35:59 +020041#include "fse.h"
Yann Colleta91ca622016-06-05 01:33:55 +020042#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
Yann Collet38b75dd2016-07-24 15:35:59 +020043#include "huf.h"
inikep63ecd742016-05-13 11:27:56 +020044
45
46/*-****************************************
47* FSE Error Management
48******************************************/
/* FSE_isError() :
 * Public FSE-flavoured entry point for error testing.
 * Forwards `code` unchanged to ERR_isError() and returns its result. */
unsigned FSE_isError(size_t code)
{
    return ERR_isError(code);
}
50
/* FSE_getErrorName() :
 * Public FSE-flavoured entry point for error descriptions.
 * Forwards `code` unchanged to ERR_getErrorName() and returns the string it provides. */
const char* FSE_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
52
53
54/* **************************************************************
55* HUF Error Management
56****************************************************************/
/* HUF_isError() :
 * Public HUF-flavoured entry point for error testing.
 * Forwards `code` unchanged to ERR_isError() and returns its result. */
unsigned HUF_isError(size_t code)
{
    return ERR_isError(code);
}
58
/* HUF_getErrorName() :
 * Public HUF-flavoured entry point for error descriptions.
 * Forwards `code` unchanged to ERR_getErrorName() and returns the string it provides. */
const char* HUF_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
60
61
62/*-**************************************************************
63* FSE NCount encoding-decoding
64****************************************************************/
Yann Colletd5c5a772016-07-19 15:06:55 +020065static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
inikep63ecd742016-05-13 11:27:56 +020066
67size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
68 const void* headerBuffer, size_t hbSize)
69{
70 const BYTE* const istart = (const BYTE*) headerBuffer;
71 const BYTE* const iend = istart + hbSize;
72 const BYTE* ip = istart;
73 int nbBits;
74 int remaining;
75 int threshold;
76 U32 bitStream;
77 int bitCount;
78 unsigned charnum = 0;
79 int previous0 = 0;
80
81 if (hbSize < 4) return ERROR(srcSize_wrong);
82 bitStream = MEM_readLE32(ip);
83 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
84 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
85 bitStream >>= 4;
86 bitCount = 4;
87 *tableLogPtr = nbBits;
88 remaining = (1<<nbBits)+1;
89 threshold = 1<<nbBits;
90 nbBits++;
91
Yann Collet38b75dd2016-07-24 15:35:59 +020092 while ((remaining>1) & (charnum<=*maxSVPtr)) {
inikep63ecd742016-05-13 11:27:56 +020093 if (previous0) {
94 unsigned n0 = charnum;
95 while ((bitStream & 0xFFFF) == 0xFFFF) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020096 n0 += 24;
inikep63ecd742016-05-13 11:27:56 +020097 if (ip < iend-5) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +020098 ip += 2;
inikep63ecd742016-05-13 11:27:56 +020099 bitStream = MEM_readLE32(ip) >> bitCount;
100 } else {
101 bitStream >>= 16;
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200102 bitCount += 16;
inikep63ecd742016-05-13 11:27:56 +0200103 } }
104 while ((bitStream & 3) == 3) {
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200105 n0 += 3;
106 bitStream >>= 2;
107 bitCount += 2;
inikep63ecd742016-05-13 11:27:56 +0200108 }
109 n0 += bitStream & 3;
110 bitCount += 2;
111 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
112 while (charnum < n0) normalizedCounter[charnum++] = 0;
113 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
114 ip += bitCount>>3;
115 bitCount &= 7;
116 bitStream = MEM_readLE32(ip) >> bitCount;
Yann Collet38b75dd2016-07-24 15:35:59 +0200117 } else {
inikep63ecd742016-05-13 11:27:56 +0200118 bitStream >>= 2;
Yann Collet38b75dd2016-07-24 15:35:59 +0200119 } }
inikep63ecd742016-05-13 11:27:56 +0200120 { short const max = (short)((2*threshold-1)-remaining);
121 short count;
122
123 if ((bitStream & (threshold-1)) < (U32)max) {
124 count = (short)(bitStream & (threshold-1));
125 bitCount += nbBits-1;
126 } else {
127 count = (short)(bitStream & (2*threshold-1));
128 if (count >= threshold) count -= max;
129 bitCount += nbBits;
130 }
131
132 count--; /* extra accuracy */
133 remaining -= FSE_abs(count);
134 normalizedCounter[charnum++] = count;
135 previous0 = !count;
136 while (remaining < threshold) {
137 nbBits--;
138 threshold >>= 1;
139 }
140
141 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
142 ip += bitCount>>3;
143 bitCount &= 7;
144 } else {
145 bitCount -= (int)(8 * (iend - 4 - ip));
146 ip = iend - 4;
147 }
148 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
Yann Collet38b75dd2016-07-24 15:35:59 +0200149 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
150 if (remaining != 1) return ERROR(corruption_detected);
Yann Colletcbc5e9d2016-07-24 18:02:04 +0200151 if (bitCount > 32) return ERROR(corruption_detected);
inikep63ecd742016-05-13 11:27:56 +0200152 *maxSVPtr = charnum-1;
153
154 ip += (bitCount+7)>>3;
inikep63ecd742016-05-13 11:27:56 +0200155 return ip-istart;
156}
Yann Colleta91ca622016-06-05 01:33:55 +0200157
158
159/*! HUF_readStats() :
160 Read compact Huffman tree, saved by HUF_writeCTable().
161 `huffWeight` is destination buffer.
Yann Colletb89af202016-12-01 18:24:59 -0800162 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
Yann Colleta91ca622016-06-05 01:33:55 +0200163 @return : size read from `src` , or an error Code .
Yann Collet38b75dd2016-07-24 15:35:59 +0200164 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
Yann Colleta91ca622016-06-05 01:33:55 +0200165*/
166size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
167 U32* nbSymbolsPtr, U32* tableLogPtr,
168 const void* src, size_t srcSize)
169{
170 U32 weightTotal;
171 const BYTE* ip = (const BYTE*) src;
Nick Terrellccfcc642016-10-17 11:28:02 -0700172 size_t iSize;
Yann Colleta91ca622016-06-05 01:33:55 +0200173 size_t oSize;
174
Nick Terrellccfcc642016-10-17 11:28:02 -0700175 if (!srcSize) return ERROR(srcSize_wrong);
176 iSize = ip[0];
Yann Collet7ed5e332016-07-24 14:26:11 +0200177 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
Yann Colleta91ca622016-06-05 01:33:55 +0200178
Yann Collet7ed5e332016-07-24 14:26:11 +0200179 if (iSize >= 128) { /* special header */
Yann Collet38b75dd2016-07-24 15:35:59 +0200180 oSize = iSize - 127;
181 iSize = ((oSize+1)/2);
182 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
183 if (oSize >= hwSize) return ERROR(corruption_detected);
184 ip += 1;
185 { U32 n;
186 for (n=0; n<oSize; n+=2) {
187 huffWeight[n] = ip[n/2] >> 4;
188 huffWeight[n+1] = ip[n/2] & 15;
189 } } }
Yann Colleta91ca622016-06-05 01:33:55 +0200190 else { /* header compressed with FSE (normal case) */
Yann Colletb89af202016-12-01 18:24:59 -0800191 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
Yann Colleta91ca622016-06-05 01:33:55 +0200192 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
Yann Colletb89af202016-12-01 18:24:59 -0800193 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
Yann Colleta91ca622016-06-05 01:33:55 +0200194 if (FSE_isError(oSize)) return oSize;
195 }
196
197 /* collect weight stats */
Yann Colletb89af202016-12-01 18:24:59 -0800198 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
Yann Colleta91ca622016-06-05 01:33:55 +0200199 weightTotal = 0;
200 { U32 n; for (n=0; n<oSize; n++) {
Yann Colletb89af202016-12-01 18:24:59 -0800201 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200202 rankStats[huffWeight[n]]++;
203 weightTotal += (1 << huffWeight[n]) >> 1;
204 } }
Nick Terrelld7605292016-10-19 11:19:54 -0700205 if (weightTotal == 0) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200206
207 /* get last non-null symbol weight (implied, total must be 2^n) */
208 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
Yann Colletb89af202016-12-01 18:24:59 -0800209 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
Yann Colleta91ca622016-06-05 01:33:55 +0200210 *tableLogPtr = tableLog;
211 /* determine last weight */
212 { U32 const total = 1 << tableLog;
213 U32 const rest = total - weightTotal;
214 U32 const verif = 1 << BIT_highbit32(rest);
215 U32 const lastWeight = BIT_highbit32(rest) + 1;
216 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
217 huffWeight[oSize] = (BYTE)lastWeight;
218 rankStats[lastWeight]++;
219 } }
220
221 /* check tree construction validity */
222 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
223
224 /* results */
225 *nbSymbolsPtr = (U32)(oSize+1);
226 return iSize+1;
227}