blob: db6f49cfae4ae0300e6e06c7858a24007e227c91 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 header file
4 Copyright (C) 2013-2015, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34****************************************************************** */
Yann Colletaa074052015-10-30 11:21:50 +010035#ifndef FSE_H
36#define FSE_H
Yann Collet4856a002015-01-24 01:58:16 +010037
38#if defined (__cplusplus)
39extern "C" {
40#endif
41
42
Yann Collet3b994cb2016-01-06 01:58:37 +010043/* *****************************************
Yann Collet4856a002015-01-24 01:58:16 +010044* Includes
45******************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -080046#include <stddef.h> /* size_t, ptrdiff_t */
Yann Collet4856a002015-01-24 01:58:16 +010047
48
Yann Colletfb810d62016-01-28 00:18:06 +010049/*-****************************************
Yann Collet4856a002015-01-24 01:58:16 +010050* FSE simple functions
51******************************************/
52size_t FSE_compress(void* dst, size_t maxDstSize,
53 const void* src, size_t srcSize);
Yann Collet1efa31f2015-07-04 15:56:41 -080054size_t FSE_decompress(void* dst, size_t maxDstSize,
Yann Collet4856a002015-01-24 01:58:16 +010055 const void* cSrc, size_t cSrcSize);
Yann Collet3b994cb2016-01-06 01:58:37 +010056/*!
Yann Collet4856a002015-01-24 01:58:16 +010057FSE_compress():
58 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
Yann Colleta7875502015-08-07 15:21:00 +010059 'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
60 return : size of compressed data (<= maxDstSize)
61 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
62 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
63 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
Yann Collet4856a002015-01-24 01:58:16 +010064
65FSE_decompress():
66 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
67 into already allocated destination buffer 'dst', of size 'maxDstSize'.
Yann Collet4856a002015-01-24 01:58:16 +010068 return : size of regenerated data (<= maxDstSize)
69 or an error code, which can be tested using FSE_isError()
Yann Collet4856a002015-01-24 01:58:16 +010070
Yann Collet1efa31f2015-07-04 15:56:41 -080071 ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
72 Why ? : making this distinction requires a header.
Yann Collete8c6bb12015-07-26 00:23:57 +010073 Header management is intentionally delegated to the user layer, which can better manage special cases.
74*/
75
76
Yann Collet3b994cb2016-01-06 01:58:37 +010077/* *****************************************
Yann Collet4856a002015-01-24 01:58:16 +010078* Tool functions
79******************************************/
80size_t FSE_compressBound(size_t size); /* maximum compressed size */
81
82/* Error Management */
83unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
84const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
85
86
Yann Collet3b994cb2016-01-06 01:58:37 +010087/* *****************************************
Yann Collet4856a002015-01-24 01:58:16 +010088* FSE advanced functions
89******************************************/
Yann Collet3b994cb2016-01-06 01:58:37 +010090/*!
Yann Collet4856a002015-01-24 01:58:16 +010091FSE_compress2():
92 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
93 Both parameters can be defined as '0' to mean : use default value
94 return : size of compressed data
Yann Collet1efa31f2015-07-04 15:56:41 -080095 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
96 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
97 if FSE_isError(return), it's an error code.
Yann Collet4856a002015-01-24 01:58:16 +010098*/
99size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
100
101
Yann Collet3b994cb2016-01-06 01:58:37 +0100102/* *****************************************
Yann Collet1efa31f2015-07-04 15:56:41 -0800103* FSE detailed API
Yann Collet4856a002015-01-24 01:58:16 +0100104******************************************/
Yann Collet3b994cb2016-01-06 01:58:37 +0100105/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800106FSE_compress() does the following:
Yann Collet4856a002015-01-24 01:58:16 +01001071. count symbol occurrence from source[] into table count[]
1082. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
Yann Colleta7875502015-08-07 15:21:00 +01001093. save normalized counters to memory buffer using writeNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001104. build encoding table 'CTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001115. encode the data stream using encoding table 'CTable'
Yann Collet4856a002015-01-24 01:58:16 +0100112
Yann Collet1efa31f2015-07-04 15:56:41 -0800113FSE_decompress() does the following:
Yann Colleta7875502015-08-07 15:21:00 +01001141. read normalized counters with readNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001152. build decoding table 'DTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001163. decode the data stream using decoding table 'DTable'
Yann Collet4856a002015-01-24 01:58:16 +0100117
Yann Colleta7875502015-08-07 15:21:00 +0100118The following API allows targeting specific sub-functions for advanced tasks.
Yann Collet1efa31f2015-07-04 15:56:41 -0800119For example, it's possible to compress several blocks using the same 'CTable',
Yann Colleta7875502015-08-07 15:21:00 +0100120or to save and provide normalized distribution using external method.
Yann Collet4856a002015-01-24 01:58:16 +0100121*/
122
123/* *** COMPRESSION *** */
124
Yann Collet3b994cb2016-01-06 01:58:37 +0100125/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800126FSE_count():
Yann Collet4ddb1f52016-01-28 03:24:53 +0100127 Provides the precise count of each byte within a table 'count'
128 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
129 *maxSymbolValuePtr will be updated if detected smaller than initial value.
130 @return : the count of the most frequent symbol (which is not identified)
131 if return == srcSize, there is only one symbol.
132 Can also return an error code, which can be tested with FSE_isError() */
133size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
Yann Collet4856a002015-01-24 01:58:16 +0100134
Yann Collet3b994cb2016-01-06 01:58:37 +0100135/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800136FSE_optimalTableLog():
137 dynamically downsize 'tableLog' when conditions are met.
138 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
139 return : recommended tableLog (necessarily <= initial 'tableLog') */
140unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
141
Yann Collet3b994cb2016-01-06 01:58:37 +0100142/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800143FSE_normalizeCount():
144 normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
145 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
146 return : tableLog,
147 or an errorCode, which can be tested using FSE_isError() */
148size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
149
Yann Collet3b994cb2016-01-06 01:58:37 +0100150/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800151FSE_NCountWriteBound():
152 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
153 Typically useful for allocation purpose. */
154size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
155
Yann Collet3b994cb2016-01-06 01:58:37 +0100156/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800157FSE_writeNCount():
158 Compactly save 'normalizedCounter' into 'buffer'.
159 return : size of the compressed table
160 or an errorCode, which can be tested using FSE_isError() */
161size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
162
163
Yann Collet3b994cb2016-01-06 01:58:37 +0100164/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800165Constructor and Destructor of type FSE_CTable
Yann Colleta7875502015-08-07 15:21:00 +0100166 Note that its size depends on 'tableLog' and 'maxSymbolValue' */
Yann Collet3b994cb2016-01-06 01:58:37 +0100167typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800168FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
169void FSE_freeCTable (FSE_CTable* ct);
170
Yann Collet3b994cb2016-01-06 01:58:37 +0100171/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800172FSE_buildCTable():
Yann Colletae7aa062016-02-03 02:46:46 +0100173 Builds @ct, which must be already allocated, using FSE_createCTable()
Yann Collet1efa31f2015-07-04 15:56:41 -0800174 return : 0
175 or an errorCode, which can be tested using FSE_isError() */
Yann Colleta7875502015-08-07 15:21:00 +0100176size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
Yann Collet1efa31f2015-07-04 15:56:41 -0800177
Yann Collet3b994cb2016-01-06 01:58:37 +0100178/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800179FSE_compress_usingCTable():
Yann Colletae7aa062016-02-03 02:46:46 +0100180 Compress @src using @ct into @dst which must be already allocated
181 return : size of compressed data (<= @dstCapacity)
182 or 0 if compressed data could not fit into @dst
Yann Collet1efa31f2015-07-04 15:56:41 -0800183 or an errorCode, which can be tested using FSE_isError() */
Yann Colletae7aa062016-02-03 02:46:46 +0100184size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
Yann Collet1efa31f2015-07-04 15:56:41 -0800185
Yann Collet3b994cb2016-01-06 01:58:37 +0100186/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800187Tutorial :
188----------
Yann Colleta7875502015-08-07 15:21:00 +0100189The first step is to count all symbols. FSE_count() does this job very fast.
Yann Collet1efa31f2015-07-04 15:56:41 -0800190Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
191'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
192maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
Yann Collet4856a002015-01-24 01:58:16 +0100193FSE_count() will return the number of occurrence of the most frequent symbol.
Yann Colleta7875502015-08-07 15:21:00 +0100194This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
Yann Collet4856a002015-01-24 01:58:16 +0100195If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
196
197The next step is to normalize the frequencies.
198FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
Yann Colleta7875502015-08-07 15:21:00 +0100199It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
200You can use 'tableLog'==0 to mean "use default tableLog value".
201If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
Yann Collet4856a002015-01-24 01:58:16 +0100202which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
203
204The result of FSE_normalizeCount() will be saved into a table,
205called 'normalizedCounter', which is a table of signed short.
206'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
207The return value is tableLog if everything proceeded as expected.
208It is 0 if there is a single symbol within distribution.
Yann Colleta7875502015-08-07 15:21:00 +0100209If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100210
Yann Colleta7875502015-08-07 15:21:00 +0100211'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
212'buffer' must be already allocated.
Yann Collet4856a002015-01-24 01:58:16 +0100213For guaranteed success, buffer size must be at least FSE_headerBound().
Yann Colleta7875502015-08-07 15:21:00 +0100214The result of the function is the number of bytes written into 'buffer'.
215If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
Yann Collet4856a002015-01-24 01:58:16 +0100216
Yann Collet1efa31f2015-07-04 15:56:41 -0800217'normalizedCounter' can then be used to create the compression table 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100218The space required by 'CTable' must be already allocated, using FSE_createCTable().
Yann Collet4856a002015-01-24 01:58:16 +0100219You can then use FSE_buildCTable() to fill 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100220If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100221
Yann Collet1efa31f2015-07-04 15:56:41 -0800222'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
223Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
Yann Colletae7aa062016-02-03 02:46:46 +0100224The function returns the size of compressed data (without header), necessarily <= @dstCapacity.
Yann Colleta7875502015-08-07 15:21:00 +0100225If it returns '0', compressed data could not fit into 'dst'.
Yann Collet1efa31f2015-07-04 15:56:41 -0800226If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100227*/
228
229
230/* *** DECOMPRESSION *** */
231
Yann Collet3b994cb2016-01-06 01:58:37 +0100232/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800233FSE_readNCount():
234 Read compactly saved 'normalizedCounter' from 'rBuffer'.
235 return : size read from 'rBuffer'
236 or an errorCode, which can be tested using FSE_isError()
237 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
238size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
Yann Collet4856a002015-01-24 01:58:16 +0100239
Yann Collet3b994cb2016-01-06 01:58:37 +0100240/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800241Constructor and Destructor of type FSE_DTable
Yann Colleta7875502015-08-07 15:21:00 +0100242 Note that its size depends on 'tableLog' */
243typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800244FSE_DTable* FSE_createDTable(unsigned tableLog);
245void FSE_freeDTable(FSE_DTable* dt);
246
Yann Collet3b994cb2016-01-06 01:58:37 +0100247/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800248FSE_buildDTable():
249 Builds 'dt', which must be already allocated, using FSE_createDTable()
Yann Colleta7875502015-08-07 15:21:00 +0100250 return : 0,
Yann Collet1efa31f2015-07-04 15:56:41 -0800251 or an errorCode, which can be tested using FSE_isError() */
252size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
253
Yann Collet3b994cb2016-01-06 01:58:37 +0100254/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800255FSE_decompress_usingDTable():
Yann Colletae7aa062016-02-03 02:46:46 +0100256 Decompress compressed source @cSrc of size @cSrcSize using @dt
257 into @dst which must be already allocated.
258 return : size of regenerated data (necessarily <= @dstCapacity)
Yann Collet1efa31f2015-07-04 15:56:41 -0800259 or an errorCode, which can be tested using FSE_isError() */
Yann Colletae7aa062016-02-03 02:46:46 +0100260size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
Yann Collet1efa31f2015-07-04 15:56:41 -0800261
Yann Collet3b994cb2016-01-06 01:58:37 +0100262/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800263Tutorial :
264----------
265(Note : these functions only decompress FSE-compressed blocks.
266 If block is uncompressed, use memcpy() instead
267 If block is a single repeated byte, use memset() instead )
Yann Collet4856a002015-01-24 01:58:16 +0100268
269The first step is to obtain the normalized frequencies of symbols.
Yann Colleta7875502015-08-07 15:21:00 +0100270This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
271'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
Yann Collet4856a002015-01-24 01:58:16 +0100272In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
273or size the table to handle worst case situations (typically 256).
Yann Colleta7875502015-08-07 15:21:00 +0100274FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
275The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
276Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
Yann Collet4856a002015-01-24 01:58:16 +0100277If there is an error, the function will return an error code, which can be tested using FSE_isError().
278
Yann Colleta7875502015-08-07 15:21:00 +0100279The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
Yann Collet4856a002015-01-24 01:58:16 +0100280This is performed by the function FSE_buildDTable().
Yann Collet1efa31f2015-07-04 15:56:41 -0800281The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
Yann Collet4856a002015-01-24 01:58:16 +0100282If there is an error, the function will return an error code, which can be tested using FSE_isError().
283
Yann Collet1efa31f2015-07-04 15:56:41 -0800284'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
Yann Colleta7875502015-08-07 15:21:00 +0100285'cSrcSize' must be strictly correct, otherwise decompression will fail.
286FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
287If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
Yann Collet4856a002015-01-24 01:58:16 +0100288*/
289
290
Yann Collet4856a002015-01-24 01:58:16 +0100291#if defined (__cplusplus)
292}
293#endif
Yann Colletaa074052015-10-30 11:21:50 +0100294
295#endif /* FSE_H */