blob: 6dce683002d0148bd7e31b3c7a6f8bee5e81c296 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
Yann Collet944d0d22016-03-04 19:26:59 +01002 FSE : Finite State Entropy codec
3 Public Prototypes declaration
4 Copyright (C) 2013-2016, Yann Collet.
Yann Collet4856a002015-01-24 01:58:16 +01005
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
Yann Collet4856a002015-01-24 01:58:16 +010033****************************************************************** */
Yann Colletaa074052015-10-30 11:21:50 +010034#ifndef FSE_H
35#define FSE_H
Yann Collet4856a002015-01-24 01:58:16 +010036
37#if defined (__cplusplus)
38extern "C" {
39#endif
40
41
Yann Collet944d0d22016-03-04 19:26:59 +010042/*-*****************************************
43* Dependencies
Yann Collet4856a002015-01-24 01:58:16 +010044******************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -080045#include <stddef.h> /* size_t, ptrdiff_t */
Yann Collet4856a002015-01-24 01:58:16 +010046
47
Yann Colletfb810d62016-01-28 00:18:06 +010048/*-****************************************
Yann Collet4856a002015-01-24 01:58:16 +010049* FSE simple functions
50******************************************/
Yann Collet944d0d22016-03-04 19:26:59 +010051/*! FSE_compress() :
Yann Collet4856a002015-01-24 01:58:16 +010052 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
Yann Collet944d0d22016-03-04 19:26:59 +010053 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
54 @return : size of compressed data (<= dstCapacity).
Yann Colleta7875502015-08-07 15:21:00 +010055 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
56 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
57 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
Yann Collet944d0d22016-03-04 19:26:59 +010058*/
59size_t FSE_compress(void* dst, size_t dstCapacity,
60 const void* src, size_t srcSize);
Yann Collet4856a002015-01-24 01:58:16 +010061
Yann Collet944d0d22016-03-04 19:26:59 +010062/*! FSE_decompress():
Yann Collet4856a002015-01-24 01:58:16 +010063 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
Yann Collet944d0d22016-03-04 19:26:59 +010064 into already allocated destination buffer 'dst', of size 'dstCapacity'.
65 @return : size of regenerated data (<= maxDstSize),
66 or an error code, which can be tested using FSE_isError() .
Yann Collet4856a002015-01-24 01:58:16 +010067
Yann Collet944d0d22016-03-04 19:26:59 +010068 ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
Yann Collet1efa31f2015-07-04 15:56:41 -080069 Why ? : making this distinction requires a header.
Yann Collete8c6bb12015-07-26 00:23:57 +010070 Header management is intentionally delegated to the user layer, which can better manage special cases.
71*/
Yann Collet944d0d22016-03-04 19:26:59 +010072size_t FSE_decompress(void* dst, size_t dstCapacity,
73 const void* cSrc, size_t cSrcSize);
Yann Collete8c6bb12015-07-26 00:23:57 +010074
75
Yann Collet944d0d22016-03-04 19:26:59 +010076/*-*****************************************
Yann Collet4856a002015-01-24 01:58:16 +010077* Tool functions
78******************************************/
79size_t FSE_compressBound(size_t size); /* maximum compressed size */
80
81/* Error Management */
82unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
83const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
84
85
Yann Collet944d0d22016-03-04 19:26:59 +010086/*-*****************************************
Yann Collet4856a002015-01-24 01:58:16 +010087* FSE advanced functions
88******************************************/
Yann Collet944d0d22016-03-04 19:26:59 +010089/*! FSE_compress2() :
Yann Collet4856a002015-01-24 01:58:16 +010090 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
91 Both parameters can be defined as '0' to mean : use default value
Yann Collet944d0d22016-03-04 19:26:59 +010092 @return : size of compressed data
Yann Collet1efa31f2015-07-04 15:56:41 -080093 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
94 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
95 if FSE_isError(return), it's an error code.
Yann Collet4856a002015-01-24 01:58:16 +010096*/
97size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
98
99
Yann Collet944d0d22016-03-04 19:26:59 +0100100/*-*****************************************
Yann Collet1efa31f2015-07-04 15:56:41 -0800101* FSE detailed API
Yann Collet4856a002015-01-24 01:58:16 +0100102******************************************/
Yann Collet3b994cb2016-01-06 01:58:37 +0100103/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800104FSE_compress() does the following:
Yann Collet4856a002015-01-24 01:58:16 +01001051. count symbol occurrence from source[] into table count[]
1062. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
Yann Colleta7875502015-08-07 15:21:00 +01001073. save normalized counters to memory buffer using writeNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001084. build encoding table 'CTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001095. encode the data stream using encoding table 'CTable'
Yann Collet4856a002015-01-24 01:58:16 +0100110
Yann Collet1efa31f2015-07-04 15:56:41 -0800111FSE_decompress() does the following:
Yann Colleta7875502015-08-07 15:21:00 +01001121. read normalized counters with readNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001132. build decoding table 'DTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001143. decode the data stream using decoding table 'DTable'
Yann Collet4856a002015-01-24 01:58:16 +0100115
Yann Colleta7875502015-08-07 15:21:00 +0100116The following API allows targeting specific sub-functions for advanced tasks.
Yann Collet1efa31f2015-07-04 15:56:41 -0800117For example, it's possible to compress several blocks using the same 'CTable',
Yann Colleta7875502015-08-07 15:21:00 +0100118or to save and provide normalized distribution using external method.
Yann Collet4856a002015-01-24 01:58:16 +0100119*/
120
121/* *** COMPRESSION *** */
122
Yann Collet944d0d22016-03-04 19:26:59 +0100123/*! FSE_count():
124 Provides the precise count of each byte within a table 'count'.
125 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
126 *maxSymbolValuePtr will be updated if detected smaller than initial value.
127 @return : the count of the most frequent symbol (which is not identified).
128 if return == srcSize, there is only one symbol.
129 Can also return an error code, which can be tested with FSE_isError(). */
Yann Collet4ddb1f52016-01-28 03:24:53 +0100130size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
Yann Collet4856a002015-01-24 01:58:16 +0100131
Yann Collet944d0d22016-03-04 19:26:59 +0100132/*! FSE_optimalTableLog():
133 dynamically downsize 'tableLog' when conditions are met.
134 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
135 @return : recommended tableLog (necessarily <= initial 'tableLog') */
Yann Collet1efa31f2015-07-04 15:56:41 -0800136unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
137
Yann Collet944d0d22016-03-04 19:26:59 +0100138/*! FSE_normalizeCount():
139 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
140 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
141 @return : tableLog,
142 or an errorCode, which can be tested using FSE_isError() */
Yann Collet1efa31f2015-07-04 15:56:41 -0800143size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
144
Yann Collet944d0d22016-03-04 19:26:59 +0100145/*! FSE_NCountWriteBound():
146 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
147 Typically useful for allocation purpose. */
Yann Collet1efa31f2015-07-04 15:56:41 -0800148size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
149
Yann Collet944d0d22016-03-04 19:26:59 +0100150/*! FSE_writeNCount():
151 Compactly save 'normalizedCounter' into 'buffer'.
152 @return : size of the compressed table,
153 or an errorCode, which can be tested using FSE_isError(). */
Yann Collet1efa31f2015-07-04 15:56:41 -0800154size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
155
156
Yann Collet944d0d22016-03-04 19:26:59 +0100157/*! Constructor and Destructor of FSE_CTable.
158 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
Yann Collet3b994cb2016-01-06 01:58:37 +0100159typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800160FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
161void FSE_freeCTable (FSE_CTable* ct);
162
Yann Collet944d0d22016-03-04 19:26:59 +0100163/*! FSE_buildCTable():
164 Builds `ct`, which must be already allocated, using FSE_createCTable().
165 @return : 0, or an errorCode, which can be tested using FSE_isError() */
Yann Colleta7875502015-08-07 15:21:00 +0100166size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
Yann Collet1efa31f2015-07-04 15:56:41 -0800167
Yann Collet944d0d22016-03-04 19:26:59 +0100168/*! FSE_compress_usingCTable():
169 Compress `src` using `ct` into `dst` which must be already allocated.
170 @return : size of compressed data (<= `dstCapacity`),
171 or 0 if compressed data could not fit into `dst`,
172 or an errorCode, which can be tested using FSE_isError() */
Yann Colletae7aa062016-02-03 02:46:46 +0100173size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
Yann Collet1efa31f2015-07-04 15:56:41 -0800174
Yann Collet3b994cb2016-01-06 01:58:37 +0100175/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800176Tutorial :
177----------
Yann Colleta7875502015-08-07 15:21:00 +0100178The first step is to count all symbols. FSE_count() does this job very fast.
Yann Collet1efa31f2015-07-04 15:56:41 -0800179Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
180'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
181maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
Yann Collet4856a002015-01-24 01:58:16 +0100182FSE_count() will return the number of occurrence of the most frequent symbol.
Yann Colleta7875502015-08-07 15:21:00 +0100183This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
Yann Collet4856a002015-01-24 01:58:16 +0100184If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
185
186The next step is to normalize the frequencies.
187FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
Yann Colleta7875502015-08-07 15:21:00 +0100188It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
189You can use 'tableLog'==0 to mean "use default tableLog value".
190If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
Yann Collet4856a002015-01-24 01:58:16 +0100191which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
192
193The result of FSE_normalizeCount() will be saved into a table,
194called 'normalizedCounter', which is a table of signed short.
195'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
196The return value is tableLog if everything proceeded as expected.
197It is 0 if there is a single symbol within distribution.
Yann Colleta7875502015-08-07 15:21:00 +0100198If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100199
Yann Colleta7875502015-08-07 15:21:00 +0100200'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
201'buffer' must be already allocated.
Yann Collet4856a002015-01-24 01:58:16 +0100202For guaranteed success, buffer size must be at least FSE_headerBound().
Yann Colleta7875502015-08-07 15:21:00 +0100203The result of the function is the number of bytes written into 'buffer'.
204If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
Yann Collet4856a002015-01-24 01:58:16 +0100205
Yann Collet1efa31f2015-07-04 15:56:41 -0800206'normalizedCounter' can then be used to create the compression table 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100207The space required by 'CTable' must be already allocated, using FSE_createCTable().
Yann Collet4856a002015-01-24 01:58:16 +0100208You can then use FSE_buildCTable() to fill 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100209If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100210
Yann Collet1efa31f2015-07-04 15:56:41 -0800211'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
212Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
Yann Collet944d0d22016-03-04 19:26:59 +0100213The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
Yann Colleta7875502015-08-07 15:21:00 +0100214If it returns '0', compressed data could not fit into 'dst'.
Yann Collet1efa31f2015-07-04 15:56:41 -0800215If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100216*/
217
218
219/* *** DECOMPRESSION *** */
220
Yann Collet944d0d22016-03-04 19:26:59 +0100221/*! FSE_readNCount():
222 Read compactly saved 'normalizedCounter' from 'rBuffer'.
223 @return : size read from 'rBuffer',
224 or an errorCode, which can be tested using FSE_isError().
225 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
Yann Collet1efa31f2015-07-04 15:56:41 -0800226size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
Yann Collet4856a002015-01-24 01:58:16 +0100227
Yann Collet944d0d22016-03-04 19:26:59 +0100228/*! Constructor and Destructor of FSE_DTable.
Yann Colleta7875502015-08-07 15:21:00 +0100229 Note that its size depends on 'tableLog' */
230typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800231FSE_DTable* FSE_createDTable(unsigned tableLog);
232void FSE_freeDTable(FSE_DTable* dt);
233
Yann Collet944d0d22016-03-04 19:26:59 +0100234/*! FSE_buildDTable():
235 Builds 'dt', which must be already allocated, using FSE_createDTable().
236 return : 0, or an errorCode, which can be tested using FSE_isError() */
Yann Collet1efa31f2015-07-04 15:56:41 -0800237size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
238
Yann Collet944d0d22016-03-04 19:26:59 +0100239/*! FSE_decompress_usingDTable():
240 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
241 into `dst` which must be already allocated.
242 @return : size of regenerated data (necessarily <= `dstCapacity`),
243 or an errorCode, which can be tested using FSE_isError() */
Yann Colletae7aa062016-02-03 02:46:46 +0100244size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
Yann Collet1efa31f2015-07-04 15:56:41 -0800245
Yann Collet3b994cb2016-01-06 01:58:37 +0100246/*!
Yann Collet1efa31f2015-07-04 15:56:41 -0800247Tutorial :
248----------
249(Note : these functions only decompress FSE-compressed blocks.
250 If block is uncompressed, use memcpy() instead
251 If block is a single repeated byte, use memset() instead )
Yann Collet4856a002015-01-24 01:58:16 +0100252
253The first step is to obtain the normalized frequencies of symbols.
Yann Colleta7875502015-08-07 15:21:00 +0100254This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
255'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
Yann Collet4856a002015-01-24 01:58:16 +0100256In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
257or size the table to handle worst case situations (typically 256).
Yann Colleta7875502015-08-07 15:21:00 +0100258FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
259The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
260Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
Yann Collet4856a002015-01-24 01:58:16 +0100261If there is an error, the function will return an error code, which can be tested using FSE_isError().
262
Yann Colleta7875502015-08-07 15:21:00 +0100263The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
Yann Collet4856a002015-01-24 01:58:16 +0100264This is performed by the function FSE_buildDTable().
Yann Collet1efa31f2015-07-04 15:56:41 -0800265The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
Yann Collet4856a002015-01-24 01:58:16 +0100266If there is an error, the function will return an error code, which can be tested using FSE_isError().
267
Yann Collet944d0d22016-03-04 19:26:59 +0100268`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
269`cSrcSize` must be strictly correct, otherwise decompression will fail.
270FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
Yann Colleta7875502015-08-07 15:21:00 +0100271If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
Yann Collet4856a002015-01-24 01:58:16 +0100272*/
273
274
Yann Collet4856a002015-01-24 01:58:16 +0100275#if defined (__cplusplus)
276}
277#endif
Yann Colletaa074052015-10-30 11:21:50 +0100278
279#endif /* FSE_H */