blob: 4b1ecb15b7471d7af5c08435c08cf45643869d52 [file] [log] [blame]
Yann Collet4856a002015-01-24 01:58:16 +01001/* ******************************************************************
2 FSE : Finite State Entropy coder
3 header file
4 Copyright (C) 2013-2015, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34****************************************************************** */
35#pragma once
36
37#if defined (__cplusplus)
38extern "C" {
39#endif
40
41
42/******************************************
43* Includes
44******************************************/
Yann Collet1efa31f2015-07-04 15:56:41 -080045#include <stddef.h> /* size_t, ptrdiff_t */
Yann Collet4856a002015-01-24 01:58:16 +010046
47
48/******************************************
49* FSE simple functions
50******************************************/
51size_t FSE_compress(void* dst, size_t maxDstSize,
52 const void* src, size_t srcSize);
Yann Collet1efa31f2015-07-04 15:56:41 -080053size_t FSE_decompress(void* dst, size_t maxDstSize,
Yann Collet4856a002015-01-24 01:58:16 +010054 const void* cSrc, size_t cSrcSize);
55/*
56FSE_compress():
57 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
Yann Colleta7875502015-08-07 15:21:00 +010058 'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= FSE_compressBound(srcSize)
59 return : size of compressed data (<= maxDstSize)
60 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
61 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
62 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
Yann Collet4856a002015-01-24 01:58:16 +010063
64FSE_decompress():
65 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
66 into already allocated destination buffer 'dst', of size 'maxDstSize'.
Yann Collet4856a002015-01-24 01:58:16 +010067 return : size of regenerated data (<= maxDstSize)
68 or an error code, which can be tested using FSE_isError()
Yann Collet4856a002015-01-24 01:58:16 +010069
Yann Collet1efa31f2015-07-04 15:56:41 -080070 ** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
71 Why ? : making this distinction requires a header.
Yann Collete8c6bb12015-07-26 00:23:57 +010072 Header management is intentionally delegated to the user layer, which can better manage special cases.
73*/
74
75
76/******************************************
Yann Collet4856a002015-01-24 01:58:16 +010077* Tool functions
78******************************************/
79size_t FSE_compressBound(size_t size); /* maximum compressed size */
80
81/* Error Management */
82unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
83const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
84
85
86/******************************************
87* FSE advanced functions
88******************************************/
89/*
90FSE_compress2():
91 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
92 Both parameters can be defined as '0' to mean : use default value
93 return : size of compressed data
Yann Collet1efa31f2015-07-04 15:56:41 -080094 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
95 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
96 if FSE_isError(return), it's an error code.
Yann Collet4856a002015-01-24 01:58:16 +010097*/
98size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
99
100
101/******************************************
Yann Collet1efa31f2015-07-04 15:56:41 -0800102* FSE detailed API
Yann Collet4856a002015-01-24 01:58:16 +0100103******************************************/
104/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800105FSE_compress() does the following:
Yann Collet4856a002015-01-24 01:58:16 +01001061. count symbol occurrence from source[] into table count[]
1072. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
Yann Colleta7875502015-08-07 15:21:00 +01001083. save normalized counters to memory buffer using writeNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001094. build encoding table 'CTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001105. encode the data stream using encoding table 'CTable'
Yann Collet4856a002015-01-24 01:58:16 +0100111
Yann Collet1efa31f2015-07-04 15:56:41 -0800112FSE_decompress() does the following:
Yann Colleta7875502015-08-07 15:21:00 +01001131. read normalized counters with readNCount()
Yann Collet4856a002015-01-24 01:58:16 +01001142. build decoding table 'DTable' from normalized counters
Yann Collet1efa31f2015-07-04 15:56:41 -08001153. decode the data stream using decoding table 'DTable'
Yann Collet4856a002015-01-24 01:58:16 +0100116
Yann Colleta7875502015-08-07 15:21:00 +0100117The following API allows targeting specific sub-functions for advanced tasks.
Yann Collet1efa31f2015-07-04 15:56:41 -0800118For example, it's possible to compress several blocks using the same 'CTable',
Yann Colleta7875502015-08-07 15:21:00 +0100119or to save and provide normalized distribution using external method.
Yann Collet4856a002015-01-24 01:58:16 +0100120*/
121
122/* *** COMPRESSION *** */
123
Yann Collet1efa31f2015-07-04 15:56:41 -0800124/*
125FSE_count():
126 Provides the precise count of each symbol within a table 'count'
127 'count' is a table of unsigned int, of minimum size (maxSymbolValuePtr[0]+1).
128 maxSymbolValuePtr[0] will be updated if detected smaller than initially expected
129 return : the count of the most frequent symbol (which is not identified)
130 if return == srcSize, there is only one symbol.
131 if FSE_isError(return), it's an error code. */
132size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
Yann Collet4856a002015-01-24 01:58:16 +0100133
134/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800135FSE_optimalTableLog():
136 dynamically downsize 'tableLog' when conditions are met.
137 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
138 return : recommended tableLog (necessarily <= initial 'tableLog') */
139unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
140
141/*
142FSE_normalizeCount():
143 normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
144 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
145 return : tableLog,
146 or an errorCode, which can be tested using FSE_isError() */
147size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
148
149/*
150FSE_NCountWriteBound():
151 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
152 Typically useful for allocation purpose. */
153size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
154
155/*
156FSE_writeNCount():
157 Compactly save 'normalizedCounter' into 'buffer'.
158 return : size of the compressed table
159 or an errorCode, which can be tested using FSE_isError() */
160size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
161
162
163/*
164Constructor and Destructor of type FSE_CTable
Yann Colleta7875502015-08-07 15:21:00 +0100165 Note that its size depends on 'tableLog' and 'maxSymbolValue' */
166typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800167FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
168void FSE_freeCTable (FSE_CTable* ct);
169
170/*
171FSE_buildCTable():
172 Builds 'ct', which must be already allocated, using FSE_createCTable()
173 return : 0
174 or an errorCode, which can be tested using FSE_isError() */
Yann Colleta7875502015-08-07 15:21:00 +0100175size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
Yann Collet1efa31f2015-07-04 15:56:41 -0800176
177/*
178FSE_compress_usingCTable():
179 Compress 'src' using 'ct' into 'dst' which must be already allocated
Yann Colleta7875502015-08-07 15:21:00 +0100180 return : size of compressed data (<= maxDstSize)
181 or 0 if compressed data could not fit into 'dst'
Yann Collet1efa31f2015-07-04 15:56:41 -0800182 or an errorCode, which can be tested using FSE_isError() */
Yann Colleta7875502015-08-07 15:21:00 +0100183size_t FSE_compress_usingCTable (void* dst, size_t maxDstSize, const void* src, size_t srcSize, const FSE_CTable* ct);
Yann Collet1efa31f2015-07-04 15:56:41 -0800184
185/*
186Tutorial :
187----------
Yann Colleta7875502015-08-07 15:21:00 +0100188The first step is to count all symbols. FSE_count() does this job very fast.
Yann Collet1efa31f2015-07-04 15:56:41 -0800189Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
190'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
191maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
Yann Collet4856a002015-01-24 01:58:16 +0100192FSE_count() will return the number of occurrence of the most frequent symbol.
Yann Colleta7875502015-08-07 15:21:00 +0100193This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
Yann Collet4856a002015-01-24 01:58:16 +0100194If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
195
196The next step is to normalize the frequencies.
197FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
Yann Colleta7875502015-08-07 15:21:00 +0100198It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
199You can use 'tableLog'==0 to mean "use default tableLog value".
200If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
Yann Collet4856a002015-01-24 01:58:16 +0100201which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
202
203The result of FSE_normalizeCount() will be saved into a table,
204called 'normalizedCounter', which is a table of signed short.
205'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
206The return value is tableLog if everything proceeded as expected.
207It is 0 if there is a single symbol within distribution.
Yann Colleta7875502015-08-07 15:21:00 +0100208If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100209
Yann Colleta7875502015-08-07 15:21:00 +0100210'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
211'buffer' must be already allocated.
Yann Collet4856a002015-01-24 01:58:16 +0100212For guaranteed success, buffer size must be at least FSE_headerBound().
Yann Colleta7875502015-08-07 15:21:00 +0100213The result of the function is the number of bytes written into 'buffer'.
214If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
Yann Collet4856a002015-01-24 01:58:16 +0100215
Yann Collet1efa31f2015-07-04 15:56:41 -0800216'normalizedCounter' can then be used to create the compression table 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100217The space required by 'CTable' must be already allocated, using FSE_createCTable().
Yann Collet4856a002015-01-24 01:58:16 +0100218You can then use FSE_buildCTable() to fill 'CTable'.
Yann Colleta7875502015-08-07 15:21:00 +0100219If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100220
Yann Collet1efa31f2015-07-04 15:56:41 -0800221'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
222Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
Yann Colleta7875502015-08-07 15:21:00 +0100223The function returns the size of compressed data (without header), necessarily <= maxDstSize.
224If it returns '0', compressed data could not fit into 'dst'.
Yann Collet1efa31f2015-07-04 15:56:41 -0800225If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
Yann Collet4856a002015-01-24 01:58:16 +0100226*/
227
228
229/* *** DECOMPRESSION *** */
230
Yann Collet1efa31f2015-07-04 15:56:41 -0800231/*
232FSE_readNCount():
233 Read compactly saved 'normalizedCounter' from 'rBuffer'.
234 return : size read from 'rBuffer'
235 or an errorCode, which can be tested using FSE_isError()
236 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
237size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
Yann Collet4856a002015-01-24 01:58:16 +0100238
239/*
Yann Collet1efa31f2015-07-04 15:56:41 -0800240Constructor and Destructor of type FSE_DTable
Yann Colleta7875502015-08-07 15:21:00 +0100241 Note that its size depends on 'tableLog' */
242typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
Yann Collet1efa31f2015-07-04 15:56:41 -0800243FSE_DTable* FSE_createDTable(unsigned tableLog);
244void FSE_freeDTable(FSE_DTable* dt);
245
246/*
247FSE_buildDTable():
248 Builds 'dt', which must be already allocated, using FSE_createDTable()
Yann Colleta7875502015-08-07 15:21:00 +0100249 return : 0,
Yann Collet1efa31f2015-07-04 15:56:41 -0800250 or an errorCode, which can be tested using FSE_isError() */
251size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
252
253/*
254FSE_decompress_usingDTable():
Yann Colleta7875502015-08-07 15:21:00 +0100255 Decompress compressed source 'cSrc' of size 'cSrcSize' using 'dt'
256 into 'dst' which must be already allocated.
Yann Collet1efa31f2015-07-04 15:56:41 -0800257 return : size of regenerated data (necessarily <= maxDstSize)
258 or an errorCode, which can be tested using FSE_isError() */
Yann Colleta7875502015-08-07 15:21:00 +0100259size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
Yann Collet1efa31f2015-07-04 15:56:41 -0800260
261/*
262Tutorial :
263----------
264(Note : these functions only decompress FSE-compressed blocks.
265 If block is uncompressed, use memcpy() instead
266 If block is a single repeated byte, use memset() instead )
Yann Collet4856a002015-01-24 01:58:16 +0100267
268The first step is to obtain the normalized frequencies of symbols.
Yann Colleta7875502015-08-07 15:21:00 +0100269This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
270'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
Yann Collet4856a002015-01-24 01:58:16 +0100271In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
272or size the table to handle worst case situations (typically 256).
Yann Colleta7875502015-08-07 15:21:00 +0100273FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
274The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
275Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
Yann Collet4856a002015-01-24 01:58:16 +0100276If there is an error, the function will return an error code, which can be tested using FSE_isError().
277
Yann Colleta7875502015-08-07 15:21:00 +0100278The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
Yann Collet4856a002015-01-24 01:58:16 +0100279This is performed by the function FSE_buildDTable().
Yann Collet1efa31f2015-07-04 15:56:41 -0800280The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
Yann Collet4856a002015-01-24 01:58:16 +0100281If there is an error, the function will return an error code, which can be tested using FSE_isError().
282
Yann Collet1efa31f2015-07-04 15:56:41 -0800283'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
Yann Colleta7875502015-08-07 15:21:00 +0100284'cSrcSize' must be strictly correct, otherwise decompression will fail.
285FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=maxDstSize).
286If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
Yann Collet4856a002015-01-24 01:58:16 +0100287*/
288
289
Yann Collet4856a002015-01-24 01:58:16 +0100290#if defined (__cplusplus)
291}
292#endif