| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 1 | /* |
| 2 | zstd - standard compression library |
| 3 | Header File for static linking only |
| 4 | Copyright (C) 2014-2015, Yann Collet. |
| 5 | |
| 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 7 | |
| 8 | Redistribution and use in source and binary forms, with or without |
| 9 | modification, are permitted provided that the following conditions are |
| 10 | met: |
| 11 | * Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | * Redistributions in binary form must reproduce the above |
| 14 | copyright notice, this list of conditions and the following disclaimer |
| 15 | in the documentation and/or other materials provided with the |
| 16 | distribution. |
| 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | |
| 29 | You can contact the author at : |
| 30 | - zstd source repository : https://github.com/Cyan4973/zstd |
| 31 | - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c |
| 32 | */ |
| 33 | #pragma once |
| 34 | |
| Yann Collet | 59aac5f | 2015-10-14 16:28:19 +0100 | [diff] [blame] | 35 | /* The objects defined into this file should be considered experimental. |
| 36 | * They are not labelled stable, as their prototype may change in the future. |
| 37 | * You can use them for tests, provide feedback, or if you can endure risk of future changes. |
| 38 | */ |
| 39 | |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 40 | #if defined (__cplusplus) |
| 41 | extern "C" { |
| 42 | #endif |
| 43 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 44 | /* ************************************* |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 45 | * Includes |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 46 | ***************************************/ |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 47 | #include "zstd.h" |
| 48 | |
| 49 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 50 | /* ************************************* |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 51 | * Streaming functions |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 52 | ***************************************/ |
| 53 | size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); |
| 54 | size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); |
| 55 | size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 56 | |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 57 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 58 | typedef struct ZSTD_DCtx_s ZSTD_DCtx; |
| 59 | ZSTD_DCtx* ZSTD_createDCtx(void); |
| 60 | size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx); |
| 61 | size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); |
| Yann Collet | 7393c5a | 2015-07-04 18:20:42 -0800 | [diff] [blame] | 62 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 63 | size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); |
| 64 | size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); |
| Yann Collet | 00be343 | 2015-02-20 18:53:31 +0100 | [diff] [blame] | 65 | /* |
| 66 | Use above functions alternatively. |
| Yann Collet | 59aac5f | 2015-10-14 16:28:19 +0100 | [diff] [blame] | 67 | ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). |
| Yann Collet | b1f3f4b | 2015-10-18 22:18:32 +0100 | [diff] [blame] | 68 | ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. |
| Yann Collet | 59aac5f | 2015-10-14 16:28:19 +0100 | [diff] [blame] | 69 | Result is the number of bytes regenerated within 'dst'. |
| Yann Collet | 00be343 | 2015-02-20 18:53:31 +0100 | [diff] [blame] | 70 | It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. |
| 71 | */ |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 72 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 73 | /* ************************************* |
| Yann Collet | b1f3f4b | 2015-10-18 22:18:32 +0100 | [diff] [blame] | 74 | * Prefix - version detection |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 75 | ***************************************/ |
| Yann Collet | 083fcc8 | 2015-10-25 14:06:35 +0100 | [diff] [blame^] | 76 | #define ZSTD_magicNumber 0xFD2FB523 /* v0.3 (current)*/ |
| Yann Collet | b1f3f4b | 2015-10-18 22:18:32 +0100 | [diff] [blame] | 77 | |
| 78 | |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 79 | /* ************************************* |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 80 | * Error management |
| Yann Collet | 353c5d2 | 2015-10-21 14:39:26 +0100 | [diff] [blame] | 81 | ***************************************/ |
| Yann Collet | b1f3f4b | 2015-10-18 22:18:32 +0100 | [diff] [blame] | 82 | #include "error.h" |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 83 | |
| 84 | |
| Yann Collet | f3eca25 | 2015-10-22 15:31:46 +0100 | [diff] [blame] | 85 | /* ************************************* |
| 86 | * Function body to include |
| 87 | ***************************************/ |
| 88 | #include "mem.h" |
| 89 | static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } |
| 90 | |
| 91 | MEM_STATIC unsigned ZSTD_NbCommonBytes (register size_t val) |
| 92 | { |
| 93 | if (MEM_isLittleEndian()) |
| 94 | { |
| 95 | if (MEM_64bits()) |
| 96 | { |
| 97 | # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 98 | unsigned long r = 0; |
| 99 | _BitScanForward64( &r, (U64)val ); |
| 100 | return (int)(r>>3); |
| 101 | # elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 102 | return (__builtin_ctzll((U64)val) >> 3); |
| 103 | # else |
| 104 | static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; |
| 105 | return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; |
| 106 | # endif |
| 107 | } |
| 108 | else /* 32 bits */ |
| 109 | { |
| 110 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 111 | unsigned long r; |
| 112 | _BitScanForward( &r, (U32)val ); |
| 113 | return (int)(r>>3); |
| 114 | # elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 115 | return (__builtin_ctz((U32)val) >> 3); |
| 116 | # else |
| 117 | static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; |
| 118 | return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; |
| 119 | # endif |
| 120 | } |
| 121 | } |
| 122 | else /* Big Endian CPU */ |
| 123 | { |
| 124 | if (MEM_32bits()) |
| 125 | { |
| 126 | # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 127 | unsigned long r = 0; |
| 128 | _BitScanReverse64( &r, val ); |
| 129 | return (unsigned)(r>>3); |
| 130 | # elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 131 | return (__builtin_clzll(val) >> 3); |
| 132 | # else |
| 133 | unsigned r; |
| 134 | const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ |
| 135 | if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } |
| 136 | if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } |
| 137 | r += (!val); |
| 138 | return r; |
| 139 | # endif |
| 140 | } |
| 141 | else /* 32 bits */ |
| 142 | { |
| 143 | # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 144 | unsigned long r = 0; |
| 145 | _BitScanReverse( &r, (unsigned long)val ); |
| 146 | return (unsigned)(r>>3); |
| 147 | # elif defined(__GNUC__) && (__GNUC__ >= 3) && !defined(LZ4_FORCE_SW_BITCOUNT) |
| 148 | return (__builtin_clz((U32)val) >> 3); |
| 149 | # else |
| 150 | unsigned r; |
| 151 | if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } |
| 152 | r += (!val); |
| 153 | return r; |
| 154 | # endif |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | |
| 160 | MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) |
| 161 | { |
| 162 | const BYTE* const pStart = pIn; |
| 163 | |
| 164 | while ((pIn<pInLimit-(sizeof(size_t)-1))) |
| 165 | { |
| 166 | size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn); |
| 167 | if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } |
| 168 | pIn += ZSTD_NbCommonBytes(diff); |
| 169 | return (size_t)(pIn - pStart); |
| 170 | } |
| 171 | |
| 172 | if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } |
| 173 | if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } |
| 174 | if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; |
| 175 | return (size_t)(pIn - pStart); |
| 176 | } |
| 177 | |
| 178 | |
| 179 | static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } |
| 180 | |
| 181 | #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } |
| 182 | |
| 183 | /*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */ |
| 184 | static void ZSTD_wildcopy(void* dst, const void* src, size_t length) |
| 185 | { |
| 186 | const BYTE* ip = (const BYTE*)src; |
| 187 | BYTE* op = (BYTE*)dst; |
| 188 | BYTE* const oend = op + length; |
| 189 | do COPY8(op, ip) while (op < oend); |
| 190 | } |
| 191 | |
| 192 | |
| 193 | typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; |
| 194 | |
| 195 | typedef struct |
| 196 | { |
| 197 | blockType_t blockType; |
| 198 | U32 origSize; |
| 199 | } blockProperties_t; |
| 200 | |
| 201 | size_t ZSTD_noCompressBlock(void* op, size_t maxDstSize, const void* ip, size_t blockSize); |
| 202 | |
| 203 | |
| 204 | typedef struct { |
| 205 | void* buffer; |
| 206 | U32* offsetStart; |
| 207 | U32* offset; |
| 208 | BYTE* offCodeStart; |
| 209 | BYTE* offCode; |
| 210 | BYTE* litStart; |
| 211 | BYTE* lit; |
| 212 | BYTE* litLengthStart; |
| 213 | BYTE* litLength; |
| 214 | BYTE* matchLengthStart; |
| 215 | BYTE* matchLength; |
| 216 | BYTE* dumpsStart; |
| 217 | BYTE* dumps; |
| 218 | } seqStore_t; |
| 219 | |
| 220 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); |
| 221 | |
| Yann Collet | ed0a781 | 2015-10-23 19:25:06 +0100 | [diff] [blame] | 222 | #define REPCODE_STARTVALUE 4 |
| Yann Collet | f3eca25 | 2015-10-22 15:31:46 +0100 | [diff] [blame] | 223 | #define MLbits 7 |
| 224 | #define LLbits 6 |
| 225 | #define Offbits 5 |
| 226 | #define MaxML ((1<<MLbits) - 1) |
| 227 | #define MaxLL ((1<<LLbits) - 1) |
| 228 | #define MaxOff 31 |
| 229 | |
| 230 | /** ZSTD_storeSeq |
| 231 | Store a sequence (literal length, literals, offset code and match length) into seqStore_t |
| 232 | @offsetCode : distance to match, or 0 == repCode |
| 233 | @matchCode : matchLength - MINMATCH |
| 234 | */ |
| 235 | MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) |
| 236 | { |
| 237 | /* copy Literals */ |
| 238 | ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); |
| 239 | seqStorePtr->lit += litLength; |
| 240 | |
| 241 | /* literal Length */ |
| 242 | if (litLength >= MaxLL) |
| 243 | { |
| 244 | *(seqStorePtr->litLength++) = MaxLL; |
| 245 | if (litLength<255 + MaxLL) |
| 246 | *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); |
| 247 | else |
| 248 | { |
| 249 | *(seqStorePtr->dumps++) = 255; |
| 250 | MEM_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3; |
| 251 | } |
| 252 | } |
| 253 | else *(seqStorePtr->litLength++) = (BYTE)litLength; |
| 254 | |
| 255 | /* match offset */ |
| 256 | *(seqStorePtr->offset++) = (U32)offsetCode; |
| 257 | |
| 258 | /* match Length */ |
| 259 | if (matchCode >= MaxML) |
| 260 | { |
| 261 | *(seqStorePtr->matchLength++) = MaxML; |
| 262 | if (matchCode < 255+MaxML) |
| 263 | *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML); |
| 264 | else |
| 265 | { |
| 266 | *(seqStorePtr->dumps++) = 255; |
| 267 | MEM_writeLE32(seqStorePtr->dumps, (U32)matchCode); seqStorePtr->dumps += 3; |
| 268 | } |
| 269 | } |
| 270 | else *(seqStorePtr->matchLength++) = (BYTE)matchCode; |
| 271 | } |
| 272 | |
| 273 | size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize, const seqStore_t* seqStorePtr, size_t srcSize); |
| 274 | |
| Yann Collet | 439eb77 | 2015-01-31 10:52:59 +0100 | [diff] [blame] | 275 | #if defined (__cplusplus) |
| 276 | } |
| Yann Collet | c5d46b5 | 2015-02-16 18:06:26 +0100 | [diff] [blame] | 277 | #endif |