Chanho Min | c72ac7a | 2013-07-08 16:01:49 -0700 | [diff] [blame] | 1 | /* |
| 2 | * LZ4 HC - High Compression Mode of LZ4 |
| 3 | * Copyright (C) 2011-2012, Yann Collet. |
| 4 | * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions are |
| 8 | * met: |
| 9 | * |
| 10 | * * Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * * Redistributions in binary form must reproduce the above |
| 13 | * copyright notice, this list of conditions and the following disclaimer |
| 14 | * in the documentation and/or other materials provided with the |
| 15 | * distribution. |
| 16 | * |
| 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 18 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 21 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 22 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 23 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 24 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 25 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 27 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | * |
| 29 | * You can contact the author at : |
| 30 | * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html |
| 31 | * - LZ4 source repository : http://code.google.com/p/lz4/ |
| 32 | * |
| 33 | * Changed for kernel use by: |
| 34 | * Chanho Min <chanho.min@lge.com> |
| 35 | */ |
| 36 | |
| 37 | #include <linux/module.h> |
| 38 | #include <linux/kernel.h> |
| 39 | #include <linux/lz4.h> |
| 40 | #include <asm/unaligned.h> |
| 41 | #include "lz4defs.h" |
| 42 | |
| 43 | struct lz4hc_data { |
| 44 | const u8 *base; |
| 45 | HTYPE hashtable[HASHTABLESIZE]; |
| 46 | u16 chaintable[MAXD]; |
| 47 | const u8 *nexttoupdate; |
| 48 | } __attribute__((__packed__)); |
| 49 | |
| 50 | static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) |
| 51 | { |
| 52 | memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); |
| 53 | memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); |
| 54 | |
| 55 | #if LZ4_ARCH64 |
| 56 | hc4->nexttoupdate = base + 1; |
| 57 | #else |
| 58 | hc4->nexttoupdate = base; |
| 59 | #endif |
| 60 | hc4->base = base; |
| 61 | return 1; |
| 62 | } |
| 63 | |
| 64 | /* Update chains up to ip (excluded) */ |
| 65 | static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) |
| 66 | { |
| 67 | u16 *chaintable = hc4->chaintable; |
| 68 | HTYPE *hashtable = hc4->hashtable; |
| 69 | #if LZ4_ARCH64 |
| 70 | const BYTE * const base = hc4->base; |
| 71 | #else |
| 72 | const int base = 0; |
| 73 | #endif |
| 74 | |
| 75 | while (hc4->nexttoupdate < ip) { |
| 76 | const u8 *p = hc4->nexttoupdate; |
| 77 | size_t delta = p - (hashtable[HASH_VALUE(p)] + base); |
| 78 | if (delta > MAX_DISTANCE) |
| 79 | delta = MAX_DISTANCE; |
| 80 | chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; |
| 81 | hashtable[HASH_VALUE(p)] = (p) - base; |
| 82 | hc4->nexttoupdate++; |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, |
| 87 | const u8 *const matchlimit) |
| 88 | { |
| 89 | const u8 *p1t = p1; |
| 90 | |
| 91 | while (p1t < matchlimit - (STEPSIZE - 1)) { |
| 92 | #if LZ4_ARCH64 |
| 93 | u64 diff = A64(p2) ^ A64(p1t); |
| 94 | #else |
| 95 | u32 diff = A32(p2) ^ A32(p1t); |
| 96 | #endif |
| 97 | if (!diff) { |
| 98 | p1t += STEPSIZE; |
| 99 | p2 += STEPSIZE; |
| 100 | continue; |
| 101 | } |
| 102 | p1t += LZ4_NBCOMMONBYTES(diff); |
| 103 | return p1t - p1; |
| 104 | } |
| 105 | #if LZ4_ARCH64 |
| 106 | if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { |
| 107 | p1t += 4; |
| 108 | p2 += 4; |
| 109 | } |
| 110 | #endif |
| 111 | |
| 112 | if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { |
| 113 | p1t += 2; |
| 114 | p2 += 2; |
| 115 | } |
| 116 | if ((p1t < matchlimit) && (*p2 == *p1t)) |
| 117 | p1t++; |
| 118 | return p1t - p1; |
| 119 | } |
| 120 | |
| 121 | static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, |
| 122 | const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) |
| 123 | { |
| 124 | u16 *const chaintable = hc4->chaintable; |
| 125 | HTYPE *const hashtable = hc4->hashtable; |
| 126 | const u8 *ref; |
| 127 | #if LZ4_ARCH64 |
| 128 | const BYTE * const base = hc4->base; |
| 129 | #else |
| 130 | const int base = 0; |
| 131 | #endif |
| 132 | int nbattempts = MAX_NB_ATTEMPTS; |
| 133 | size_t repl = 0, ml = 0; |
| 134 | u16 delta; |
| 135 | |
| 136 | /* HC4 match finder */ |
| 137 | lz4hc_insert(hc4, ip); |
| 138 | ref = hashtable[HASH_VALUE(ip)] + base; |
| 139 | |
| 140 | /* potential repetition */ |
| 141 | if (ref >= ip-4) { |
| 142 | /* confirmed */ |
| 143 | if (A32(ref) == A32(ip)) { |
| 144 | delta = (u16)(ip-ref); |
| 145 | repl = ml = lz4hc_commonlength(ip + MINMATCH, |
| 146 | ref + MINMATCH, matchlimit) + MINMATCH; |
| 147 | *matchpos = ref; |
| 148 | } |
| 149 | ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; |
| 150 | } |
| 151 | |
| 152 | while ((ref >= ip - MAX_DISTANCE) && nbattempts) { |
| 153 | nbattempts--; |
| 154 | if (*(ref + ml) == *(ip + ml)) { |
| 155 | if (A32(ref) == A32(ip)) { |
| 156 | size_t mlt = |
| 157 | lz4hc_commonlength(ip + MINMATCH, |
| 158 | ref + MINMATCH, matchlimit) + MINMATCH; |
| 159 | if (mlt > ml) { |
| 160 | ml = mlt; |
| 161 | *matchpos = ref; |
| 162 | } |
| 163 | } |
| 164 | } |
| 165 | ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; |
| 166 | } |
| 167 | |
| 168 | /* Complete table */ |
| 169 | if (repl) { |
| 170 | const BYTE *ptr = ip; |
| 171 | const BYTE *end; |
| 172 | end = ip + repl - (MINMATCH-1); |
| 173 | /* Pre-Load */ |
| 174 | while (ptr < end - delta) { |
| 175 | chaintable[(size_t)(ptr) & MAXD_MASK] = delta; |
| 176 | ptr++; |
| 177 | } |
| 178 | do { |
| 179 | chaintable[(size_t)(ptr) & MAXD_MASK] = delta; |
| 180 | /* Head of chain */ |
| 181 | hashtable[HASH_VALUE(ptr)] = (ptr) - base; |
| 182 | ptr++; |
| 183 | } while (ptr < end); |
| 184 | hc4->nexttoupdate = end; |
| 185 | } |
| 186 | |
| 187 | return (int)ml; |
| 188 | } |
| 189 | |
| 190 | static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, |
| 191 | const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, |
| 192 | const u8 **matchpos, const u8 **startpos) |
| 193 | { |
| 194 | u16 *const chaintable = hc4->chaintable; |
| 195 | HTYPE *const hashtable = hc4->hashtable; |
| 196 | #if LZ4_ARCH64 |
| 197 | const BYTE * const base = hc4->base; |
| 198 | #else |
| 199 | const int base = 0; |
| 200 | #endif |
| 201 | const u8 *ref; |
| 202 | int nbattempts = MAX_NB_ATTEMPTS; |
| 203 | int delta = (int)(ip - startlimit); |
| 204 | |
| 205 | /* First Match */ |
| 206 | lz4hc_insert(hc4, ip); |
| 207 | ref = hashtable[HASH_VALUE(ip)] + base; |
| 208 | |
| 209 | while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) |
| 210 | && (nbattempts)) { |
| 211 | nbattempts--; |
| 212 | if (*(startlimit + longest) == *(ref - delta + longest)) { |
| 213 | if (A32(ref) == A32(ip)) { |
| 214 | const u8 *reft = ref + MINMATCH; |
| 215 | const u8 *ipt = ip + MINMATCH; |
| 216 | const u8 *startt = ip; |
| 217 | |
| 218 | while (ipt < matchlimit-(STEPSIZE - 1)) { |
| 219 | #if LZ4_ARCH64 |
| 220 | u64 diff = A64(reft) ^ A64(ipt); |
| 221 | #else |
| 222 | u32 diff = A32(reft) ^ A32(ipt); |
| 223 | #endif |
| 224 | |
| 225 | if (!diff) { |
| 226 | ipt += STEPSIZE; |
| 227 | reft += STEPSIZE; |
| 228 | continue; |
| 229 | } |
| 230 | ipt += LZ4_NBCOMMONBYTES(diff); |
| 231 | goto _endcount; |
| 232 | } |
| 233 | #if LZ4_ARCH64 |
| 234 | if ((ipt < (matchlimit - 3)) |
| 235 | && (A32(reft) == A32(ipt))) { |
| 236 | ipt += 4; |
| 237 | reft += 4; |
| 238 | } |
| 239 | ipt += 2; |
| 240 | #endif |
| 241 | if ((ipt < (matchlimit - 1)) |
| 242 | && (A16(reft) == A16(ipt))) { |
| 243 | reft += 2; |
| 244 | } |
| 245 | if ((ipt < matchlimit) && (*reft == *ipt)) |
| 246 | ipt++; |
| 247 | _endcount: |
| 248 | reft = ref; |
| 249 | |
| 250 | while ((startt > startlimit) |
| 251 | && (reft > hc4->base) |
| 252 | && (startt[-1] == reft[-1])) { |
| 253 | startt--; |
| 254 | reft--; |
| 255 | } |
| 256 | |
| 257 | if ((ipt - startt) > longest) { |
| 258 | longest = (int)(ipt - startt); |
| 259 | *matchpos = reft; |
| 260 | *startpos = startt; |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; |
| 265 | } |
| 266 | return longest; |
| 267 | } |
| 268 | |
| 269 | static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, |
| 270 | int ml, const u8 *ref) |
| 271 | { |
| 272 | int length, len; |
| 273 | u8 *token; |
| 274 | |
| 275 | /* Encode Literal length */ |
| 276 | length = (int)(*ip - *anchor); |
| 277 | token = (*op)++; |
| 278 | if (length >= (int)RUN_MASK) { |
| 279 | *token = (RUN_MASK << ML_BITS); |
| 280 | len = length - RUN_MASK; |
| 281 | for (; len > 254 ; len -= 255) |
| 282 | *(*op)++ = 255; |
| 283 | *(*op)++ = (u8)len; |
| 284 | } else |
| 285 | *token = (length << ML_BITS); |
| 286 | |
| 287 | /* Copy Literals */ |
| 288 | LZ4_BLINDCOPY(*anchor, *op, length); |
| 289 | |
| 290 | /* Encode Offset */ |
| 291 | LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); |
| 292 | |
| 293 | /* Encode MatchLength */ |
| 294 | len = (int)(ml - MINMATCH); |
| 295 | if (len >= (int)ML_MASK) { |
| 296 | *token += ML_MASK; |
| 297 | len -= ML_MASK; |
| 298 | for (; len > 509 ; len -= 510) { |
| 299 | *(*op)++ = 255; |
| 300 | *(*op)++ = 255; |
| 301 | } |
| 302 | if (len > 254) { |
| 303 | len -= 255; |
| 304 | *(*op)++ = 255; |
| 305 | } |
| 306 | *(*op)++ = (u8)len; |
| 307 | } else |
| 308 | *token += len; |
| 309 | |
| 310 | /* Prepare next loop */ |
| 311 | *ip += ml; |
| 312 | *anchor = *ip; |
| 313 | |
| 314 | return 0; |
| 315 | } |
| 316 | |
| 317 | static int lz4_compresshcctx(struct lz4hc_data *ctx, |
| 318 | const char *source, |
| 319 | char *dest, |
| 320 | int isize) |
| 321 | { |
| 322 | const u8 *ip = (const u8 *)source; |
| 323 | const u8 *anchor = ip; |
| 324 | const u8 *const iend = ip + isize; |
| 325 | const u8 *const mflimit = iend - MFLIMIT; |
| 326 | const u8 *const matchlimit = (iend - LASTLITERALS); |
| 327 | |
| 328 | u8 *op = (u8 *)dest; |
| 329 | |
| 330 | int ml, ml2, ml3, ml0; |
| 331 | const u8 *ref = NULL; |
| 332 | const u8 *start2 = NULL; |
| 333 | const u8 *ref2 = NULL; |
| 334 | const u8 *start3 = NULL; |
| 335 | const u8 *ref3 = NULL; |
| 336 | const u8 *start0; |
| 337 | const u8 *ref0; |
| 338 | int lastrun; |
| 339 | |
| 340 | ip++; |
| 341 | |
| 342 | /* Main Loop */ |
| 343 | while (ip < mflimit) { |
| 344 | ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); |
| 345 | if (!ml) { |
| 346 | ip++; |
| 347 | continue; |
| 348 | } |
| 349 | |
| 350 | /* saved, in case we would skip too much */ |
| 351 | start0 = ip; |
| 352 | ref0 = ref; |
| 353 | ml0 = ml; |
| 354 | _search2: |
| 355 | if (ip+ml < mflimit) |
| 356 | ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, |
| 357 | ip + 1, matchlimit, ml, &ref2, &start2); |
| 358 | else |
| 359 | ml2 = ml; |
| 360 | /* No better match */ |
| 361 | if (ml2 == ml) { |
| 362 | lz4_encodesequence(&ip, &op, &anchor, ml, ref); |
| 363 | continue; |
| 364 | } |
| 365 | |
| 366 | if (start0 < ip) { |
| 367 | /* empirical */ |
| 368 | if (start2 < ip + ml0) { |
| 369 | ip = start0; |
| 370 | ref = ref0; |
| 371 | ml = ml0; |
| 372 | } |
| 373 | } |
| 374 | /* |
| 375 | * Here, start0==ip |
| 376 | * First Match too small : removed |
| 377 | */ |
| 378 | if ((start2 - ip) < 3) { |
| 379 | ml = ml2; |
| 380 | ip = start2; |
| 381 | ref = ref2; |
| 382 | goto _search2; |
| 383 | } |
| 384 | |
| 385 | _search3: |
| 386 | /* |
| 387 | * Currently we have : |
| 388 | * ml2 > ml1, and |
| 389 | * ip1+3 <= ip2 (usually < ip1+ml1) |
| 390 | */ |
| 391 | if ((start2 - ip) < OPTIMAL_ML) { |
| 392 | int correction; |
| 393 | int new_ml = ml; |
| 394 | if (new_ml > OPTIMAL_ML) |
| 395 | new_ml = OPTIMAL_ML; |
| 396 | if (ip + new_ml > start2 + ml2 - MINMATCH) |
| 397 | new_ml = (int)(start2 - ip) + ml2 - MINMATCH; |
| 398 | correction = new_ml - (int)(start2 - ip); |
| 399 | if (correction > 0) { |
| 400 | start2 += correction; |
| 401 | ref2 += correction; |
| 402 | ml2 -= correction; |
| 403 | } |
| 404 | } |
| 405 | /* |
| 406 | * Now, we have start2 = ip+new_ml, |
| 407 | * with new_ml=min(ml, OPTIMAL_ML=18) |
| 408 | */ |
| 409 | if (start2 + ml2 < mflimit) |
| 410 | ml3 = lz4hc_insertandgetwidermatch(ctx, |
| 411 | start2 + ml2 - 3, start2, matchlimit, |
| 412 | ml2, &ref3, &start3); |
| 413 | else |
| 414 | ml3 = ml2; |
| 415 | |
| 416 | /* No better match : 2 sequences to encode */ |
| 417 | if (ml3 == ml2) { |
| 418 | /* ip & ref are known; Now for ml */ |
| 419 | if (start2 < ip+ml) |
| 420 | ml = (int)(start2 - ip); |
| 421 | |
| 422 | /* Now, encode 2 sequences */ |
| 423 | lz4_encodesequence(&ip, &op, &anchor, ml, ref); |
| 424 | ip = start2; |
| 425 | lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); |
| 426 | continue; |
| 427 | } |
| 428 | |
| 429 | /* Not enough space for match 2 : remove it */ |
| 430 | if (start3 < ip + ml + 3) { |
| 431 | /* |
| 432 | * can write Seq1 immediately ==> Seq2 is removed, |
| 433 | * so Seq3 becomes Seq1 |
| 434 | */ |
| 435 | if (start3 >= (ip + ml)) { |
| 436 | if (start2 < ip + ml) { |
| 437 | int correction = |
| 438 | (int)(ip + ml - start2); |
| 439 | start2 += correction; |
| 440 | ref2 += correction; |
| 441 | ml2 -= correction; |
| 442 | if (ml2 < MINMATCH) { |
| 443 | start2 = start3; |
| 444 | ref2 = ref3; |
| 445 | ml2 = ml3; |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | lz4_encodesequence(&ip, &op, &anchor, ml, ref); |
| 450 | ip = start3; |
| 451 | ref = ref3; |
| 452 | ml = ml3; |
| 453 | |
| 454 | start0 = start2; |
| 455 | ref0 = ref2; |
| 456 | ml0 = ml2; |
| 457 | goto _search2; |
| 458 | } |
| 459 | |
| 460 | start2 = start3; |
| 461 | ref2 = ref3; |
| 462 | ml2 = ml3; |
| 463 | goto _search3; |
| 464 | } |
| 465 | |
| 466 | /* |
| 467 | * OK, now we have 3 ascending matches; let's write at least |
| 468 | * the first one ip & ref are known; Now for ml |
| 469 | */ |
| 470 | if (start2 < ip + ml) { |
| 471 | if ((start2 - ip) < (int)ML_MASK) { |
| 472 | int correction; |
| 473 | if (ml > OPTIMAL_ML) |
| 474 | ml = OPTIMAL_ML; |
| 475 | if (ip + ml > start2 + ml2 - MINMATCH) |
| 476 | ml = (int)(start2 - ip) + ml2 |
| 477 | - MINMATCH; |
| 478 | correction = ml - (int)(start2 - ip); |
| 479 | if (correction > 0) { |
| 480 | start2 += correction; |
| 481 | ref2 += correction; |
| 482 | ml2 -= correction; |
| 483 | } |
| 484 | } else |
| 485 | ml = (int)(start2 - ip); |
| 486 | } |
| 487 | lz4_encodesequence(&ip, &op, &anchor, ml, ref); |
| 488 | |
| 489 | ip = start2; |
| 490 | ref = ref2; |
| 491 | ml = ml2; |
| 492 | |
| 493 | start2 = start3; |
| 494 | ref2 = ref3; |
| 495 | ml2 = ml3; |
| 496 | |
| 497 | goto _search3; |
| 498 | } |
| 499 | |
| 500 | /* Encode Last Literals */ |
| 501 | lastrun = (int)(iend - anchor); |
| 502 | if (lastrun >= (int)RUN_MASK) { |
| 503 | *op++ = (RUN_MASK << ML_BITS); |
| 504 | lastrun -= RUN_MASK; |
| 505 | for (; lastrun > 254 ; lastrun -= 255) |
| 506 | *op++ = 255; |
| 507 | *op++ = (u8) lastrun; |
| 508 | } else |
| 509 | *op++ = (lastrun << ML_BITS); |
| 510 | memcpy(op, anchor, iend - anchor); |
| 511 | op += iend - anchor; |
| 512 | /* End */ |
| 513 | return (int) (((char *)op) - dest); |
| 514 | } |
| 515 | |
| 516 | int lz4hc_compress(const unsigned char *src, size_t src_len, |
| 517 | unsigned char *dst, size_t *dst_len, void *wrkmem) |
| 518 | { |
| 519 | int ret = -1; |
| 520 | int out_len = 0; |
| 521 | |
| 522 | struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; |
| 523 | lz4hc_init(hc4, (const u8 *)src); |
| 524 | out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, |
| 525 | (char *)dst, (int)src_len); |
| 526 | |
| 527 | if (out_len < 0) |
| 528 | goto exit; |
| 529 | |
| 530 | *dst_len = out_len; |
| 531 | return 0; |
| 532 | |
| 533 | exit: |
| 534 | return ret; |
| 535 | } |
| 536 | EXPORT_SYMBOL_GPL(lz4hc_compress); |
| 537 | |
| 538 | MODULE_LICENSE("GPL"); |
| 539 | MODULE_DESCRIPTION("LZ4HC compressor"); |