| /* |
| * Copyright (c) Facebook, Inc. |
| * All rights reserved. |
| * |
| * This source code is licensed under both the BSD-style license (found in the |
| * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| * in the COPYING file in the root directory of this source tree). |
| * You may select, at your option, one of the above-listed licenses. |
| */ |
| |
| #include "seqgen.h" |
| #include "mem.h" |
| #include <string.h> |
| |
| #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
| |
| static const size_t kMatchBytes = 128; |
| |
| #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
| static BYTE SEQ_randByte(unsigned* src) |
| { |
| static const U32 prime1 = 2654435761U; |
| static const U32 prime2 = 2246822519U; |
| U32 rand32 = *src; |
| rand32 *= prime1; |
| rand32 ^= prime2; |
| rand32 = SEQ_rotl32(rand32, 13); |
| *src = rand32; |
| return (BYTE)(rand32 >> 5); |
| } |
| |
| SEQ_stream SEQ_initStream(unsigned seed) |
| { |
| SEQ_stream stream; |
| stream.state = 0; |
| XXH64_reset(&stream.xxh, 0); |
| stream.seed = seed; |
| return stream; |
| } |
| |
| /* Generates a single guard byte, then match length + 1 of a different byte, |
| * then another guard byte. |
| */ |
| static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value, |
| SEQ_outBuffer* out) |
| { |
| typedef enum { |
| ml_first_byte = 0, |
| ml_match_bytes, |
| ml_last_byte, |
| } ml_state; |
| BYTE* const ostart = (BYTE*)out->dst; |
| BYTE* const oend = ostart + out->size; |
| BYTE* op = ostart + out->pos; |
| |
| switch ((ml_state)stream->state) { |
| case ml_first_byte: |
| /* Generate a single byte and pick a different byte for the match */ |
| if (op >= oend) { |
| stream->bytesLeft = 1; |
| break; |
| } |
| *op = SEQ_randByte(&stream->seed) & 0xFF; |
| do { |
| stream->saved = SEQ_randByte(&stream->seed) & 0xFF; |
| } while (*op == stream->saved); |
| ++op; |
| /* State transition */ |
| stream->state = ml_match_bytes; |
| stream->bytesLeft = value + 1; |
| /* fall-through */ |
| case ml_match_bytes: { |
| /* Copy matchLength + 1 bytes to the output buffer */ |
| size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); |
| if (setLength > 0) { |
| memset(op, stream->saved, setLength); |
| op += setLength; |
| stream->bytesLeft -= setLength; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| /* State transition */ |
| stream->state = ml_last_byte; |
| } |
| /* fall-through */ |
| case ml_last_byte: |
| /* Generate a single byte and pick a different byte for the match */ |
| if (op >= oend) { |
| stream->bytesLeft = 1; |
| break; |
| } |
| do { |
| *op = SEQ_randByte(&stream->seed) & 0xFF; |
| } while (*op == stream->saved); |
| ++op; |
| /* State transition */ |
| /* fall-through */ |
| default: |
| stream->state = 0; |
| stream->bytesLeft = 0; |
| break; |
| } |
| XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); |
| out->pos = op - ostart; |
| return stream->bytesLeft; |
| } |
| |
| /* Saves the current seed then generates kMatchBytes random bytes >= 128. |
| * Generates literal length - kMatchBytes random bytes < 128. |
| * Generates another kMatchBytes using the saved seed to generate a match. |
| * This way the match is easy to find for the compressors. |
| */ |
| static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) |
| { |
| typedef enum { |
| ll_start = 0, |
| ll_run_bytes, |
| ll_literals, |
| ll_run_match, |
| } ll_state; |
| BYTE* const ostart = (BYTE*)out->dst; |
| BYTE* const oend = ostart + out->size; |
| BYTE* op = ostart + out->pos; |
| |
| switch ((ll_state)stream->state) { |
| case ll_start: |
| stream->state = ll_run_bytes; |
| stream->saved = stream->seed; |
| stream->bytesLeft = MIN(kMatchBytes, value); |
| /* fall-through */ |
| case ll_run_bytes: |
| while (stream->bytesLeft > 0 && op < oend) { |
| *op++ = SEQ_randByte(&stream->seed) | 0x80; |
| --stream->bytesLeft; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| /* State transition */ |
| stream->state = ll_literals; |
| stream->bytesLeft = value - MIN(kMatchBytes, value); |
| /* fall-through */ |
| case ll_literals: |
| while (stream->bytesLeft > 0 && op < oend) { |
| *op++ = SEQ_randByte(&stream->seed) & 0x7F; |
| --stream->bytesLeft; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| /* State transition */ |
| stream->state = ll_run_match; |
| stream->bytesLeft = MIN(kMatchBytes, value); |
| /* fall-through */ |
| case ll_run_match: { |
| while (stream->bytesLeft > 0 && op < oend) { |
| *op++ = SEQ_randByte(&stream->saved) | 0x80; |
| --stream->bytesLeft; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| } |
| /* fall-through */ |
| default: |
| stream->state = 0; |
| stream->bytesLeft = 0; |
| break; |
| } |
| XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); |
| out->pos = op - ostart; |
| return stream->bytesLeft; |
| } |
| |
| /* Saves the current seed then generates kMatchBytes random bytes >= 128. |
| * Generates offset - kMatchBytes of zeros to get a large offset without |
| * polluting the hash tables. |
| * Generates another kMatchBytes using the saved seed to generate a with the |
| * required offset. |
| */ |
| static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) |
| { |
| typedef enum { |
| of_start = 0, |
| of_run_bytes, |
| of_offset, |
| of_run_match, |
| } of_state; |
| BYTE* const ostart = (BYTE*)out->dst; |
| BYTE* const oend = ostart + out->size; |
| BYTE* op = ostart + out->pos; |
| |
| switch ((of_state)stream->state) { |
| case of_start: |
| stream->state = of_run_bytes; |
| stream->saved = stream->seed; |
| stream->bytesLeft = MIN(value, kMatchBytes); |
| /* fall-through */ |
| case of_run_bytes: { |
| while (stream->bytesLeft > 0 && op < oend) { |
| *op++ = SEQ_randByte(&stream->seed) | 0x80; |
| --stream->bytesLeft; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| /* State transition */ |
| stream->state = of_offset; |
| stream->bytesLeft = value - MIN(value, kMatchBytes); |
| } |
| /* fall-through */ |
| case of_offset: { |
| /* Copy matchLength + 1 bytes to the output buffer */ |
| size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); |
| if (setLength > 0) { |
| memset(op, 0, setLength); |
| op += setLength; |
| stream->bytesLeft -= setLength; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| /* State transition */ |
| stream->state = of_run_match; |
| stream->bytesLeft = MIN(value, kMatchBytes); |
| } |
| /* fall-through */ |
| case of_run_match: { |
| while (stream->bytesLeft > 0 && op < oend) { |
| *op++ = SEQ_randByte(&stream->saved) | 0x80; |
| --stream->bytesLeft; |
| } |
| if (stream->bytesLeft > 0) |
| break; |
| } |
| /* fall-through */ |
| default: |
| stream->state = 0; |
| stream->bytesLeft = 0; |
| break; |
| } |
| XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); |
| out->pos = op - ostart; |
| return stream->bytesLeft; |
| } |
| |
| /* Returns the number of bytes left to generate. |
| * Must pass the same type/value until it returns 0. |
| */ |
| size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out) |
| { |
| switch (type) { |
| case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out); |
| case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out); |
| case SEQ_gen_of: return SEQ_gen_offset(stream, value, out); |
| case SEQ_gen_max: /* fall-through */ |
| default: return 0; |
| } |
| } |
| |
| /* Returns the xxhash of the data produced so far */ |
| XXH64_hash_t SEQ_digest(SEQ_stream const* stream) |
| { |
| return XXH64_digest(&stream->xxh); |
| } |