blob: ac1acae515fe708acd984abc8a4aeec2854785e8 [file] [log] [blame]
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// API for Brotli compression
16
17#ifndef BROTLI_ENC_ENCODE_H_
18#define BROTLI_ENC_ENCODE_H_
19
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>
#include <vector>

#include "./command.h"
#include "./hash.h"
#include "./ringbuffer.h"
#include "./static_dict.h"
#include "./streams.h"
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +020029
30namespace brotli {
31
// Smallest and largest base 2 logarithm of the sliding window size
// (these match the range documented for BrotliParams::lgwin).
static const int kMinWindowBits = 10;
static const int kMaxWindowBits = 24;

// Smallest and largest base 2 logarithm of the maximum input block size
// (these match the range documented for BrotliParams::lgblock).
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +020036
// Settings that configure a Brotli compression run.
struct BrotliParams {
  enum Mode {
    // Default compression mode. The compressor does not know anything in
    // advance about the properties of the input.
    MODE_GENERIC = 0,
    // Compression mode for UTF-8 format text input.
    MODE_TEXT = 1,
    // Compression mode used in WOFF 2.0.
    MODE_FONT = 2
  };

  // Defaults: generic mode, quality 11, 22-bit window, and a block size
  // that is chosen from the quality (lgblock == 0).
  BrotliParams()
      : mode(MODE_GENERIC),
        quality(11),
        lgwin(22),
        lgblock(0),
        enable_dictionary(true),
        enable_transforms(false),
        greedy_block_split(false),
        enable_context_modeling(true) {}

  // Kind of input the compressor should expect.
  Mode mode;

  // Trades compression speed against compression density: a higher quality
  // means slower compression. Range is 0 to 11.
  int quality;

  // Base 2 logarithm of the sliding window size. Range is 10 to 24.
  int lgwin;

  // Base 2 logarithm of the maximum input block size. Range is 16 to 24.
  // If set to 0, the value will be set based on the quality.
  int lgblock;

  // Deprecated settings; they are ignored. All speed vs. size compromises
  // are controlled by the quality param.
  bool enable_dictionary;
  bool enable_transforms;
  bool greedy_block_split;
  bool enable_context_modeling;
};
75
Zoltan Szabadka98539222015-04-23 16:20:29 +020076// An instance can not be reused for multiple brotli streams.
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010077class BrotliCompressor {
78 public:
Zoltan Szabadkae7650082014-03-20 14:32:35 +010079 explicit BrotliCompressor(BrotliParams params);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010080 ~BrotliCompressor();
81
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +020082 // The maximum input size that can be processed at once.
Zoltan Szabadka817a3ed2015-04-01 16:29:04 +020083 size_t input_block_size() const { return 1 << params_.lgblock; }
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010084
85 // Encodes the data in input_buffer as a meta-block and writes it to
Zoltan Szabadka485ad822014-10-28 14:05:53 +010086 // encoded_buffer (*encoded_size should be set to the size of
87 // encoded_buffer) and sets *encoded_size to the number of bytes that
88 // was written. Returns 0 if there was an error and 1 otherwise.
89 bool WriteMetaBlock(const size_t input_size,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010090 const uint8_t* input_buffer,
Zoltan Szabadka60c24c02013-12-12 13:18:04 +010091 const bool is_last,
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +010092 size_t* encoded_size,
93 uint8_t* encoded_buffer);
94
Zoltan Szabadka2fd80cd2015-04-23 15:43:37 +020095 // Writes a metadata meta-block containing the given input to encoded_buffer.
96 // *encoded_size should be set to the size of the encoded_buffer.
97 // Sets *encoded_size to the number of bytes that was written.
98 // Note that the given input data will not be part of the sliding window and
99 // thus no backward references can be made to this data from subsequent
100 // metablocks.
101 bool WriteMetadata(const size_t input_size,
102 const uint8_t* input_buffer,
103 const bool is_last,
104 size_t* encoded_size,
105 uint8_t* encoded_buffer);
106
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100107 // Writes a zero-length meta-block with end-of-input bit set to the
Zoltan Szabadka485ad822014-10-28 14:05:53 +0100108 // internal output buffer and copies the output buffer to encoded_buffer
109 // (*encoded_size should be set to the size of encoded_buffer) and sets
110 // *encoded_size to the number of bytes written. Returns false if there was
111 // an error and true otherwise.
112 bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100113
Zoltan Szabadka3dbe2e02015-04-23 15:26:08 +0200114 // Copies the given input data to the internal ring buffer of the compressor.
115 // No processing of the data occurs at this time and this function can be
116 // called multiple times before calling WriteBrotliData() to process the
117 // accumulated input. At most input_block_size() bytes of input data can be
118 // copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
119 void CopyInputToRingBuffer(const size_t input_size,
120 const uint8_t* input_buffer);
121
122 // Processes the accumulated input data and sets *out_size to the length of
123 // the new output meta-block, or to zero if no new output meta-block was
124 // created (in this case the processed input data is buffered internally).
125 // If *out_size is positive, *output points to the start of the output data.
126 // Returns false if the size of the input data is larger than
127 // input_block_size() or if there was an error during writing the output.
128 // If is_last or force_flush is true, an output meta-block is always created.
129 bool WriteBrotliData(const bool is_last, const bool force_flush,
130 size_t* out_size, uint8_t** output);
131
Zoltan Szabadkab43df8f2015-06-12 15:43:54 +0200132 // Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
133 // e.g. for custom static dictionaries for data formats.
134 // Not to be confused with the built-in transformable dictionary of Brotli.
135 // To decode, use BrotliSetCustomDictionary of the decoder with the same
136 // dictionary.
137 void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
138
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200139 // No-op, but we keep it here for API backward-compatibility.
140 void WriteStreamHeader() {}
141
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100142 private:
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200143 uint8_t* GetBrotliStorage(size_t size);
144
Zoltan Szabadka3dbe2e02015-04-23 15:26:08 +0200145 bool WriteMetaBlockInternal(const bool is_last,
146 const bool utf8_mode,
147 size_t* out_size,
148 uint8_t** output);
149
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100150 BrotliParams params_;
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200151 int max_backward_distance_;
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100152 std::unique_ptr<Hashers> hashers_;
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100153 int hash_type_;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100154 size_t input_pos_;
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200155 std::unique_ptr<RingBuffer> ringbuffer_;
Zoltan Szabadka89a6fb82015-04-23 13:15:42 +0200156 std::unique_ptr<float[]> literal_cost_;
157 size_t literal_cost_mask_;
Zoltan Szabadka3dbe2e02015-04-23 15:26:08 +0200158 size_t cmd_buffer_size_;
159 std::unique_ptr<Command[]> commands_;
160 int num_commands_;
Zoltan Szabadka0f726df2015-04-28 10:12:47 +0200161 int num_literals_;
Zoltan Szabadka3dbe2e02015-04-23 15:26:08 +0200162 int last_insert_len_;
163 size_t last_flush_pos_;
164 size_t last_processed_pos_;
Zoltan Szabadkab4f39bf2014-10-28 13:25:22 +0100165 int dist_cache_[4];
Zoltan Szabadka65f3fc52015-06-12 16:11:50 +0200166 int saved_dist_cache_[4];
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200167 uint8_t last_byte_;
168 uint8_t last_byte_bits_;
Zoltan Szabadka98539222015-04-23 16:20:29 +0200169 uint8_t prev_byte_;
170 uint8_t prev_byte2_;
Zoltan Szabadkad6d69ec2015-04-01 16:10:15 +0200171 int storage_size_;
172 std::unique_ptr<uint8_t[]> storage_;
Zoltan Szabadkac6b9c7c2013-11-15 19:02:17 +0100173};
174
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200175// Compresses the data in input_buffer into encoded_buffer, and sets
176// *encoded_size to the compressed length.
177// Returns 0 if there was an error and 1 otherwise.
Zoltan Szabadkae7650082014-03-20 14:32:35 +0100178int BrotliCompressBuffer(BrotliParams params,
179 size_t input_size,
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200180 const uint8_t* input_buffer,
181 size_t* encoded_size,
182 uint8_t* encoded_buffer);
183
Zoltan Szabadka3dbe2e02015-04-23 15:26:08 +0200184// Same as above, but uses the specified input and output classes instead
185// of reading from and writing to pre-allocated memory buffers.
186int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
187
Zoltan Szabadkab43df8f2015-06-12 15:43:54 +0200188// Before compressing the data, sets a custom LZ77 dictionary with
189// BrotliCompressor::BrotliSetCustomDictionary.
190int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
191 BrotliParams params,
192 BrotliIn* in, BrotliOut* out);
193
Zoltan Szabadkac66e4e32013-10-23 13:06:13 +0200194} // namespace brotli
195
196#endif // BROTLI_ENC_ENCODE_H_