// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_VCDIFF_ENCODETABLE_H_
#define OPEN_VCDIFF_ENCODETABLE_H_

#include <config.h>
#include <stddef.h>  // size_t
#include <stdint.h>  // int32_t
#include <string>
#include "addrcache.h"
#include "checksum.h"
#include "codetable.h"
#include "codetablewriter_interface.h"

28namespace open_vcdiff {
29
openvcdiff311c7142008-08-26 19:29:25 +000030class OutputStringInterface;
31class VCDiffInstructionMap;
32
33// The method calls after construction *must* conform
34// to the following pattern:
35// {{Add|Copy|Run}* [AddChecksum] Output}*
36//
37// When Output has been called in this sequence, a complete target window
38// (as defined in RFC 3284 section 4.3) will have been appended to
39// out (unless no calls to Add, Run, or Copy were made, in which
40// case Output will do nothing.) The output will not be available for use
41// until after each call to Output().
42//
openvcdiff311c7142008-08-26 19:29:25 +000043// NOT threadsafe.
44//
openvcdiffd1845782009-03-20 21:56:15 +000045class VCDiffCodeTableWriter : public CodeTableWriterInterface {
openvcdiff311c7142008-08-26 19:29:25 +000046 public:
47 // This constructor uses the default code table.
48 // If interleaved is true, the encoder writes each delta file window
49 // by interleaving instructions and sizes with their corresponding
50 // addresses and data, rather than placing these elements into three
51 // separate sections. This facilitates providing partially
52 // decoded results when only a portion of a delta file window
53 // is received (e.g. when HTTP over TCP is used as the
54 // transmission protocol.) The interleaved format is
55 // not consistent with the VCDIFF draft standard.
56 //
57 explicit VCDiffCodeTableWriter(bool interleaved);
58
59 // Uses a non-standard code table and non-standard cache sizes. The caller
60 // must guarantee that code_table_data remains allocated for the lifetime of
61 // the VCDiffCodeTableWriter object. Note that this is different from how
62 // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given
63 // encoder will use either the default code table or a statically-defined
64 // non-standard code table, whereas the decoder must have the ability to read
65 // an arbitrary non-standard code table from a delta file and discard it once
66 // the file has been decoded.
67 //
68 VCDiffCodeTableWriter(bool interleaved,
69 int near_cache_size,
70 int same_cache_size,
71 const VCDiffCodeTableData& code_table_data,
72 unsigned char max_mode);
73
openvcdiffd1845782009-03-20 21:56:15 +000074 virtual ~VCDiffCodeTableWriter();
openvcdiff311c7142008-08-26 19:29:25 +000075
76 // Initializes the constructed object for use.
77 // This method must be called after a VCDiffCodeTableWriter is constructed
78 // and before any of its other methods can be called. It will return
79 // false if there was an error initializing the object, or true if it
80 // was successful. After the object has been initialized and used,
81 // Init() can be called again to restore the initial state of the object.
82 //
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000083 virtual bool Init(size_t dictionary_size);
84
85 // Write the header (as defined in section 4.1 of the RFC) to *out.
86 // This includes information that can be gathered
87 // before the first chunk of input is available.
88 virtual void WriteHeader(OutputStringInterface* out,
89 VCDiffFormatExtensionFlags format_extensions);
openvcdiff311c7142008-08-26 19:29:25 +000090
openvcdiffd1845782009-03-20 21:56:15 +000091 virtual size_t target_length() const { return target_length_; }
openvcdiff311c7142008-08-26 19:29:25 +000092
93 // Encode an ADD opcode with the "size" bytes starting at data
openvcdiffd1845782009-03-20 21:56:15 +000094 virtual void Add(const char* data, size_t size);
openvcdiff311c7142008-08-26 19:29:25 +000095
96 // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
openvcdiffd1845782009-03-20 21:56:15 +000097 virtual void Copy(int32_t offset, size_t size);
openvcdiff311c7142008-08-26 19:29:25 +000098
99 // Encode a RUN opcode for "size" copies of the value "byte".
openvcdiffd1845782009-03-20 21:56:15 +0000100 virtual void Run(size_t size, unsigned char byte);
openvcdiff311c7142008-08-26 19:29:25 +0000101
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000102 virtual void AddChecksum(VCDChecksum checksum) {
openvcdiff311c7142008-08-26 19:29:25 +0000103 add_checksum_ = true;
104 checksum_ = checksum;
105 }
106
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000107 // Appends the encoded delta window to the output
openvcdiff311c7142008-08-26 19:29:25 +0000108 // string. The output string is not null-terminated and may contain embedded
109 // '\0' characters.
openvcdiffd1845782009-03-20 21:56:15 +0000110 virtual void Output(OutputStringInterface* out);
openvcdiff311c7142008-08-26 19:29:25 +0000111
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000112 // There should not be any need to output more data
113 // since EncodeChunk() encodes a complete target window
114 // and there is no end-of-delta-file marker.
115 virtual void FinishEncoding(OutputStringInterface* /*out*/) {}
openvcdiff311c7142008-08-26 19:29:25 +0000116
117 private:
openvcdiff28db8072008-10-10 23:29:11 +0000118 typedef std::string string;
openvcdiff28db8072008-10-10 23:29:11 +0000119
openvcdiff311c7142008-08-26 19:29:25 +0000120 // The maximum value for the mode of a COPY instruction.
121 const unsigned char max_mode_;
122
123 // If interleaved is true, sets data_for_add_and_run_ and
124 // addresses_for_copy_ to point at instructions_and_sizes_,
125 // so that instructions, sizes, addresses and data will be
126 // combined into a single interleaved stream.
127 // If interleaved is false, sets data_for_add_and_run_ and
128 // addresses_for_copy_ to point at their corresponding
129 // separate_... strings, so that the three sections will
130 // be generated separately from one another.
131 //
132 void InitSectionPointers(bool interleaved);
133
134 // Determines the best opcode to encode an instruction, and appends
135 // or substitutes that opcode and its size into the
136 // instructions_and_sizes_ string.
137 //
138 void EncodeInstruction(VCDiffInstructionType inst,
139 size_t size,
140 unsigned char mode);
141
142 void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
143 return EncodeInstruction(inst, size, 0);
144 }
145
146 // Calculates the number of bytes needed to store the given size value as a
147 // variable-length integer (VarintBE).
148 static size_t CalculateLengthOfSizeAsVarint(size_t size);
149
150 // Appends the size value to the string as a variable-length integer.
151 static void AppendSizeToString(size_t size, string* out);
152
153 // Appends the size value to the output string as a variable-length integer.
154 static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
155
156 // Calculates the "Length of the delta encoding" field for the delta window
157 // header, based on the sizes of the sections and of the other header
158 // elements.
159 size_t CalculateLengthOfTheDeltaEncoding() const;
160
161 // None of the following 'string' objects are null-terminated.
162
163 // A series of instruction opcodes, each of which may be followed
164 // by one or two Varint values representing the size parameters
165 // of the first and second instruction in the opcode.
166 string instructions_and_sizes_;
167
168 // A series of data arguments (byte values) used for ADD and RUN
169 // instructions. Depending on whether interleaved output is used
170 // for streaming or not, the pointer may point to
171 // separate_data_for_add_and_run_ or to instructions_and_sizes_.
172 string *data_for_add_and_run_;
173 string separate_data_for_add_and_run_;
174
175 // A series of Varint addresses used for COPY instructions.
176 // For the SAME mode, a byte value is stored instead of a Varint.
177 // Depending on whether interleaved output is used
178 // for streaming or not, the pointer may point to
179 // separate_addresses_for_copy_ or to instructions_and_sizes_.
180 string *addresses_for_copy_;
181 string separate_addresses_for_copy_;
182
183 VCDiffAddressCache address_cache_;
184
185 size_t dictionary_size_;
186
187 // The number of bytes of target data that has been encoded so far.
188 // Each time Add(), Copy(), or Run() is called, this will be incremented.
189 // The target length is used to compute HERE mode addresses
190 // for COPY instructions, and is also written into the header
191 // of the delta window when Output() is called.
192 //
193 size_t target_length_;
194
195 const VCDiffCodeTableData* code_table_data_;
196
197 // The instruction map facilitates finding an opcode quickly given an
198 // instruction inst, size, and mode. This is an alternate representation
199 // of the same information that is found in code_table_data_.
200 //
201 const VCDiffInstructionMap* instruction_map_;
202
203 // The zero-based index within instructions_and_sizes_ of the byte
204 // that contains the last single-instruction opcode generated by
205 // EncodeInstruction(). (See that function for exhaustive details.)
206 // It is necessary to use an index rather than a pointer for this value
207 // because instructions_and_sizes_ may be resized, which would invalidate
208 // any pointers into its data buffer. The value -1 is reserved to mean that
209 // either no opcodes have been generated yet, or else the last opcode
210 // generated was a double-instruction opcode.
211 //
212 int last_opcode_index_;
213
214 // If true, an Adler32 checksum of the target window data will be written as
215 // a variable-length integer, just after the size of the addresses section.
216 //
217 bool add_checksum_;
218
219 // The checksum to be written to the current target window,
220 // if add_checksum_ is true.
221 // This will not be calculated based on the individual calls to Add(), Run(),
222 // and Copy(), which would be unnecessarily expensive. Instead, the code
223 // that uses the VCDiffCodeTableWriter object is expected to calculate
224 // the checksum all at once and to call AddChecksum() with that value.
225 // Must be called sometime before calling Output(), though it can be called
226 // either before or after the calls to Add(), Run(), and Copy().
227 //
228 VCDChecksum checksum_;
229
openvcdiff311c7142008-08-26 19:29:25 +0000230 // Making these private avoids implicit copy constructor & assignment operator
231 VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT
232 void operator=(const VCDiffCodeTableWriter&);
233};
234
235}; // namespace open_vcdiff

#endif  // OPEN_VCDIFF_ENCODETABLE_H_