// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implements a Decoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.

#include <config.h>
#include "google/vcdecoder.h"
#include <stddef.h>  // size_t, ptrdiff_t
#include <stdint.h>  // int32_t
#include <string.h>  // memcpy, memset
#include <memory>  // auto_ptr
#include <string>
#include "addrcache.h"
#include "checksum.h"
#include "codetable.h"
#include "decodetable.h"
#include "headerparser.h"
#include "logging.h"
#include "google/output_string.h"
#include "varint_bigendian.h"
#include "vcdiff_defs.h"

namespace open_vcdiff {

openvcdiff311c7142008-08-26 19:29:25 +000049// This class is used to parse delta file windows as described
50// in RFC sections 4.2 and 4.3. Its methods are not thread-safe.
51//
52// Here is the window format copied from the RFC:
53//
54// Window1
55// Win_Indicator - byte
56// [Source segment size] - integer
57// [Source segment position] - integer
58// The delta encoding of the target window
59// Length of the delta encoding - integer
60// The delta encoding
61// Size of the target window - integer
62// Delta_Indicator - byte
63// Length of data for ADDs and RUNs - integer
64// Length of instructions and sizes - integer
65// Length of addresses for COPYs - integer
66// Data section for ADDs and RUNs - array of bytes
67// Instructions and sizes section - array of bytes
68// Addresses section for COPYs - array of bytes
69// Window2
70// ...
71//
72// Sample usage:
73//
74// VCDiffDeltaFileWindow delta_window_;
75// delta_window_.Init(parent);
76// ParseableChunk parseable_chunk(input_buffer,
77// input_size,
78// leftover_unencoded_bytes);
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000079// while (!parseable_chunk.Empty()) {
80// switch (delta_window_.DecodeWindow(&parseable_chunk)) {
81// case RESULT_END_OF_DATA:
82// <Read more input and retry DecodeWindow later.>
83// case RESULT_ERROR:
84// <Handle error case. An error log message has already been generated.>
85// }
openvcdiff311c7142008-08-26 19:29:25 +000086// }
87//
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000088// DecodeWindow consumes only a single window, and needs to be placed within
89// a loop if multiple windows are to be processed.
openvcdiff311c7142008-08-26 19:29:25 +000090//
91class VCDiffDeltaFileWindow {
92 public:
93 VCDiffDeltaFileWindow();
94 ~VCDiffDeltaFileWindow();
95
96 // Init() should be called immediately after constructing the
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +000097 // VCDiffDeltaFileWindow(). It must be called before DecodeWindow() can be
openvcdiff311c7142008-08-26 19:29:25 +000098 // invoked, or an error will occur.
99 void Init(VCDiffStreamingDecoderImpl* parent);
100
101 // Resets the pointers to the data sections in the current window.
102 void Reset();
103
104 bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
105 unsigned char max_mode) {
106 return reader_.UseCodeTable(code_table_data, max_mode);
107 }
108
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000109 // Decodes a single delta window using the input data from *parseable_chunk.
110 // Appends the decoded target window to parent_->decoded_target(). Returns
111 // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
112 // the end of input was reached before the entire window could be decoded and
113 // more input is expected (only possible if IsInterleaved() is true), or
114 // RESULT_ERROR if an error occurred during decoding. In the RESULT_ERROR
115 // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
116 // parseable_chunk->Advance() is called to point to the input data position
117 // just after the data that has been decoded.
openvcdiff311c7142008-08-26 19:29:25 +0000118 //
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000119 VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);
openvcdiff311c7142008-08-26 19:29:25 +0000120
121 bool FoundWindowHeader() const {
122 return found_header_;
123 }
124
125 bool MoreDataExpected() const {
126 // When parsing an interleaved-format delta file,
127 // every time DecodeBody() exits, interleaved_bytes_expected_
128 // will be decremented by the number of bytes parsed. If it
129 // reaches zero, then there is no more data expected because
130 // the size of the interleaved section (given in the window
131 // header) has been reached.
132 return IsInterleaved() && (interleaved_bytes_expected_ > 0);
133 }
134
openvcdiffbaf44ea2009-04-09 19:20:49 +0000135 size_t target_window_start_pos() const { return target_window_start_pos_; }
136
137 void set_target_window_start_pos(size_t new_start_pos) {
138 target_window_start_pos_ = new_start_pos;
139 }
140
openvcdiff311c7142008-08-26 19:29:25 +0000141 // Returns the number of bytes remaining to be decoded in the target window.
142 // If not in the process of decoding a window, returns 0.
143 size_t TargetBytesRemaining();
144
openvcdiff311c7142008-08-26 19:29:25 +0000145 private:
146 // Reads the header of the window section as described in RFC sections 4.2 and
147 // 4.3, up to and including the value "Length of addresses for COPYs". If the
148 // entire header is found, this function sets up the DeltaWindowSections
149 // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
150 // that the decoder can begin decoding the opcodes in these sections. Returns
151 // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
152 // available data was reached before the entire header could be read. (The
153 // latter may be an error condition if there is no more data available.)
154 // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
155 // parsed header.
156 //
157 VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
158
159 // After the window header has been parsed as far as the Delta_Indicator,
160 // this function is called to parse the following delta window header fields:
161 //
162 // Length of data for ADDs and RUNs - integer (VarintBE format)
163 // Length of instructions and sizes - integer (VarintBE format)
164 // Length of addresses for COPYs - integer (VarintBE format)
165 //
166 // If has_checksum_ is true, it also looks for the following element:
167 //
168 // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
169 //
170 // It sets up the DeltaWindowSections instructions_and_sizes_,
171 // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
172 // is being used, all three sections will include the entire window body; if
173 // the standard format is used, three non-overlapping window sections will be
174 // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
175 // if standard format is being used and there is not enough input data to read
176 // the entire window body. Otherwise, returns RESULT_SUCCESS.
177 VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
178
179 // Decodes the body of the window section as described in RFC sections 4.3,
180 // including the sections "Data section for ADDs and RUNs", "Instructions
181 // and sizes section", and "Addresses section for COPYs". These sections
182 // must already have been set up by ReadWindowHeader(). Returns a
183 // non-negative value on success, or RESULT_END_OF_DATA if the end of input
184 // was reached before the entire window could be decoded (only possible if
185 // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
186 // decoding. Appends as much of the decoded target window as possible to
187 // parent->decoded_target().
188 //
189 int DecodeBody(ParseableChunk* parseable_chunk);
190
191 // Returns the number of bytes already decoded into the target window.
192 size_t TargetBytesDecoded();
193
194 // Decodes a single ADD instruction, updating parent_->decoded_target_.
195 VCDiffResult DecodeAdd(size_t size);
196
197 // Decodes a single RUN instruction, updating parent_->decoded_target_.
198 VCDiffResult DecodeRun(size_t size);
199
200 // Decodes a single COPY instruction, updating parent_->decoded_target_.
201 VCDiffResult DecodeCopy(size_t size, unsigned char mode);
202
203 // When using the interleaved format, this function is called both on parsing
204 // the header and on resuming after a RESULT_END_OF_DATA was returned from a
205 // previous call to DecodeBody(). It sets up all three section pointers to
206 // reference the same interleaved stream of instructions, sizes, addresses,
207 // and data. These pointers must be reset every time that work resumes on a
208 // delta window, because the input data string may have been changed or
209 // resized since DecodeBody() last returned.
210 void UpdateInterleavedSectionPointers(const char* data_pos,
211 const char* data_end) {
212 const ptrdiff_t available_data = data_end - data_pos;
213 // Don't read past the end of currently-available data
214 if (available_data > interleaved_bytes_expected_) {
215 instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
216 } else {
217 instructions_and_sizes_.Init(data_pos, available_data);
218 }
219 data_for_add_and_run_.Init(&instructions_and_sizes_);
220 addresses_for_copy_.Init(&instructions_and_sizes_);
221 }
222
223 // If true, the interleaved format described in AllowInterleaved() is used
224 // for the current delta file. Only valid after ReadWindowHeader() has been
225 // called and returned a positive number (i.e., the whole header was parsed),
226 // but before the window has finished decoding.
227 //
228 bool IsInterleaved() const {
229 // If the sections are interleaved, both addresses_for_copy_ and
230 // data_for_add_and_run_ should point at instructions_and_sizes_.
231 return !addresses_for_copy_.IsOwned();
232 }
233
234 // Executes a single COPY or ADD instruction, appending data to
235 // parent_->decoded_target().
openvcdiffbaf44ea2009-04-09 19:20:49 +0000236 void CopyBytes(const char* data, size_t size);
openvcdiff311c7142008-08-26 19:29:25 +0000237
238 // Executes a single RUN instruction, appending data to
239 // parent_->decoded_target().
240 void RunByte(unsigned char byte, size_t size);
241
openvcdiff311c7142008-08-26 19:29:25 +0000242 // Advance *parseable_chunk to point to the current position in the
243 // instructions/sizes section. If interleaved format is used, then
244 // decrement the number of expected bytes in the instructions/sizes section
245 // by the number of instruction/size bytes parsed.
246 void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
247
248 // The parent object which was passed to Init().
249 VCDiffStreamingDecoderImpl* parent_;
250
251 // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
252 // has been called and succeeded in parsing the delta window header, but the
253 // entire window has not yet been decoded.
254 bool found_header_;
255
256 // Contents and length of the current source window. source_segment_ptr_
257 // will be non-NULL if (a) the window section header for the current window
258 // has been read, but the window has not yet finished decoding; or
259 // (b) the window did not specify a source segment.
260 const char* source_segment_ptr_;
261 size_t source_segment_length_;
262
263 // The delta encoding window sections as defined in RFC section 4.3.
264 // The pointer for each section will be incremented as data is consumed and
265 // decoded from that section. If the interleaved format is used,
266 // data_for_add_and_run_ and addresses_for_copy_ will both point to
267 // instructions_and_sizes_; otherwise, they will be separate data sections.
268 //
269 DeltaWindowSection instructions_and_sizes_;
270 DeltaWindowSection data_for_add_and_run_;
271 DeltaWindowSection addresses_for_copy_;
272
273 // The expected bytes left to decode in instructions_and_sizes_. Only used
274 // for the interleaved format.
275 int interleaved_bytes_expected_;
276
277 // The expected length of the target window once it has been decoded.
278 size_t target_window_length_;
279
280 // The index in decoded_target at which the first byte of the current
281 // target window was/will be written.
282 size_t target_window_start_pos_;
283
284 // If has_checksum_ is true, then expected_checksum_ contains an Adler32
285 // checksum of the target window data. This is an extension included in the
286 // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
287 bool has_checksum_;
288 VCDChecksum expected_checksum_;
289
290 VCDiffCodeTableReader reader_;
291
openvcdiff311c7142008-08-26 19:29:25 +0000292 // Making these private avoids implicit copy constructor & assignment operator
293 VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT
294 void operator=(const VCDiffDeltaFileWindow&);
295};
296
// *** Inline methods for VCDiffDeltaFileWindow

299inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
300 Reset();
301}
302
303inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
304
305inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
306 parent_ = parent;
307}
308
openvcdiff311c7142008-08-26 19:29:25 +0000309class VCDiffStreamingDecoderImpl {
310 public:
openvcdiff28db8072008-10-10 23:29:11 +0000311 typedef std::string string;
openvcdiff28db8072008-10-10 23:29:11 +0000312
313 // The default maximum target file size (and target window size) if
314 // SetMaximumTargetFileSize() is not called.
315 static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB
316
openvcdiffbaf44ea2009-04-09 19:20:49 +0000317 // The largest value that can be passed to SetMaximumTargetWindowSize().
318 // Using a larger value will result in an error.
openvcdiff28db8072008-10-10 23:29:11 +0000319 static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX
320
321 // A constant that is the default value for planned_target_file_size_,
openvcdiff311c7142008-08-26 19:29:25 +0000322 // indicating that the decoder does not have an expected length
323 // for the target data.
324 static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
325
326 VCDiffStreamingDecoderImpl();
327 ~VCDiffStreamingDecoderImpl();
328
329 // Resets all member variables to their initial states.
330 void Reset();
331
332 // These functions are identical to their counterparts
333 // in VCDiffStreamingDecoder.
334 //
335 void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
336
337 bool DecodeChunk(const char* data,
338 size_t len,
339 OutputStringInterface* output_string);
340
341 bool FinishDecoding();
342
343 // If true, the version of VCDIFF used in the current delta file allows
344 // for the interleaved format, in which instructions, addresses and data
345 // are all sent interleaved in the instructions section of each window
346 // rather than being sent in separate sections. This is not part of
347 // the VCDIFF draft standard, so we've defined a special version code
348 // 'S' which implies that this feature is available. Even if interleaving
349 // is supported, it is not mandatory; interleaved format will be implied
350 // if the address and data sections are both zero-length.
351 //
352 bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
353
354 // If true, the version of VCDIFF used in the current delta file allows
355 // each delta window to contain an Adler32 checksum of the target window data.
356 // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
357 // this checksum will appear as a variable-length integer, just after the
358 // "length of addresses for COPYs" value and before the window data sections.
359 // It is possible for some windows in a delta file to use the checksum feature
360 // and for others not to use it (and leave the flag bit set to 0.)
361 // Just as with AllowInterleaved(), this extension is not part of the draft
362 // standard and is only available when the version code 'S' is specified.
363 //
364 bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
365
openvcdiff28db8072008-10-10 23:29:11 +0000366 bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
openvcdiff28db8072008-10-10 23:29:11 +0000367 maximum_target_file_size_ = new_maximum_target_file_size;
368 return true;
369 }
370
371 bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
372 if (new_maximum_target_window_size > kTargetSizeLimit) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000373 VCD_ERROR << "Specified maximum target window size "
374 << new_maximum_target_window_size << " exceeds limit of "
375 << kTargetSizeLimit << " bytes" << VCD_ENDL;
openvcdiff28db8072008-10-10 23:29:11 +0000376 return false;
377 }
378 maximum_target_window_size_ = new_maximum_target_window_size;
379 return true;
380 }
381
382 // See description of planned_target_file_size_, below.
383 bool HasPlannedTargetFileSize() const {
384 return planned_target_file_size_ != kUnlimitedBytes;
385 }
386
387 void SetPlannedTargetFileSize(size_t planned_target_file_size) {
388 planned_target_file_size_ = planned_target_file_size;
389 }
390
openvcdiffbaf44ea2009-04-09 19:20:49 +0000391 void AddToTotalTargetWindowSize(size_t window_size) {
392 total_of_target_window_sizes_ += window_size;
393 }
394
openvcdiff28db8072008-10-10 23:29:11 +0000395 // Checks to see whether the decoded target data has reached its planned size.
396 bool ReachedPlannedTargetFileSize() const {
397 if (!HasPlannedTargetFileSize()) {
398 return false;
399 }
400 // The planned target file size should not have been exceeded.
401 // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
402 // each target window would not make the target file exceed that limit, and
openvcdiff311c7142008-08-26 19:29:25 +0000403 // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
404 // exceeds the advertised target window size.
openvcdiffbaf44ea2009-04-09 19:20:49 +0000405 if (total_of_target_window_sizes_ > planned_target_file_size_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000406 VCD_DFATAL << "Internal error: Decoded data size "
407 << total_of_target_window_sizes_
408 << " exceeds planned target file size "
409 << planned_target_file_size_ << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000410 return true;
411 }
openvcdiffbaf44ea2009-04-09 19:20:49 +0000412 return total_of_target_window_sizes_ == planned_target_file_size_;
openvcdiff311c7142008-08-26 19:29:25 +0000413 }
414
415 // Checks to see whether adding a new target window of the specified size
openvcdiff28db8072008-10-10 23:29:11 +0000416 // would exceed the planned target file size, the maximum target file size,
417 // or the maximum target window size. If so, logs an error and returns true;
418 // otherwise, returns false.
419 bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
openvcdiff311c7142008-08-26 19:29:25 +0000420
421 // Returns the amount of input data passed to the last DecodeChunk()
422 // that was not consumed by the decoder. This is essential if
openvcdiff28db8072008-10-10 23:29:11 +0000423 // SetPlannedTargetFileSize() is being used, in order to preserve the
424 // remaining input data stream once the planned target file has been decoded.
openvcdiff311c7142008-08-26 19:29:25 +0000425 size_t GetUnconsumedDataSize() const {
426 return unparsed_bytes_.size();
427 }
428
429 // This function will return true if the decoder has parsed a complete delta
430 // file header plus zero or more delta file windows, with no data left over.
431 // It will also return true if no delta data at all was decoded. If these
432 // conditions are not met, then FinishDecoding() should not be called.
433 bool IsDecodingComplete() const {
434 if (!FoundFileHeader()) {
435 // No complete delta file header has been parsed yet. DecodeChunk()
436 // may have received some data that it hasn't yet parsed, in which case
437 // decoding is incomplete.
438 return unparsed_bytes_.empty();
439 } else if (custom_code_table_decoder_.get()) {
440 // The decoder is in the middle of parsing a custom code table.
441 return false;
442 } else if (delta_window_.FoundWindowHeader()) {
443 // The decoder is in the middle of parsing an interleaved format delta
444 // window.
445 return false;
openvcdiff28db8072008-10-10 23:29:11 +0000446 } else if (ReachedPlannedTargetFileSize()) {
447 // The decoder found exactly the planned number of bytes. In this case
openvcdiff311c7142008-08-26 19:29:25 +0000448 // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
449 // data after the end of the delta file.
450 return true;
451 } else {
452 // No complete delta file window has been parsed yet. DecodeChunk()
453 // may have received some data that it hasn't yet parsed, in which case
454 // decoding is incomplete.
455 return unparsed_bytes_.empty();
456 }
457 }
458
459 const char* dictionary_ptr() const { return dictionary_ptr_; }
460
461 size_t dictionary_size() const { return dictionary_size_; }
462
463 VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
464
465 string* decoded_target() { return &decoded_target_; }
466
openvcdiffbaf44ea2009-04-09 19:20:49 +0000467 bool allow_vcd_target() const { return allow_vcd_target_; }
openvcdiff311c7142008-08-26 19:29:25 +0000468
openvcdiffbaf44ea2009-04-09 19:20:49 +0000469 void SetAllowVcdTarget(bool allow_vcd_target) {
470 if (start_decoding_was_called_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000471 VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
472 << VCD_ENDL;
openvcdiffbaf44ea2009-04-09 19:20:49 +0000473 return;
474 }
475 allow_vcd_target_ = allow_vcd_target;
openvcdiff311c7142008-08-26 19:29:25 +0000476 }
477
openvcdiff311c7142008-08-26 19:29:25 +0000478 private:
479 // Reads the VCDiff delta file header section as described in RFC section 4.1,
480 // except the custom code table data. Returns RESULT_ERROR if an error
481 // occurred, or RESULT_END_OF_DATA if the end of available data was reached
482 // before the entire header could be read. (The latter may be an error
483 // condition if there is no more data available.) Otherwise, advances
484 // data->position_ past the header and returns RESULT_SUCCESS.
485 //
486 VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
487
488 // Indicates whether or not the header has already been read.
489 bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
490
491 // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
492 // file header, this function parses the custom cache sizes and initializes
493 // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
494 // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
495 // error occurred, or RESULT_END_OF_DATA if the end of available data was
496 // reached before the custom cache sizes could be read. Otherwise, returns
497 // the number of bytes read.
498 //
499 int InitCustomCodeTable(const char* data_start, const char* data_end);
500
501 // If a custom code table was specified in the header section that was parsed
502 // by ReadDeltaFileHeader(), this function makes a recursive call to another
503 // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
504 // custom code table is expected to be supplied as an embedded VCDIFF
505 // encoding that uses the standard code table. Returns RESULT_ERROR if an
506 // error occurs, or RESULT_END_OF_DATA if the end of available data was
507 // reached before the entire custom code table could be read. Otherwise,
508 // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
509 // custom code table. If the function returns RESULT_SUCCESS or
510 // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
511 //
512 VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
513
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000514 // Called after the decoder exhausts all input data. This function
515 // copies from decoded_target_ into output_string all the data that
516 // has not yet been output. It sets decoded_target_output_position_
517 // to mark the start of the next data that needs to be output.
518 void AppendNewOutputText(OutputStringInterface* output_string);
519
520 // Appends to output_string the portion of decoded_target_ that has
521 // not yet been output, then clears decoded_target_. This function is
522 // called after each complete target window has been decoded if
523 // allow_vcd_target is false. In that case, there is no need to retain
524 // target data from any window except the current window.
525 void FlushDecodedTarget(OutputStringInterface* output_string);
526
openvcdiff311c7142008-08-26 19:29:25 +0000527 // Contents and length of the source (dictionary) data.
528 const char* dictionary_ptr_;
529 size_t dictionary_size_;
530
531 // This string will be used to store any unparsed bytes left over when
532 // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
533 // It will also be used to concatenate those unparsed bytes with the data
534 // supplied to the next call to DecodeChunk(), so that they appear in
535 // contiguous memory.
536 string unparsed_bytes_;
537
538 // The portion of the target file that has been decoded so far. This will be
539 // used to fill the output string for DecodeChunk(), and will also be used to
540 // execute COPY instructions that reference target data. Since the source
541 // window can come from a range of addresses in the previously decoded target
542 // data, the entire target file needs to be available to the decoder, not just
543 // the current target window.
544 string decoded_target_;
545
546 // The VCDIFF version byte (also known as "header4") from the
547 // delta file header.
548 unsigned char vcdiff_version_code_;
549
550 VCDiffDeltaFileWindow delta_window_;
551
552 std::auto_ptr<VCDiffAddressCache> addr_cache_;
553
554 // Will be NULL unless a custom code table has been defined.
555 std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
556
557 // Used to receive the decoded custom code table.
558 string custom_code_table_string_;
559
560 // If a custom code table is specified, it will be expressed
561 // as an embedded VCDIFF delta file which uses the default code table
562 // as the source file (dictionary). Use a child decoder object
563 // to decode that delta file.
564 std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
565
566 // If set, then the decoder is expecting *exactly* this number of
567 // target bytes to be decoded from one or more delta file windows.
568 // If this number is exceeded while decoding a window, but was not met
569 // before starting on that window, an error will be reported.
570 // If FinishDecoding() is called before this number is met, an error
571 // will also be reported. This feature is used for decoding the
572 // embedded code table data within a VCDIFF delta file; we want to
573 // stop processing the embedded data once the entire code table has
574 // been decoded, and treat the rest of the available data as part
575 // of the enclosing delta file.
openvcdiff28db8072008-10-10 23:29:11 +0000576 size_t planned_target_file_size_;
577
578 size_t maximum_target_file_size_;
579
580 size_t maximum_target_window_size_;
openvcdiff311c7142008-08-26 19:29:25 +0000581
openvcdiffbaf44ea2009-04-09 19:20:49 +0000582 // Contains the sum of the decoded sizes of all target windows seen so far,
583 // including the expected total size of the current target window in progress
584 // (even if some of the current target window has not yet been decoded.)
585 size_t total_of_target_window_sizes_;
openvcdiff311c7142008-08-26 19:29:25 +0000586
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000587 // Contains the byte position within decoded_target_ of the first data that
588 // has not yet been output by AppendNewOutputText().
589 size_t decoded_target_output_position_;
590
openvcdiff311c7142008-08-26 19:29:25 +0000591 // This value is used to ensure the correct order of calls to the interface
592 // functions, i.e., a single call to StartDecoding(), followed by zero or
593 // more calls to DecodeChunk(), followed by a single call to
594 // FinishDecoding().
595 bool start_decoding_was_called_;
596
openvcdiffbaf44ea2009-04-09 19:20:49 +0000597 // If this value is true then the VCD_TARGET flag can be specified to allow
598 // the source segment to be chosen from the previously-decoded target data.
599 // (This is the default behavior.) If it is false, then specifying the
600 // VCD_TARGET flag is considered an error, and the decoder does not need to
601 // keep in memory any decoded target data prior to the current window.
602 bool allow_vcd_target_;
603
openvcdiff311c7142008-08-26 19:29:25 +0000604 // Making these private avoids implicit copy constructor & assignment operator
605 VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT
606 void operator=(const VCDiffStreamingDecoderImpl&);
607};
608
// *** Methods for VCDiffStreamingDecoderImpl

openvcdiff28db8072008-10-10 23:29:11 +0000611const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
612const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
613
614VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
615 : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
openvcdiffbaf44ea2009-04-09 19:20:49 +0000616 maximum_target_window_size_(kDefaultMaximumTargetFileSize),
617 allow_vcd_target_(true) {
openvcdiff311c7142008-08-26 19:29:25 +0000618 delta_window_.Init(this);
619 Reset();
620}
621
622// Reset() will delete the component objects without reallocating them.
623VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
624
625void VCDiffStreamingDecoderImpl::Reset() {
626 start_decoding_was_called_ = false;
627 dictionary_ptr_ = NULL;
628 dictionary_size_ = 0;
629 vcdiff_version_code_ = '\0';
openvcdiff28db8072008-10-10 23:29:11 +0000630 planned_target_file_size_ = kUnlimitedBytes;
openvcdiffbaf44ea2009-04-09 19:20:49 +0000631 total_of_target_window_sizes_ = 0;
openvcdiff311c7142008-08-26 19:29:25 +0000632 addr_cache_.reset();
633 custom_code_table_.reset();
634 custom_code_table_decoder_.reset();
635 delta_window_.Reset();
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000636 decoded_target_output_position_ = 0;
openvcdiffbaf44ea2009-04-09 19:20:49 +0000637}
638
openvcdiff311c7142008-08-26 19:29:25 +0000639void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
640 size_t dictionary_size) {
641 if (start_decoding_was_called_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000642 VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
643 << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000644 return;
645 }
646 unparsed_bytes_.clear();
647 decoded_target_.clear(); // delta_window_.Reset() depends on this
openvcdiff311c7142008-08-26 19:29:25 +0000648 Reset();
649 dictionary_ptr_ = dictionary_ptr;
650 dictionary_size_ = dictionary_size;
651 start_decoding_was_called_ = true;
652}
653
654// Reads the VCDiff delta file header section as described in RFC section 4.1:
655//
656// Header1 - byte = 0xD6 (ASCII 'V' | 0x80)
657// Header2 - byte = 0xC3 (ASCII 'C' | 0x80)
658// Header3 - byte = 0xC4 (ASCII 'D' | 0x80)
659// Header4 - byte
660// Hdr_Indicator - byte
661// [Secondary compressor ID] - byte
662// [Length of code table data] - integer
663// [Code table data]
664//
665// Initializes the code table and address cache objects. Returns RESULT_ERROR
666// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
667// reached before the entire header could be read. (The latter may be an error
668// condition if there is no more data available.) Otherwise, returns
669// RESULT_SUCCESS, and removes the header bytes from the data string.
670//
671// It's relatively inefficient to expect this function to parse any number of
672// input bytes available, down to 1 byte, but it is necessary in case the input
673// is not a properly formatted VCDIFF delta file. If the entire input consists
674// of two bytes "12", then we should recognize that it does not match the
675// initial VCDIFF magic number "VCD" and report an error, rather than waiting
676// indefinitely for more input that will never arrive.
677//
678VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
679 ParseableChunk* data) {
680 if (FoundFileHeader()) {
681 return RESULT_SUCCESS;
682 }
683 size_t data_size = data->UnparsedSize();
684 const DeltaFileHeader* header =
685 reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
686 bool wrong_magic_number = false;
687 switch (data_size) {
688 // Verify only the bytes that are available.
689 default:
690 // Found header contents up to and including VCDIFF version
691 vcdiff_version_code_ = header->header4;
692 if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284)
693 (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000694 VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000695 return RESULT_ERROR;
696 }
697 // fall through
698 case 3:
699 if (header->header3 != 0xC4) { // magic value 'D' | 0x80
700 wrong_magic_number = true;
701 }
702 // fall through
703 case 2:
704 if (header->header2 != 0xC3) { // magic value 'C' | 0x80
705 wrong_magic_number = true;
706 }
707 // fall through
708 case 1:
709 if (header->header1 != 0xD6) { // magic value 'V' | 0x80
710 wrong_magic_number = true;
711 }
712 // fall through
713 case 0:
714 if (wrong_magic_number) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000715 VCD_ERROR << "Did not find VCDIFF header bytes; "
716 "input is not a VCDIFF delta file" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000717 return RESULT_ERROR;
718 }
719 if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
720 }
721 // Secondary compressor not supported.
722 if (header->hdr_indicator & VCD_DECOMPRESS) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000723 VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000724 return RESULT_ERROR;
725 }
726 if (header->hdr_indicator & VCD_CODETABLE) {
727 int bytes_parsed = InitCustomCodeTable(
728 data->UnparsedData() + sizeof(DeltaFileHeader),
729 data->End());
730 switch (bytes_parsed) {
731 case RESULT_ERROR:
732 return RESULT_ERROR;
733 case RESULT_END_OF_DATA:
734 return RESULT_END_OF_DATA;
735 default:
736 data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
737 }
738 } else {
739 addr_cache_.reset(new VCDiffAddressCache);
740 // addr_cache_->Init() will be called
741 // from VCDiffStreamingDecoderImpl::DecodeChunk()
742 data->Advance(sizeof(DeltaFileHeader));
743 }
744 return RESULT_SUCCESS;
745}
746
747int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
748 const char* data_end) {
749 // A custom code table is being specified. Parse the variable-length
750 // cache sizes and begin parsing the encoded custom code table.
751 int32_t near_cache_size = 0, same_cache_size = 0;
752 VCDiffHeaderParser header_parser(data_start, data_end);
753 if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
754 return header_parser.GetResult();
755 }
756 if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
757 return header_parser.GetResult();
758 }
759 custom_code_table_.reset(new struct VCDiffCodeTableData);
760 memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
761 custom_code_table_string_.clear();
762 addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
763 // addr_cache_->Init() will be called
764 // from VCDiffStreamingDecoderImpl::DecodeChunk()
765
766 // If we reach this point (the start of the custom code table)
767 // without encountering a RESULT_END_OF_DATA condition, then we won't call
768 // ReadDeltaFileHeader() again for this delta file.
769 //
770 // Instantiate a recursive decoder to interpret the custom code table
771 // as a VCDIFF encoding of the default code table.
772 custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
773 custom_code_table_decoder_->StartDecoding(
774 reinterpret_cast<const char*>(
775 &VCDiffCodeTableData::kDefaultCodeTableData),
776 sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
openvcdiff28db8072008-10-10 23:29:11 +0000777 custom_code_table_decoder_->SetPlannedTargetFileSize(
778 sizeof(*custom_code_table_));
openvcdiff311c7142008-08-26 19:29:25 +0000779 return static_cast<int>(header_parser.ParsedSize());
780}
781
782VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
783 ParseableChunk* data) {
784 if (!custom_code_table_decoder_.get()) {
785 return RESULT_SUCCESS;
786 }
787 if (!custom_code_table_.get()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000788 VCD_DFATAL << "Internal error: custom_code_table_decoder_ is set,"
789 " but custom_code_table_ is NULL" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000790 return RESULT_ERROR;
791 }
792 OutputString<string> output_string(&custom_code_table_string_);
793 if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
794 data->UnparsedSize(),
795 &output_string)) {
796 return RESULT_ERROR;
797 }
798 if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
799 // Skip over the consumed data.
800 data->Finish();
801 return RESULT_END_OF_DATA;
802 }
803 if (!custom_code_table_decoder_->FinishDecoding()) {
804 return RESULT_ERROR;
805 }
806 if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000807 VCD_DFATAL << "Decoded custom code table size ("
808 << custom_code_table_string_.length()
809 << ") does not match size of a code table ("
810 << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000811 return RESULT_ERROR;
812 }
813 memcpy(custom_code_table_.get(),
814 custom_code_table_string_.data(),
815 sizeof(*custom_code_table_));
816 custom_code_table_string_.clear();
817 // Skip over the consumed data.
818 data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
819 custom_code_table_decoder_.reset();
820 delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
821 return RESULT_SUCCESS;
822}
823
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000824void VCDiffStreamingDecoderImpl::FlushDecodedTarget(
825 OutputStringInterface* output_string) {
826 output_string->append(
827 decoded_target_.data() + decoded_target_output_position_,
828 decoded_target_.size() - decoded_target_output_position_);
829 decoded_target_.clear();
830 delta_window_.set_target_window_start_pos(0);
831 decoded_target_output_position_ = 0;
832}
openvcdiff311c7142008-08-26 19:29:25 +0000833
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000834void VCDiffStreamingDecoderImpl::AppendNewOutputText(
835 OutputStringInterface* output_string) {
836 const size_t bytes_decoded_this_chunk =
837 decoded_target_.size() - decoded_target_output_position_;
838 if (bytes_decoded_this_chunk > 0) {
839 size_t target_bytes_remaining = delta_window_.TargetBytesRemaining();
840 if (target_bytes_remaining > 0) {
841 // The decoder is midway through decoding a target window. Resize
842 // output_string to match the expected length. The interface guarantees
843 // not to resize output_string more than once per target window decoded.
844 output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
845 + target_bytes_remaining);
openvcdiff311c7142008-08-26 19:29:25 +0000846 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000847 output_string->append(
848 decoded_target_.data() + decoded_target_output_position_,
849 bytes_decoded_this_chunk);
850 decoded_target_output_position_ = decoded_target_.size();
openvcdiff311c7142008-08-26 19:29:25 +0000851 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000852}
openvcdiff311c7142008-08-26 19:29:25 +0000853
854bool VCDiffStreamingDecoderImpl::DecodeChunk(
855 const char* data,
856 size_t len,
857 OutputStringInterface* output_string) {
858 if (!start_decoding_was_called_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000859 VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000860 Reset();
861 return false;
862 }
863 ParseableChunk parseable_chunk(data, len);
864 if (!unparsed_bytes_.empty()) {
865 unparsed_bytes_.append(data, len);
866 parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
867 unparsed_bytes_.size());
868 }
openvcdiff311c7142008-08-26 19:29:25 +0000869 VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
870 if (RESULT_SUCCESS == result) {
871 result = ReadCustomCodeTable(&parseable_chunk);
872 }
873 if (RESULT_SUCCESS == result) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000874 while (!parseable_chunk.Empty()) {
875 result = delta_window_.DecodeWindow(&parseable_chunk);
876 if (RESULT_SUCCESS != result) {
877 break;
878 }
879 if (ReachedPlannedTargetFileSize()) {
880 // Found exactly the length we expected. Stop decoding.
881 break;
882 }
883 if (!allow_vcd_target()) {
884 // VCD_TARGET will never be used to reference target data before the
885 // start of the current window, so flush and clear the contents of
886 // decoded_target_.
887 FlushDecodedTarget(output_string);
888 }
889 }
openvcdiff311c7142008-08-26 19:29:25 +0000890 }
891 if (RESULT_ERROR == result) {
892 Reset(); // Don't allow further DecodeChunk calls
893 return false;
894 }
895 unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
896 parseable_chunk.UnparsedSize());
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000897 AppendNewOutputText(output_string);
openvcdiff311c7142008-08-26 19:29:25 +0000898 return true;
899}
900
901// Finishes decoding after all data has been received. Returns true
902// if decoding of the entire stream was successful.
903bool VCDiffStreamingDecoderImpl::FinishDecoding() {
904 bool success = true;
905 if (!start_decoding_was_called_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000906 VCD_WARNING << "FinishDecoding() called before StartDecoding(),"
907 " or called after DecodeChunk() returned false"
908 << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000909 success = false;
910 } else if (!IsDecodingComplete()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000911 VCD_ERROR << "FinishDecoding() called before parsing entire"
912 " delta file window" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000913 success = false;
914 }
915 // Reset the object state for the next decode operation
916 Reset();
917 return success;
918}
919
openvcdiff28db8072008-10-10 23:29:11 +0000920bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
921 size_t window_size) const {
922 if (window_size > maximum_target_window_size_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000923 VCD_ERROR << "Length of target window (" << window_size
924 << ") exceeds limit of " << maximum_target_window_size_
925 << " bytes" << VCD_ENDL;
openvcdiff28db8072008-10-10 23:29:11 +0000926 return true;
927 }
928 if (HasPlannedTargetFileSize()) {
929 // The logical expression to check would be:
930 //
openvcdiffbaf44ea2009-04-09 19:20:49 +0000931 // total_of_target_window_sizes_ + window_size > planned_target_file_size_
openvcdiff28db8072008-10-10 23:29:11 +0000932 //
933 // but the addition might cause an integer overflow if target_bytes_to_add
934 // is very large. So it is better to check target_bytes_to_add against
935 // the remaining planned target bytes.
936 size_t remaining_planned_target_file_size =
openvcdiffbaf44ea2009-04-09 19:20:49 +0000937 planned_target_file_size_ - total_of_target_window_sizes_;
openvcdiff28db8072008-10-10 23:29:11 +0000938 if (window_size > remaining_planned_target_file_size) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000939 VCD_ERROR << "Length of target window (" << window_size
940 << " bytes) plus previous windows ("
941 << total_of_target_window_sizes_
942 << " bytes) would exceed planned size of "
943 << planned_target_file_size_ << " bytes" << VCD_ENDL;
openvcdiff28db8072008-10-10 23:29:11 +0000944 return true;
945 }
946 }
947 size_t remaining_maximum_target_bytes =
openvcdiffbaf44ea2009-04-09 19:20:49 +0000948 maximum_target_file_size_ - total_of_target_window_sizes_;
openvcdiff28db8072008-10-10 23:29:11 +0000949 if (window_size > remaining_maximum_target_bytes) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000950 VCD_ERROR << "Length of target window (" << window_size
951 << " bytes) plus previous windows ("
952 << total_of_target_window_sizes_
953 << " bytes) would exceed maximum target file size of "
954 << maximum_target_file_size_ << " bytes" << VCD_ENDL;
openvcdiff28db8072008-10-10 23:29:11 +0000955 return true;
956 }
957 return false;
958}
959
openvcdiff311c7142008-08-26 19:29:25 +0000960// *** Methods for VCDiffDeltaFileWindow
961
openvcdiff311c7142008-08-26 19:29:25 +0000962void VCDiffDeltaFileWindow::Reset() {
963 found_header_ = false;
964
965 // Mark the start of the current target window.
966 target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
967 target_window_length_ = 0;
968
969 source_segment_ptr_ = NULL;
970 source_segment_length_ = 0;
971
972 instructions_and_sizes_.Invalidate();
973 data_for_add_and_run_.Invalidate();
974 addresses_for_copy_.Invalidate();
975
976 interleaved_bytes_expected_ = 0;
977
978 has_checksum_ = false;
979 expected_checksum_ = 0;
openvcdiff311c7142008-08-26 19:29:25 +0000980}
981
982VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
983 VCDiffHeaderParser* header_parser) {
984 size_t add_and_run_data_length = 0;
985 size_t instructions_and_sizes_length = 0;
986 size_t addresses_length = 0;
987 if (!header_parser->ParseSectionLengths(has_checksum_,
988 &add_and_run_data_length,
989 &instructions_and_sizes_length,
990 &addresses_length,
991 &expected_checksum_)) {
992 return header_parser->GetResult();
993 }
994 if (parent_->AllowInterleaved() &&
995 (add_and_run_data_length == 0) &&
996 (addresses_length == 0)) {
997 // The interleaved format is being used.
998 interleaved_bytes_expected_ =
999 static_cast<int>(instructions_and_sizes_length);
1000 UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
1001 header_parser->End());
1002 } else {
1003 // If interleaved format is not used, then the whole window contents
1004 // must be available before decoding can begin. If only part of
1005 // the current window is available, then report end of data
1006 // and re-parse the whole header when DecodeChunk() is called again.
1007 if (header_parser->UnparsedSize() < (add_and_run_data_length +
1008 instructions_and_sizes_length +
1009 addresses_length)) {
1010 return RESULT_END_OF_DATA;
1011 }
1012 data_for_add_and_run_.Init(header_parser->UnparsedData(),
1013 add_and_run_data_length);
1014 instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1015 instructions_and_sizes_length);
1016 addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1017 if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001018 VCD_ERROR << "The end of the instructions section "
1019 "does not match the end of the delta window" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001020 return RESULT_ERROR;
1021 }
1022 }
1023 reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1024 instructions_and_sizes_.End());
1025 return RESULT_SUCCESS;
1026}
1027
1028// Here are the elements of the delta window header to be parsed,
1029// from section 4 of the RFC:
1030//
1031// Window1
1032// Win_Indicator - byte
1033// [Source segment size] - integer
1034// [Source segment position] - integer
1035// The delta encoding of the target window
1036// Length of the delta encoding - integer
1037// The delta encoding
1038// Size of the target window - integer
1039// Delta_Indicator - byte
1040// Length of data for ADDs and RUNs - integer
1041// Length of instructions and sizes - integer
1042// Length of addresses for COPYs - integer
1043// Data section for ADDs and RUNs - array of bytes
1044// Instructions and sizes section - array of bytes
1045// Addresses section for COPYs - array of bytes
1046//
1047VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1048 ParseableChunk* parseable_chunk) {
openvcdiff@gmail.comf1dd9332009-10-09 22:40:32 +00001049 std::string* decoded_target = parent_->decoded_target();
openvcdiff311c7142008-08-26 19:29:25 +00001050 VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1051 parseable_chunk->End());
1052 size_t source_segment_position = 0;
1053 unsigned char win_indicator = 0;
1054 if (!header_parser.ParseWinIndicatorAndSourceSegment(
1055 parent_->dictionary_size(),
1056 decoded_target->size(),
openvcdiffbaf44ea2009-04-09 19:20:49 +00001057 parent_->allow_vcd_target(),
openvcdiff311c7142008-08-26 19:29:25 +00001058 &win_indicator,
1059 &source_segment_length_,
1060 &source_segment_position)) {
1061 return header_parser.GetResult();
1062 }
1063 has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1064 if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1065 return header_parser.GetResult();
1066 }
openvcdiff28db8072008-10-10 23:29:11 +00001067 if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1068 // An error has been logged by TargetWindowWouldExceedSizeLimits().
openvcdiff311c7142008-08-26 19:29:25 +00001069 return RESULT_ERROR;
1070 }
1071 header_parser.ParseDeltaIndicator();
1072 VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1073 if (RESULT_SUCCESS != setup_return_code) {
1074 return setup_return_code;
1075 }
1076 // Reserve enough space in the output string for the current target window.
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001077 const size_t wanted_capacity =
1078 target_window_start_pos_ + target_window_length_;
1079 if (decoded_target->capacity() < wanted_capacity) {
1080 decoded_target->reserve(wanted_capacity);
1081 }
openvcdiff311c7142008-08-26 19:29:25 +00001082 // Get a pointer to the start of the source segment.
1083 if (win_indicator & VCD_SOURCE) {
1084 source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1085 } else if (win_indicator & VCD_TARGET) {
1086 // This assignment must happen after the reserve().
1087 // decoded_target should not be resized again while processing this window,
1088 // so source_segment_ptr_ should remain valid.
1089 source_segment_ptr_ = decoded_target->data() + source_segment_position;
1090 }
1091 // The whole window header was found and parsed successfully.
1092 found_header_ = true;
1093 parseable_chunk->Advance(header_parser.ParsedSize());
openvcdiffbaf44ea2009-04-09 19:20:49 +00001094 parent_->AddToTotalTargetWindowSize(target_window_length_);
openvcdiff311c7142008-08-26 19:29:25 +00001095 return RESULT_SUCCESS;
1096}
1097
1098void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1099 ParseableChunk* parseable_chunk) {
1100 if (IsInterleaved()) {
1101 size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1102 // Reduce expected instruction segment length by bytes parsed
1103 interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1104 parseable_chunk->Advance(bytes_parsed);
1105 }
1106}
1107
openvcdiffbaf44ea2009-04-09 19:20:49 +00001108inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
openvcdiff311c7142008-08-26 19:29:25 +00001109 return parent_->decoded_target()->size() - target_window_start_pos_;
1110}
1111
1112size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1113 if (target_window_length_ == 0) {
1114 // There is no window being decoded at present
1115 return 0;
1116 } else {
1117 return target_window_length_ - TargetBytesDecoded();
1118 }
1119}
1120
openvcdiffbaf44ea2009-04-09 19:20:49 +00001121inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
openvcdiff311c7142008-08-26 19:29:25 +00001122 parent_->decoded_target()->append(data, size);
openvcdiff311c7142008-08-26 19:29:25 +00001123}
1124
openvcdiffbaf44ea2009-04-09 19:20:49 +00001125inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
openvcdiff311c7142008-08-26 19:29:25 +00001126 parent_->decoded_target()->append(size, byte);
openvcdiff311c7142008-08-26 19:29:25 +00001127}
1128
1129VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1130 if (size > data_for_add_and_run_.UnparsedSize()) {
1131 return RESULT_END_OF_DATA;
1132 }
1133 // Write the next "size" data bytes
openvcdiffbaf44ea2009-04-09 19:20:49 +00001134 CopyBytes(data_for_add_and_run_.UnparsedData(), size);
openvcdiff311c7142008-08-26 19:29:25 +00001135 data_for_add_and_run_.Advance(size);
1136 return RESULT_SUCCESS;
1137}
1138
1139VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1140 if (data_for_add_and_run_.Empty()) {
1141 return RESULT_END_OF_DATA;
1142 }
1143 // Write "size" copies of the next data byte
1144 RunByte(*data_for_add_and_run_.UnparsedData(), size);
1145 data_for_add_and_run_.Advance(1);
1146 return RESULT_SUCCESS;
1147}
1148
1149VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1150 unsigned char mode) {
1151 // Keep track of the number of target bytes decoded as a local variable
1152 // to avoid recalculating it each time it is needed.
1153 size_t target_bytes_decoded = TargetBytesDecoded();
1154 const VCDAddress here_address =
1155 static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1156 const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1157 here_address,
1158 mode,
1159 addresses_for_copy_.UnparsedDataAddr(),
1160 addresses_for_copy_.End());
1161 switch (decoded_address) {
1162 case RESULT_ERROR:
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001163 VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001164 return RESULT_ERROR;
1165 case RESULT_END_OF_DATA:
1166 return RESULT_END_OF_DATA;
1167 default:
1168 if ((decoded_address < 0) || (decoded_address > here_address)) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001169 VCD_DFATAL << "Internal error: unexpected address " << decoded_address
1170 << " returned from DecodeAddress, with here_address = "
1171 << here_address << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001172 return RESULT_ERROR;
1173 }
1174 break;
1175 }
1176 size_t address = static_cast<size_t>(decoded_address);
1177 if ((address + size) <= source_segment_length_) {
1178 // Copy all data from source segment
openvcdiffbaf44ea2009-04-09 19:20:49 +00001179 CopyBytes(&source_segment_ptr_[address], size);
openvcdiff311c7142008-08-26 19:29:25 +00001180 return RESULT_SUCCESS;
1181 }
1182 // Copy some data from target window...
1183 if (address < source_segment_length_) {
1184 // ... plus some data from source segment
1185 const size_t partial_copy_size = source_segment_length_ - address;
openvcdiffbaf44ea2009-04-09 19:20:49 +00001186 CopyBytes(&source_segment_ptr_[address], partial_copy_size);
openvcdiff311c7142008-08-26 19:29:25 +00001187 target_bytes_decoded += partial_copy_size;
1188 address += partial_copy_size;
1189 size -= partial_copy_size;
1190 }
1191 address -= source_segment_length_;
1192 // address is now based at start of target window
1193 const char* const target_segment_ptr = parent_->decoded_target()->data() +
1194 target_window_start_pos_;
1195 while (size > (target_bytes_decoded - address)) {
1196 // Recursive copy that extends into the yet-to-be-copied target data
1197 const size_t partial_copy_size = target_bytes_decoded - address;
openvcdiffbaf44ea2009-04-09 19:20:49 +00001198 CopyBytes(&target_segment_ptr[address], partial_copy_size);
openvcdiff311c7142008-08-26 19:29:25 +00001199 target_bytes_decoded += partial_copy_size;
1200 address += partial_copy_size;
1201 size -= partial_copy_size;
1202 }
openvcdiffbaf44ea2009-04-09 19:20:49 +00001203 CopyBytes(&target_segment_ptr[address], size);
openvcdiff311c7142008-08-26 19:29:25 +00001204 return RESULT_SUCCESS;
1205}
1206
1207int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1208 if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1209 != parseable_chunk->UnparsedData())) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001210 VCD_DFATAL << "Internal error: interleaved format is used, but the"
1211 " input pointer does not point to the instructions section"
1212 << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001213 return RESULT_ERROR;
1214 }
1215 while (TargetBytesDecoded() < target_window_length_) {
1216 int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1217 unsigned char mode = 0;
1218 VCDiffInstructionType instruction =
1219 reader_.GetNextInstruction(&decoded_size, &mode);
1220 switch (instruction) {
1221 case VCD_INSTRUCTION_END_OF_DATA:
1222 UpdateInstructionPointer(parseable_chunk);
1223 return RESULT_END_OF_DATA;
1224 case VCD_INSTRUCTION_ERROR:
1225 return RESULT_ERROR;
1226 default:
1227 break;
1228 }
1229 const size_t size = static_cast<size_t>(decoded_size);
1230 // The value of "size" itself could be enormous (say, INT32_MAX)
1231 // so check it individually against the limit to protect against
1232 // overflow when adding it to something else.
1233 if ((size > target_window_length_) ||
1234 ((size + TargetBytesDecoded()) > target_window_length_)) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001235 VCD_ERROR << VCDiffInstructionName(instruction)
1236 << " with size " << size
1237 << " plus existing " << TargetBytesDecoded()
1238 << " bytes of target data exceeds length of target"
1239 " window (" << target_window_length_ << " bytes)"
1240 << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001241 return RESULT_ERROR;
1242 }
1243 VCDiffResult result = RESULT_SUCCESS;
1244 switch (instruction) {
1245 case VCD_ADD:
1246 result = DecodeAdd(size);
1247 break;
1248 case VCD_RUN:
1249 result = DecodeRun(size);
1250 break;
1251 case VCD_COPY:
1252 result = DecodeCopy(size, mode);
1253 break;
1254 default:
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001255 VCD_DFATAL << "Unexpected instruction type " << instruction
1256 << "in opcode stream" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001257 return RESULT_ERROR;
1258 }
1259 switch (result) {
1260 case RESULT_END_OF_DATA:
1261 reader_.UnGetInstruction();
1262 UpdateInstructionPointer(parseable_chunk);
1263 return RESULT_END_OF_DATA;
1264 case RESULT_ERROR:
1265 return RESULT_ERROR;
1266 case RESULT_SUCCESS:
1267 break;
1268 }
1269 }
1270 if (TargetBytesDecoded() != target_window_length_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001271 VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded()
1272 << " bytes) does not match expected size ("
1273 << target_window_length_ << " bytes)" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001274 return RESULT_ERROR;
1275 }
1276 const char* const target_window_start =
1277 parent_->decoded_target()->data() + target_window_start_pos_;
1278 if (has_checksum_ &&
1279 (ComputeAdler32(target_window_start, target_window_length_)
1280 != expected_checksum_)) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001281 VCD_ERROR << "Target data does not match checksum; this could mean "
1282 "that the wrong dictionary was used" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001283 return RESULT_ERROR;
1284 }
1285 if (!instructions_and_sizes_.Empty()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001286 VCD_ERROR << "Excess instructions and sizes left over "
1287 "after decoding target window" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001288 return RESULT_ERROR;
1289 }
1290 if (!IsInterleaved()) {
1291 // Standard format is being used, with three separate sections for the
1292 // instructions, data, and addresses.
1293 if (!data_for_add_and_run_.Empty()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001294 VCD_ERROR << "Excess ADD/RUN data left over "
1295 "after decoding target window" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001296 return RESULT_ERROR;
1297 }
1298 if (!addresses_for_copy_.Empty()) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001299 VCD_ERROR << "Excess COPY addresses left over "
1300 "after decoding target window" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001301 return RESULT_ERROR;
1302 }
1303 // Reached the end of the window. Update the ParseableChunk to point to the
1304 // end of the addresses section, which is the last section in the window.
1305 parseable_chunk->SetPosition(addresses_for_copy_.End());
1306 } else {
openvcdiffbaf44ea2009-04-09 19:20:49 +00001307 // Interleaved format is being used.
openvcdiff311c7142008-08-26 19:29:25 +00001308 UpdateInstructionPointer(parseable_chunk);
1309 }
1310 return RESULT_SUCCESS;
1311}
1312
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001313VCDiffResult VCDiffDeltaFileWindow::DecodeWindow(
openvcdiff311c7142008-08-26 19:29:25 +00001314 ParseableChunk* parseable_chunk) {
1315 if (!parent_) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001316 VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() "
1317 "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +00001318 return RESULT_ERROR;
1319 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001320 if (!found_header_) {
1321 switch (ReadHeader(parseable_chunk)) {
openvcdiff311c7142008-08-26 19:29:25 +00001322 case RESULT_END_OF_DATA:
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001323 return RESULT_END_OF_DATA;
openvcdiff311c7142008-08-26 19:29:25 +00001324 case RESULT_ERROR:
1325 return RESULT_ERROR;
1326 default:
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001327 // Reset address cache between windows (RFC section 5.1)
1328 if (!parent_->addr_cache()->Init()) {
1329 VCD_DFATAL << "Error initializing address cache" << VCD_ENDL;
1330 return RESULT_ERROR;
1331 }
openvcdiff311c7142008-08-26 19:29:25 +00001332 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001333 } else {
1334 // We are resuming a window that was partially decoded before a
1335 // RESULT_END_OF_DATA was returned. This can only happen on the first
1336 // loop iteration, and only if the interleaved format is enabled and used.
1337 if (!IsInterleaved()) {
1338 VCD_DFATAL << "Internal error: Resumed decoding of a delta file window"
1339 " when interleaved format is not being used" << VCD_ENDL;
1340 return RESULT_ERROR;
openvcdiff311c7142008-08-26 19:29:25 +00001341 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001342 UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1343 parseable_chunk->End());
1344 reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1345 instructions_and_sizes_.End());
openvcdiff311c7142008-08-26 19:29:25 +00001346 }
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +00001347 switch (DecodeBody(parseable_chunk)) {
1348 case RESULT_END_OF_DATA:
1349 if (MoreDataExpected()) {
1350 return RESULT_END_OF_DATA;
1351 } else {
1352 VCD_ERROR << "End of data reached while decoding VCDIFF delta file"
1353 << VCD_ENDL;
1354 // fall through to RESULT_ERROR case
1355 }
1356 case RESULT_ERROR:
1357 return RESULT_ERROR;
1358 default:
1359 break; // DecodeBody succeeded
1360 }
1361 // Get ready to read a new delta window
1362 Reset();
openvcdiff311c7142008-08-26 19:29:25 +00001363 return RESULT_SUCCESS;
1364}
1365
1366// *** Methods for VCDiffStreamingDecoder
1367
1368VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1369: impl_(new VCDiffStreamingDecoderImpl) { }
1370
1371VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1372
1373void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1374 impl_->StartDecoding(source, len);
1375}
1376
1377bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1378 const char* data,
1379 size_t len,
1380 OutputStringInterface* output_string) {
1381 return impl_->DecodeChunk(data, len, output_string);
1382}
1383
1384bool VCDiffStreamingDecoder::FinishDecoding() {
1385 return impl_->FinishDecoding();
1386}
1387
openvcdiff28db8072008-10-10 23:29:11 +00001388bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1389 size_t new_maximum_target_file_size) {
1390 return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1391}
1392
1393bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1394 size_t new_maximum_target_window_size) {
1395 return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1396}
1397
openvcdiffbaf44ea2009-04-09 19:20:49 +00001398void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1399 impl_->SetAllowVcdTarget(allow_vcd_target);
openvcdiff311c7142008-08-26 19:29:25 +00001400}
1401
1402bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1403 size_t dictionary_size,
1404 const string& encoding,
1405 OutputStringInterface* target) {
1406 target->clear();
1407 decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1408 if (!decoder_.DecodeChunkToInterface(encoding.data(),
1409 encoding.size(),
1410 target)) {
1411 return false;
1412 }
1413 return decoder_.FinishDecoding();
1414}
1415
1416} // namespace open_vcdiff