blob: 02afa872ae2fbefad27aa6a93209add433dcd76e [file] [log] [blame]
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implements a Decoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
29
30#include <config.h>
31#include "google/vcdecoder.h"
openvcdiff28db8072008-10-10 23:29:11 +000032#include <stddef.h> // size_t, ptrdiff_t
openvcdiff311c7142008-08-26 19:29:25 +000033#include <stdint.h> // int32_t
openvcdiff28db8072008-10-10 23:29:11 +000034#include <string.h> // memcpy, memset
openvcdiff311c7142008-08-26 19:29:25 +000035#include <memory> // auto_ptr
36#include <string>
37#include "addrcache.h"
38#include "checksum.h"
39#include "codetable.h"
40#include "decodetable.h"
41#include "headerparser.h"
42#include "logging.h"
43#include "google/output_string.h"
44#include "varint_bigendian.h"
45#include "vcdiff_defs.h"
46
47namespace open_vcdiff {
48
49namespace {
50
// Kinds of annotated span that can appear in annotated decoder output.
// The enumerators are listed in the same order as the entries of
// kAnnotationStartTags and kAnnotationEndTags.
enum VCDiffAnnotationType {
  VCD_ANNOTATION_LITERAL = 0,
  VCD_ANNOTATION_DMATCH = 1,
  VCD_ANNOTATION_BMATCH = 2
};
56
// Opening tags for annotated output, in the order literal, dmatch, bmatch.
static const char* kAnnotationStartTags[] = {
  "<literal>",  // literal
  "<dmatch>",   // dmatch
  "<bmatch>"    // bmatch
};
62
// Closing tags for annotated output, in the order literal, dmatch, bmatch.
static const char* kAnnotationEndTags[] = {
  "</literal>",  // literal
  "</dmatch>",   // dmatch
  "</bmatch>"    // bmatch
};
68
69} // anonymous namespace
70
// This class is used to parse delta file windows as described
// in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
//
// Here is the window format copied from the RFC:
//
//     Window1
//         Win_Indicator                            - byte
//         [Source segment size]                    - integer
//         [Source segment position]                - integer
//         The delta encoding of the target window
//             Length of the delta encoding         - integer
//             The delta encoding
//                 Size of the target window        - integer
//                 Delta_Indicator                  - byte
//                 Length of data for ADDs and RUNs - integer
//                 Length of instructions and sizes - integer
//                 Length of addresses for COPYs    - integer
//                 Data section for ADDs and RUNs   - array of bytes
//                 Instructions and sizes section   - array of bytes
//                 Addresses section for COPYs      - array of bytes
//     Window2
//     ...
//
// Sample usage:
//
//     VCDiffDeltaFileWindow delta_window_;
//     delta_window_.Init(parent);
//     ParseableChunk parseable_chunk(input_buffer,
//                                    input_size,
//                                    leftover_unencoded_bytes);
//     switch (delta_window_.DecodeWindows(&parseable_chunk)) {
//       case RESULT_END_OF_DATA:
//         <Read more input and retry DecodeWindows later.>
//       case RESULT_ERROR:
//         <Handle error case.  An error log message has already been generated.>
//     }
//
// DecodeWindows consumes as many windows from the input as it can.  It only
// needs to be placed within a loop if the loop is used to obtain more input
// (delta file) data.
//
111//
112class VCDiffDeltaFileWindow {
113 public:
openvcdiff28db8072008-10-10 23:29:11 +0000114#ifndef VCDIFF_HAS_GLOBAL_STRING
115 typedef std::string string;
116#endif // !VCDIFF_HAS_GLOBAL_STRING
117
openvcdiff311c7142008-08-26 19:29:25 +0000118 VCDiffDeltaFileWindow();
119 ~VCDiffDeltaFileWindow();
120
121 // Init() should be called immediately after constructing the
122 // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be
123 // invoked, or an error will occur.
124 void Init(VCDiffStreamingDecoderImpl* parent);
125
126 // Resets the pointers to the data sections in the current window.
127 void Reset();
128
129 bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
130 unsigned char max_mode) {
131 return reader_.UseCodeTable(code_table_data, max_mode);
132 }
133
134 // Decodes as many delta windows as possible using the input data from
135 // *parseable_chunk. Appends the decoded target windows to
136 // parent_->decoded_target(). If annotated output is enabled, appends
137 // annotated output to parent_->annotated_output(). Returns RESULT_SUCCESS on
138 // success, or RESULT_END_OF_DATA if the end of input was reached before the
139 // entire window could be decoded and more input is expected (only possible if
140 // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
141 // decoding. In the RESULT_ERROR case, the value of parseable_chunk->pointer_
142 // is undefined; otherwise, parseable_chunk->Advance() is called to point to
143 // the input data position just after the data that has been decoded.
144 //
openvcdiff28db8072008-10-10 23:29:11 +0000145 // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder
openvcdiff311c7142008-08-26 19:29:25 +0000146 // expects *exactly* this number of target bytes to be decoded from one or
147 // more delta file windows. If this number is met exactly after finishing a
148 // delta window, this function will return RESULT_SUCCESS without processing
149 // any more bytes from data_pointer. If this number is exceeded while
150 // decoding a window, but was not met before starting that window,
151 // then RESULT_ERROR will be returned.
152 //
153 VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
154
155 bool FoundWindowHeader() const {
156 return found_header_;
157 }
158
159 bool MoreDataExpected() const {
160 // When parsing an interleaved-format delta file,
161 // every time DecodeBody() exits, interleaved_bytes_expected_
162 // will be decremented by the number of bytes parsed. If it
163 // reaches zero, then there is no more data expected because
164 // the size of the interleaved section (given in the window
165 // header) has been reached.
166 return IsInterleaved() && (interleaved_bytes_expected_ > 0);
167 }
168
169 // Returns the number of bytes remaining to be decoded in the target window.
170 // If not in the process of decoding a window, returns 0.
171 size_t TargetBytesRemaining();
172
173 void EnableAnnotatedOutput() {
174 if (!annotated_output_.get()) {
175 annotated_output_.reset(new string);
176 }
177 }
178
179 void DisableAnnotatedOutput() {
180 annotated_output_.reset(NULL);
181 }
182
183 private:
184 // Reads the header of the window section as described in RFC sections 4.2 and
185 // 4.3, up to and including the value "Length of addresses for COPYs". If the
186 // entire header is found, this function sets up the DeltaWindowSections
187 // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
188 // that the decoder can begin decoding the opcodes in these sections. Returns
189 // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
190 // available data was reached before the entire header could be read. (The
191 // latter may be an error condition if there is no more data available.)
192 // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
193 // parsed header.
194 //
195 VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
196
197 // After the window header has been parsed as far as the Delta_Indicator,
198 // this function is called to parse the following delta window header fields:
199 //
200 // Length of data for ADDs and RUNs - integer (VarintBE format)
201 // Length of instructions and sizes - integer (VarintBE format)
202 // Length of addresses for COPYs - integer (VarintBE format)
203 //
204 // If has_checksum_ is true, it also looks for the following element:
205 //
206 // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
207 //
208 // It sets up the DeltaWindowSections instructions_and_sizes_,
209 // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
210 // is being used, all three sections will include the entire window body; if
211 // the standard format is used, three non-overlapping window sections will be
212 // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
213 // if standard format is being used and there is not enough input data to read
214 // the entire window body. Otherwise, returns RESULT_SUCCESS.
215 VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
216
217 // Decodes the body of the window section as described in RFC sections 4.3,
218 // including the sections "Data section for ADDs and RUNs", "Instructions
219 // and sizes section", and "Addresses section for COPYs". These sections
220 // must already have been set up by ReadWindowHeader(). Returns a
221 // non-negative value on success, or RESULT_END_OF_DATA if the end of input
222 // was reached before the entire window could be decoded (only possible if
223 // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
224 // decoding. Appends as much of the decoded target window as possible to
225 // parent->decoded_target().
226 //
227 int DecodeBody(ParseableChunk* parseable_chunk);
228
229 // Returns the number of bytes already decoded into the target window.
230 size_t TargetBytesDecoded();
231
232 // Decodes a single ADD instruction, updating parent_->decoded_target_.
233 VCDiffResult DecodeAdd(size_t size);
234
235 // Decodes a single RUN instruction, updating parent_->decoded_target_.
236 VCDiffResult DecodeRun(size_t size);
237
238 // Decodes a single COPY instruction, updating parent_->decoded_target_.
239 VCDiffResult DecodeCopy(size_t size, unsigned char mode);
240
241 // When using the interleaved format, this function is called both on parsing
242 // the header and on resuming after a RESULT_END_OF_DATA was returned from a
243 // previous call to DecodeBody(). It sets up all three section pointers to
244 // reference the same interleaved stream of instructions, sizes, addresses,
245 // and data. These pointers must be reset every time that work resumes on a
246 // delta window, because the input data string may have been changed or
247 // resized since DecodeBody() last returned.
248 void UpdateInterleavedSectionPointers(const char* data_pos,
249 const char* data_end) {
250 const ptrdiff_t available_data = data_end - data_pos;
251 // Don't read past the end of currently-available data
252 if (available_data > interleaved_bytes_expected_) {
253 instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
254 } else {
255 instructions_and_sizes_.Init(data_pos, available_data);
256 }
257 data_for_add_and_run_.Init(&instructions_and_sizes_);
258 addresses_for_copy_.Init(&instructions_and_sizes_);
259 }
260
261 // If true, the interleaved format described in AllowInterleaved() is used
262 // for the current delta file. Only valid after ReadWindowHeader() has been
263 // called and returned a positive number (i.e., the whole header was parsed),
264 // but before the window has finished decoding.
265 //
266 bool IsInterleaved() const {
267 // If the sections are interleaved, both addresses_for_copy_ and
268 // data_for_add_and_run_ should point at instructions_and_sizes_.
269 return !addresses_for_copy_.IsOwned();
270 }
271
272 // Executes a single COPY or ADD instruction, appending data to
273 // parent_->decoded_target().
274 void CopyBytes(const char* data,
275 size_t size,
276 VCDiffAnnotationType annotation_type);
277
278 // Executes a single RUN instruction, appending data to
279 // parent_->decoded_target().
280 void RunByte(unsigned char byte, size_t size);
281
282 void AppendAnnotatedOutput(string* annotated_output) {
283 if (annotated_output_.get()) {
284 annotated_output->append(*annotated_output_.get());
285 }
286 }
287
288 // Advance *parseable_chunk to point to the current position in the
289 // instructions/sizes section. If interleaved format is used, then
290 // decrement the number of expected bytes in the instructions/sizes section
291 // by the number of instruction/size bytes parsed.
292 void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
293
294 // The parent object which was passed to Init().
295 VCDiffStreamingDecoderImpl* parent_;
296
297 // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
298 // has been called and succeeded in parsing the delta window header, but the
299 // entire window has not yet been decoded.
300 bool found_header_;
301
302 // Contents and length of the current source window. source_segment_ptr_
303 // will be non-NULL if (a) the window section header for the current window
304 // has been read, but the window has not yet finished decoding; or
305 // (b) the window did not specify a source segment.
306 const char* source_segment_ptr_;
307 size_t source_segment_length_;
308
309 // The delta encoding window sections as defined in RFC section 4.3.
310 // The pointer for each section will be incremented as data is consumed and
311 // decoded from that section. If the interleaved format is used,
312 // data_for_add_and_run_ and addresses_for_copy_ will both point to
313 // instructions_and_sizes_; otherwise, they will be separate data sections.
314 //
315 DeltaWindowSection instructions_and_sizes_;
316 DeltaWindowSection data_for_add_and_run_;
317 DeltaWindowSection addresses_for_copy_;
318
319 // The expected bytes left to decode in instructions_and_sizes_. Only used
320 // for the interleaved format.
321 int interleaved_bytes_expected_;
322
323 // The expected length of the target window once it has been decoded.
324 size_t target_window_length_;
325
326 // The index in decoded_target at which the first byte of the current
327 // target window was/will be written.
328 size_t target_window_start_pos_;
329
330 // If has_checksum_ is true, then expected_checksum_ contains an Adler32
331 // checksum of the target window data. This is an extension included in the
332 // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
333 bool has_checksum_;
334 VCDChecksum expected_checksum_;
335
336 VCDiffCodeTableReader reader_;
337
338 // This value is initialized to NULL, which means that annotated output is
339 // disabled. If EnableAnnotatedOutput() is called, it will be set to point
340 // to a new string object, and annotated output will be gathered into that
341 // string.
342 std::auto_ptr<string> annotated_output_;
343
344 // Making these private avoids implicit copy constructor & assignment operator
345 VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT
346 void operator=(const VCDiffDeltaFileWindow&);
347};
348
openvcdiff28db8072008-10-10 23:29:11 +0000349// *** Inline methods for VCDiffDeltaFileWindow
350
351inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
352 Reset();
353}
354
355inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
356
357inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
358 parent_ = parent;
359}
360
openvcdiff311c7142008-08-26 19:29:25 +0000361class VCDiffStreamingDecoderImpl {
362 public:
openvcdiff28db8072008-10-10 23:29:11 +0000363#ifndef VCDIFF_HAS_GLOBAL_STRING
364 typedef std::string string;
365#endif // !VCDIFF_HAS_GLOBAL_STRING
366
367 // The default maximum target file size (and target window size) if
368 // SetMaximumTargetFileSize() is not called.
369 static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB
370
371 // The largest value that can be passed to SetMaximumTargetFileSize() or
372 // SetMaximumTargetWindowSize(). Using a larger value will result in an
373 // error.
374 static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX
375
376 // A constant that is the default value for planned_target_file_size_,
openvcdiff311c7142008-08-26 19:29:25 +0000377 // indicating that the decoder does not have an expected length
378 // for the target data.
379 static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
380
381 VCDiffStreamingDecoderImpl();
382 ~VCDiffStreamingDecoderImpl();
383
384 // Resets all member variables to their initial states.
385 void Reset();
386
387 // These functions are identical to their counterparts
388 // in VCDiffStreamingDecoder.
389 //
390 void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
391
392 bool DecodeChunk(const char* data,
393 size_t len,
394 OutputStringInterface* output_string);
395
396 bool FinishDecoding();
397
398 // If true, the version of VCDIFF used in the current delta file allows
399 // for the interleaved format, in which instructions, addresses and data
400 // are all sent interleaved in the instructions section of each window
401 // rather than being sent in separate sections. This is not part of
402 // the VCDIFF draft standard, so we've defined a special version code
403 // 'S' which implies that this feature is available. Even if interleaving
404 // is supported, it is not mandatory; interleaved format will be implied
405 // if the address and data sections are both zero-length.
406 //
407 bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
408
409 // If true, the version of VCDIFF used in the current delta file allows
410 // each delta window to contain an Adler32 checksum of the target window data.
411 // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
412 // this checksum will appear as a variable-length integer, just after the
413 // "length of addresses for COPYs" value and before the window data sections.
414 // It is possible for some windows in a delta file to use the checksum feature
415 // and for others not to use it (and leave the flag bit set to 0.)
416 // Just as with AllowInterleaved(), this extension is not part of the draft
417 // standard and is only available when the version code 'S' is specified.
418 //
419 bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
420
openvcdiff28db8072008-10-10 23:29:11 +0000421 bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
422 if (new_maximum_target_file_size > kTargetSizeLimit) {
423 LOG(ERROR) << "Specified maximum target file size "
424 << new_maximum_target_file_size << " exceeds limit of "
425 << kTargetSizeLimit << " bytes" << LOG_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000426 return false;
427 }
openvcdiff28db8072008-10-10 23:29:11 +0000428 maximum_target_file_size_ = new_maximum_target_file_size;
429 return true;
430 }
431
432 bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
433 if (new_maximum_target_window_size > kTargetSizeLimit) {
434 LOG(ERROR) << "Specified maximum target window size "
435 << new_maximum_target_window_size << " exceeds limit of "
436 << kTargetSizeLimit << " bytes" << LOG_ENDL;
437 return false;
438 }
439 maximum_target_window_size_ = new_maximum_target_window_size;
440 return true;
441 }
442
443 // See description of planned_target_file_size_, below.
444 bool HasPlannedTargetFileSize() const {
445 return planned_target_file_size_ != kUnlimitedBytes;
446 }
447
448 void SetPlannedTargetFileSize(size_t planned_target_file_size) {
449 planned_target_file_size_ = planned_target_file_size;
450 }
451
452 // Checks to see whether the decoded target data has reached its planned size.
453 bool ReachedPlannedTargetFileSize() const {
454 if (!HasPlannedTargetFileSize()) {
455 return false;
456 }
457 // The planned target file size should not have been exceeded.
458 // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
459 // each target window would not make the target file exceed that limit, and
openvcdiff311c7142008-08-26 19:29:25 +0000460 // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
461 // exceeds the advertised target window size.
openvcdiff28db8072008-10-10 23:29:11 +0000462 if (decoded_target_.size() > planned_target_file_size_) {
openvcdiff311c7142008-08-26 19:29:25 +0000463 LOG(DFATAL) << "Internal error: Decoded data size "
464 << decoded_target_.size()
openvcdiff28db8072008-10-10 23:29:11 +0000465 << " exceeds planned target file size "
466 << planned_target_file_size_ << LOG_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000467 return true;
468 }
openvcdiff28db8072008-10-10 23:29:11 +0000469 return decoded_target_.size() == planned_target_file_size_;
openvcdiff311c7142008-08-26 19:29:25 +0000470 }
471
472 // Checks to see whether adding a new target window of the specified size
openvcdiff28db8072008-10-10 23:29:11 +0000473 // would exceed the planned target file size, the maximum target file size,
474 // or the maximum target window size. If so, logs an error and returns true;
475 // otherwise, returns false.
476 bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
openvcdiff311c7142008-08-26 19:29:25 +0000477
478 // Returns the amount of input data passed to the last DecodeChunk()
479 // that was not consumed by the decoder. This is essential if
openvcdiff28db8072008-10-10 23:29:11 +0000480 // SetPlannedTargetFileSize() is being used, in order to preserve the
481 // remaining input data stream once the planned target file has been decoded.
openvcdiff311c7142008-08-26 19:29:25 +0000482 size_t GetUnconsumedDataSize() const {
483 return unparsed_bytes_.size();
484 }
485
486 // This function will return true if the decoder has parsed a complete delta
487 // file header plus zero or more delta file windows, with no data left over.
488 // It will also return true if no delta data at all was decoded. If these
489 // conditions are not met, then FinishDecoding() should not be called.
490 bool IsDecodingComplete() const {
491 if (!FoundFileHeader()) {
492 // No complete delta file header has been parsed yet. DecodeChunk()
493 // may have received some data that it hasn't yet parsed, in which case
494 // decoding is incomplete.
495 return unparsed_bytes_.empty();
496 } else if (custom_code_table_decoder_.get()) {
497 // The decoder is in the middle of parsing a custom code table.
498 return false;
499 } else if (delta_window_.FoundWindowHeader()) {
500 // The decoder is in the middle of parsing an interleaved format delta
501 // window.
502 return false;
openvcdiff28db8072008-10-10 23:29:11 +0000503 } else if (ReachedPlannedTargetFileSize()) {
504 // The decoder found exactly the planned number of bytes. In this case
openvcdiff311c7142008-08-26 19:29:25 +0000505 // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
506 // data after the end of the delta file.
507 return true;
508 } else {
509 // No complete delta file window has been parsed yet. DecodeChunk()
510 // may have received some data that it hasn't yet parsed, in which case
511 // decoding is incomplete.
512 return unparsed_bytes_.empty();
513 }
514 }
515
516 const char* dictionary_ptr() const { return dictionary_ptr_; }
517
518 size_t dictionary_size() const { return dictionary_size_; }
519
520 VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
521
522 string* decoded_target() { return &decoded_target_; }
523
524 string* annotated_output() { return &annotated_output_; }
525
526 // The variable that determines whether annotated output is enabled is
527 // delta_window_.annotated_output_. If that member is NULL, then the feature
528 // is disabled.
529 void EnableAnnotatedOutput() {
530 delta_window_.EnableAnnotatedOutput();
531 }
532
533 void DisableAnnotatedOutput() {
534 delta_window_.DisableAnnotatedOutput();
535 }
536
537 void GetAnnotatedOutput(OutputStringInterface* annotated_output) {
538 // We could use annotated_output->assign(), but that method is not defined
539 // for some output string types, so use clear() + append() to accomplish the
540 // same thing.
541 annotated_output->clear();
542 annotated_output->append(annotated_output_.data(),
543 annotated_output_.size());
544 }
545
546 private:
547 // Reads the VCDiff delta file header section as described in RFC section 4.1,
548 // except the custom code table data. Returns RESULT_ERROR if an error
549 // occurred, or RESULT_END_OF_DATA if the end of available data was reached
550 // before the entire header could be read. (The latter may be an error
551 // condition if there is no more data available.) Otherwise, advances
552 // data->position_ past the header and returns RESULT_SUCCESS.
553 //
554 VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
555
556 // Indicates whether or not the header has already been read.
557 bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
558
559 // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
560 // file header, this function parses the custom cache sizes and initializes
561 // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
562 // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
563 // error occurred, or RESULT_END_OF_DATA if the end of available data was
564 // reached before the custom cache sizes could be read. Otherwise, returns
565 // the number of bytes read.
566 //
567 int InitCustomCodeTable(const char* data_start, const char* data_end);
568
569 // If a custom code table was specified in the header section that was parsed
570 // by ReadDeltaFileHeader(), this function makes a recursive call to another
571 // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
572 // custom code table is expected to be supplied as an embedded VCDIFF
573 // encoding that uses the standard code table. Returns RESULT_ERROR if an
574 // error occurs, or RESULT_END_OF_DATA if the end of available data was
575 // reached before the entire custom code table could be read. Otherwise,
576 // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
577 // custom code table. If the function returns RESULT_SUCCESS or
578 // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
579 //
580 VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
581
582 // Contents and length of the source (dictionary) data.
583 const char* dictionary_ptr_;
584 size_t dictionary_size_;
585
586 // This string will be used to store any unparsed bytes left over when
587 // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
588 // It will also be used to concatenate those unparsed bytes with the data
589 // supplied to the next call to DecodeChunk(), so that they appear in
590 // contiguous memory.
591 string unparsed_bytes_;
592
593 // The portion of the target file that has been decoded so far. This will be
594 // used to fill the output string for DecodeChunk(), and will also be used to
595 // execute COPY instructions that reference target data. Since the source
596 // window can come from a range of addresses in the previously decoded target
597 // data, the entire target file needs to be available to the decoder, not just
598 // the current target window.
599 string decoded_target_;
600
601 // The VCDIFF version byte (also known as "header4") from the
602 // delta file header.
603 unsigned char vcdiff_version_code_;
604
605 VCDiffDeltaFileWindow delta_window_;
606
607 std::auto_ptr<VCDiffAddressCache> addr_cache_;
608
609 // Will be NULL unless a custom code table has been defined.
610 std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
611
612 // Used to receive the decoded custom code table.
613 string custom_code_table_string_;
614
615 // If a custom code table is specified, it will be expressed
616 // as an embedded VCDIFF delta file which uses the default code table
617 // as the source file (dictionary). Use a child decoder object
618 // to decode that delta file.
619 std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
620
621 // If set, then the decoder is expecting *exactly* this number of
622 // target bytes to be decoded from one or more delta file windows.
623 // If this number is exceeded while decoding a window, but was not met
624 // before starting on that window, an error will be reported.
625 // If FinishDecoding() is called before this number is met, an error
626 // will also be reported. This feature is used for decoding the
627 // embedded code table data within a VCDIFF delta file; we want to
628 // stop processing the embedded data once the entire code table has
629 // been decoded, and treat the rest of the available data as part
630 // of the enclosing delta file.
openvcdiff28db8072008-10-10 23:29:11 +0000631 size_t planned_target_file_size_;
632
633 size_t maximum_target_file_size_;
634
635 size_t maximum_target_window_size_;
openvcdiff311c7142008-08-26 19:29:25 +0000636
637 // This string will always be empty until EnableAnnotatedOutput() is called,
638 // at which point it will start to accumulate annotated delta windows each
639 // time DecodeChunk() finishes a window. It will be cleared each time that
640 // StartDecoding() is called.
641 string annotated_output_;
642
643 // This value is used to ensure the correct order of calls to the interface
644 // functions, i.e., a single call to StartDecoding(), followed by zero or
645 // more calls to DecodeChunk(), followed by a single call to
646 // FinishDecoding().
647 bool start_decoding_was_called_;
648
649 // Making these private avoids implicit copy constructor & assignment operator
650 VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT
651 void operator=(const VCDiffStreamingDecoderImpl&);
652};
653
654// *** Methods for VCDiffStreamingDecoderImpl
655
openvcdiff28db8072008-10-10 23:29:11 +0000656const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
657const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
658
659VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
660 : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
661 maximum_target_window_size_(kDefaultMaximumTargetFileSize) {
openvcdiff311c7142008-08-26 19:29:25 +0000662 delta_window_.Init(this);
663 Reset();
664}
665
666// Reset() will delete the component objects without reallocating them.
667VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
668
669void VCDiffStreamingDecoderImpl::Reset() {
670 start_decoding_was_called_ = false;
671 dictionary_ptr_ = NULL;
672 dictionary_size_ = 0;
673 vcdiff_version_code_ = '\0';
openvcdiff28db8072008-10-10 23:29:11 +0000674 planned_target_file_size_ = kUnlimitedBytes;
openvcdiff311c7142008-08-26 19:29:25 +0000675 addr_cache_.reset();
676 custom_code_table_.reset();
677 custom_code_table_decoder_.reset();
678 delta_window_.Reset();
679}
680
681void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
682 size_t dictionary_size) {
683 if (start_decoding_was_called_) {
684 LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
685 << LOG_ENDL;
686 return;
687 }
688 unparsed_bytes_.clear();
689 decoded_target_.clear(); // delta_window_.Reset() depends on this
690 annotated_output_.clear();
691 Reset();
692 dictionary_ptr_ = dictionary_ptr;
693 dictionary_size_ = dictionary_size;
694 start_decoding_was_called_ = true;
695}
696
// Reads the VCDiff delta file header section as described in RFC section 4.1:
//
//     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
//     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
//     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
//     Header4                                  - byte
//     Hdr_Indicator                            - byte
//     [Secondary compressor ID]                - byte
//     [Length of code table data]              - integer
//     [Code table data]
//
// Initializes the code table and address cache objects.  Returns RESULT_ERROR
// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
// reached before the entire header could be read.  (The latter may be an error
// condition if there is no more data available.)  Otherwise, returns
// RESULT_SUCCESS, and removes the header bytes from the data string.
//
// It's relatively inefficient to expect this function to parse any number of
// input bytes available, down to 1 byte, but it is necessary in case the input
// is not a properly formatted VCDIFF delta file.  If the entire input consists
// of two bytes "12", then we should recognize that it does not match the
// initial VCDIFF magic number "VCD" and report an error, rather than waiting
// indefinitely for more input that will never arrive.
//
720//
721VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
722 ParseableChunk* data) {
723 if (FoundFileHeader()) {
724 return RESULT_SUCCESS;
725 }
726 size_t data_size = data->UnparsedSize();
727 const DeltaFileHeader* header =
728 reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
729 bool wrong_magic_number = false;
730 switch (data_size) {
731 // Verify only the bytes that are available.
732 default:
733 // Found header contents up to and including VCDIFF version
734 vcdiff_version_code_ = header->header4;
735 if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284)
736 (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol
737 LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
738 return RESULT_ERROR;
739 }
740 // fall through
741 case 3:
742 if (header->header3 != 0xC4) { // magic value 'D' | 0x80
743 wrong_magic_number = true;
744 }
745 // fall through
746 case 2:
747 if (header->header2 != 0xC3) { // magic value 'C' | 0x80
748 wrong_magic_number = true;
749 }
750 // fall through
751 case 1:
752 if (header->header1 != 0xD6) { // magic value 'V' | 0x80
753 wrong_magic_number = true;
754 }
755 // fall through
756 case 0:
757 if (wrong_magic_number) {
758 LOG(ERROR) << "Did not find VCDIFF header bytes; "
759 "input is not a VCDIFF delta file" << LOG_ENDL;
760 return RESULT_ERROR;
761 }
762 if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
763 }
764 // Secondary compressor not supported.
765 if (header->hdr_indicator & VCD_DECOMPRESS) {
766 LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
767 return RESULT_ERROR;
768 }
769 if (header->hdr_indicator & VCD_CODETABLE) {
770 int bytes_parsed = InitCustomCodeTable(
771 data->UnparsedData() + sizeof(DeltaFileHeader),
772 data->End());
773 switch (bytes_parsed) {
774 case RESULT_ERROR:
775 return RESULT_ERROR;
776 case RESULT_END_OF_DATA:
777 return RESULT_END_OF_DATA;
778 default:
779 data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
780 }
781 } else {
782 addr_cache_.reset(new VCDiffAddressCache);
783 // addr_cache_->Init() will be called
784 // from VCDiffStreamingDecoderImpl::DecodeChunk()
785 data->Advance(sizeof(DeltaFileHeader));
786 }
787 return RESULT_SUCCESS;
788}
789
790int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
791 const char* data_end) {
792 // A custom code table is being specified. Parse the variable-length
793 // cache sizes and begin parsing the encoded custom code table.
794 int32_t near_cache_size = 0, same_cache_size = 0;
795 VCDiffHeaderParser header_parser(data_start, data_end);
796 if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
797 return header_parser.GetResult();
798 }
799 if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
800 return header_parser.GetResult();
801 }
802 custom_code_table_.reset(new struct VCDiffCodeTableData);
803 memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
804 custom_code_table_string_.clear();
805 addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
806 // addr_cache_->Init() will be called
807 // from VCDiffStreamingDecoderImpl::DecodeChunk()
808
809 // If we reach this point (the start of the custom code table)
810 // without encountering a RESULT_END_OF_DATA condition, then we won't call
811 // ReadDeltaFileHeader() again for this delta file.
812 //
813 // Instantiate a recursive decoder to interpret the custom code table
814 // as a VCDIFF encoding of the default code table.
815 custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
816 custom_code_table_decoder_->StartDecoding(
817 reinterpret_cast<const char*>(
818 &VCDiffCodeTableData::kDefaultCodeTableData),
819 sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
openvcdiff28db8072008-10-10 23:29:11 +0000820 custom_code_table_decoder_->SetPlannedTargetFileSize(
821 sizeof(*custom_code_table_));
openvcdiff311c7142008-08-26 19:29:25 +0000822 return static_cast<int>(header_parser.ParsedSize());
823}
824
825VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
826 ParseableChunk* data) {
827 if (!custom_code_table_decoder_.get()) {
828 return RESULT_SUCCESS;
829 }
830 if (!custom_code_table_.get()) {
831 LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set,"
832 " but custom_code_table_ is NULL" << LOG_ENDL;
833 return RESULT_ERROR;
834 }
835 OutputString<string> output_string(&custom_code_table_string_);
836 if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
837 data->UnparsedSize(),
838 &output_string)) {
839 return RESULT_ERROR;
840 }
841 if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
842 // Skip over the consumed data.
843 data->Finish();
844 return RESULT_END_OF_DATA;
845 }
846 if (!custom_code_table_decoder_->FinishDecoding()) {
847 return RESULT_ERROR;
848 }
849 if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
openvcdiff28db8072008-10-10 23:29:11 +0000850 LOG(DFATAL) << "Decoded custom code table size ("
openvcdiff311c7142008-08-26 19:29:25 +0000851 << custom_code_table_string_.length()
openvcdiff28db8072008-10-10 23:29:11 +0000852 << ") does not match size of a code table ("
853 << sizeof(*custom_code_table_) << ")" << LOG_ENDL;
openvcdiff311c7142008-08-26 19:29:25 +0000854 return RESULT_ERROR;
855 }
856 memcpy(custom_code_table_.get(),
857 custom_code_table_string_.data(),
858 sizeof(*custom_code_table_));
859 custom_code_table_string_.clear();
860 // Skip over the consumed data.
861 data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
862 custom_code_table_decoder_.reset();
863 delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
864 return RESULT_SUCCESS;
865}
866
867namespace {
868
869class TrackNewOutputText {
870 public:
openvcdiff28db8072008-10-10 23:29:11 +0000871#ifndef VCDIFF_HAS_GLOBAL_STRING
872 typedef std::string string;
873#endif // !VCDIFF_HAS_GLOBAL_STRING
874
openvcdiff311c7142008-08-26 19:29:25 +0000875 explicit TrackNewOutputText(const string& decoded_target)
876 : decoded_target_(decoded_target),
877 initial_decoded_target_size_(decoded_target.size()) { }
878
879 void AppendNewOutputText(size_t target_bytes_remaining,
880 OutputStringInterface* output_string) {
881 const size_t bytes_decoded_this_chunk =
882 decoded_target_.size() - initial_decoded_target_size_;
883 if (bytes_decoded_this_chunk > 0) {
884 if (target_bytes_remaining > 0) {
885 // The decoder is midway through decoding a target window. Resize
886 // output_string to match the expected length. The interface guarantees
887 // not to resize the output_string more than once per target window
888 // decoded.
889 output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
890 + target_bytes_remaining);
891 }
892 output_string->append(
893 decoded_target_.data() + initial_decoded_target_size_,
894 bytes_decoded_this_chunk);
895 }
896 }
897
898 private:
899 const string& decoded_target_;
900 size_t initial_decoded_target_size_;
901};
902
903} // anonymous namespace
904
905bool VCDiffStreamingDecoderImpl::DecodeChunk(
906 const char* data,
907 size_t len,
908 OutputStringInterface* output_string) {
909 if (!start_decoding_was_called_) {
910 LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
911 Reset();
912 return false;
913 }
914 ParseableChunk parseable_chunk(data, len);
915 if (!unparsed_bytes_.empty()) {
916 unparsed_bytes_.append(data, len);
917 parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
918 unparsed_bytes_.size());
919 }
920 TrackNewOutputText output_tracker(decoded_target_);
921 VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
922 if (RESULT_SUCCESS == result) {
923 result = ReadCustomCodeTable(&parseable_chunk);
924 }
925 if (RESULT_SUCCESS == result) {
926 result = delta_window_.DecodeWindows(&parseable_chunk);
927 }
928 if (RESULT_ERROR == result) {
929 Reset(); // Don't allow further DecodeChunk calls
930 return false;
931 }
932 unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
933 parseable_chunk.UnparsedSize());
934 output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
935 output_string);
936 return true;
937}
938
939// Finishes decoding after all data has been received. Returns true
940// if decoding of the entire stream was successful.
941bool VCDiffStreamingDecoderImpl::FinishDecoding() {
942 bool success = true;
943 if (!start_decoding_was_called_) {
944 LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
945 " or called after DecodeChunk() returned false"
946 << LOG_ENDL;
947 success = false;
948 } else if (!IsDecodingComplete()) {
949 LOG(ERROR) << "FinishDecoding() called before parsing entire"
950 " delta file window" << LOG_ENDL;
951 success = false;
952 }
953 // Reset the object state for the next decode operation
954 Reset();
955 return success;
956}
957
openvcdiff28db8072008-10-10 23:29:11 +0000958bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
959 size_t window_size) const {
960 if (window_size > maximum_target_window_size_) {
961 LOG(ERROR) << "Length of target window (" << window_size
962 << ") exceeds limit of " << maximum_target_window_size_
963 << " bytes" << LOG_ENDL;
964 return true;
965 }
966 if (HasPlannedTargetFileSize()) {
967 // The logical expression to check would be:
968 //
969 // decoded_target_.size() + window_size > planned_target_file_size_
970 //
971 // but the addition might cause an integer overflow if target_bytes_to_add
972 // is very large. So it is better to check target_bytes_to_add against
973 // the remaining planned target bytes.
974 size_t remaining_planned_target_file_size =
975 planned_target_file_size_ - decoded_target_.size();
976 if (window_size > remaining_planned_target_file_size) {
977 LOG(ERROR) << "Length of target window (" << window_size
978 << " bytes) plus previous windows (" << decoded_target_.size()
979 << " bytes) would exceed planned size of "
980 << planned_target_file_size_ << " bytes" << LOG_ENDL;
981 return true;
982 }
983 }
984 size_t remaining_maximum_target_bytes =
985 maximum_target_file_size_ - decoded_target_.size();
986 if (window_size > remaining_maximum_target_bytes) {
987 LOG(ERROR) << "Length of target window (" << window_size
988 << " bytes) plus previous windows (" << decoded_target_.size()
989 << " bytes) would exceed maximum target file size of "
990 << maximum_target_file_size_ << " bytes" << LOG_ENDL;
991 return true;
992 }
993 return false;
994}
995
openvcdiff311c7142008-08-26 19:29:25 +0000996// *** Methods for VCDiffDeltaFileWindow
997
openvcdiff311c7142008-08-26 19:29:25 +0000998void VCDiffDeltaFileWindow::Reset() {
999 found_header_ = false;
1000
1001 // Mark the start of the current target window.
1002 target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
1003 target_window_length_ = 0;
1004
1005 source_segment_ptr_ = NULL;
1006 source_segment_length_ = 0;
1007
1008 instructions_and_sizes_.Invalidate();
1009 data_for_add_and_run_.Invalidate();
1010 addresses_for_copy_.Invalidate();
1011
1012 interleaved_bytes_expected_ = 0;
1013
1014 has_checksum_ = false;
1015 expected_checksum_ = 0;
1016 if (annotated_output_.get()) {
1017 annotated_output_->clear();
1018 }
1019}
1020
1021VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
1022 VCDiffHeaderParser* header_parser) {
1023 size_t add_and_run_data_length = 0;
1024 size_t instructions_and_sizes_length = 0;
1025 size_t addresses_length = 0;
1026 if (!header_parser->ParseSectionLengths(has_checksum_,
1027 &add_and_run_data_length,
1028 &instructions_and_sizes_length,
1029 &addresses_length,
1030 &expected_checksum_)) {
1031 return header_parser->GetResult();
1032 }
1033 if (parent_->AllowInterleaved() &&
1034 (add_and_run_data_length == 0) &&
1035 (addresses_length == 0)) {
1036 // The interleaved format is being used.
1037 interleaved_bytes_expected_ =
1038 static_cast<int>(instructions_and_sizes_length);
1039 UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
1040 header_parser->End());
1041 } else {
1042 // If interleaved format is not used, then the whole window contents
1043 // must be available before decoding can begin. If only part of
1044 // the current window is available, then report end of data
1045 // and re-parse the whole header when DecodeChunk() is called again.
1046 if (header_parser->UnparsedSize() < (add_and_run_data_length +
1047 instructions_and_sizes_length +
1048 addresses_length)) {
1049 return RESULT_END_OF_DATA;
1050 }
1051 data_for_add_and_run_.Init(header_parser->UnparsedData(),
1052 add_and_run_data_length);
1053 instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1054 instructions_and_sizes_length);
1055 addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1056 if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1057 LOG(ERROR) << "The end of the instructions section "
1058 "does not match the end of the delta window" << LOG_ENDL;
1059 return RESULT_ERROR;
1060 }
1061 }
1062 reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1063 instructions_and_sizes_.End());
1064 return RESULT_SUCCESS;
1065}
1066
1067// Here are the elements of the delta window header to be parsed,
1068// from section 4 of the RFC:
1069//
1070// Window1
1071// Win_Indicator - byte
1072// [Source segment size] - integer
1073// [Source segment position] - integer
1074// The delta encoding of the target window
1075// Length of the delta encoding - integer
1076// The delta encoding
1077// Size of the target window - integer
1078// Delta_Indicator - byte
1079// Length of data for ADDs and RUNs - integer
1080// Length of instructions and sizes - integer
1081// Length of addresses for COPYs - integer
1082// Data section for ADDs and RUNs - array of bytes
1083// Instructions and sizes section - array of bytes
1084// Addresses section for COPYs - array of bytes
1085//
1086VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1087 ParseableChunk* parseable_chunk) {
1088 string* decoded_target = parent_->decoded_target();
1089 VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1090 parseable_chunk->End());
1091 size_t source_segment_position = 0;
1092 unsigned char win_indicator = 0;
1093 if (!header_parser.ParseWinIndicatorAndSourceSegment(
1094 parent_->dictionary_size(),
1095 decoded_target->size(),
1096 &win_indicator,
1097 &source_segment_length_,
1098 &source_segment_position)) {
1099 return header_parser.GetResult();
1100 }
1101 has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1102 if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1103 return header_parser.GetResult();
1104 }
openvcdiff28db8072008-10-10 23:29:11 +00001105 if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1106 // An error has been logged by TargetWindowWouldExceedSizeLimits().
openvcdiff311c7142008-08-26 19:29:25 +00001107 return RESULT_ERROR;
1108 }
1109 header_parser.ParseDeltaIndicator();
1110 VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1111 if (RESULT_SUCCESS != setup_return_code) {
1112 return setup_return_code;
1113 }
1114 // Reserve enough space in the output string for the current target window.
1115 decoded_target->reserve(target_window_start_pos_ + target_window_length_);
1116 // Get a pointer to the start of the source segment.
1117 if (win_indicator & VCD_SOURCE) {
1118 source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1119 } else if (win_indicator & VCD_TARGET) {
1120 // This assignment must happen after the reserve().
1121 // decoded_target should not be resized again while processing this window,
1122 // so source_segment_ptr_ should remain valid.
1123 source_segment_ptr_ = decoded_target->data() + source_segment_position;
1124 }
1125 // The whole window header was found and parsed successfully.
1126 found_header_ = true;
1127 parseable_chunk->Advance(header_parser.ParsedSize());
1128 return RESULT_SUCCESS;
1129}
1130
1131void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1132 ParseableChunk* parseable_chunk) {
1133 if (IsInterleaved()) {
1134 size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1135 // Reduce expected instruction segment length by bytes parsed
1136 interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1137 parseable_chunk->Advance(bytes_parsed);
1138 }
1139}
1140
1141size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1142 return parent_->decoded_target()->size() - target_window_start_pos_;
1143}
1144
1145size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1146 if (target_window_length_ == 0) {
1147 // There is no window being decoded at present
1148 return 0;
1149 } else {
1150 return target_window_length_ - TargetBytesDecoded();
1151 }
1152}
1153
1154void VCDiffDeltaFileWindow::CopyBytes(const char* data,
1155 size_t size,
1156 VCDiffAnnotationType annotation_type) {
1157 parent_->decoded_target()->append(data, size);
1158 if (annotated_output_.get()) {
1159 annotated_output_->append(kAnnotationStartTags[annotation_type]);
1160 annotated_output_->append(data, size);
1161 annotated_output_->append(kAnnotationEndTags[annotation_type]);
1162 }
1163}
1164
1165void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1166 parent_->decoded_target()->append(size, byte);
1167 if (annotated_output_.get()) {
1168 annotated_output_->append(kAnnotationStartTags[VCD_ANNOTATION_LITERAL]);
1169 annotated_output_->append(size, byte);
1170 annotated_output_->append(kAnnotationEndTags[VCD_ANNOTATION_LITERAL]);
1171 }
1172}
1173
1174VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1175 if (size > data_for_add_and_run_.UnparsedSize()) {
1176 return RESULT_END_OF_DATA;
1177 }
1178 // Write the next "size" data bytes
1179 CopyBytes(data_for_add_and_run_.UnparsedData(), size, VCD_ANNOTATION_LITERAL);
1180 data_for_add_and_run_.Advance(size);
1181 return RESULT_SUCCESS;
1182}
1183
1184VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1185 if (data_for_add_and_run_.Empty()) {
1186 return RESULT_END_OF_DATA;
1187 }
1188 // Write "size" copies of the next data byte
1189 RunByte(*data_for_add_and_run_.UnparsedData(), size);
1190 data_for_add_and_run_.Advance(1);
1191 return RESULT_SUCCESS;
1192}
1193
1194VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1195 unsigned char mode) {
1196 // Keep track of the number of target bytes decoded as a local variable
1197 // to avoid recalculating it each time it is needed.
1198 size_t target_bytes_decoded = TargetBytesDecoded();
1199 const VCDAddress here_address =
1200 static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1201 const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1202 here_address,
1203 mode,
1204 addresses_for_copy_.UnparsedDataAddr(),
1205 addresses_for_copy_.End());
1206 switch (decoded_address) {
1207 case RESULT_ERROR:
1208 LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
1209 return RESULT_ERROR;
1210 case RESULT_END_OF_DATA:
1211 return RESULT_END_OF_DATA;
1212 default:
1213 if ((decoded_address < 0) || (decoded_address > here_address)) {
1214 LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
1215 << " returned from DecodeAddress, with here_address = "
1216 << here_address << LOG_ENDL;
1217 return RESULT_ERROR;
1218 }
1219 break;
1220 }
1221 size_t address = static_cast<size_t>(decoded_address);
1222 if ((address + size) <= source_segment_length_) {
1223 // Copy all data from source segment
1224 CopyBytes(&source_segment_ptr_[address], size, VCD_ANNOTATION_DMATCH);
1225 return RESULT_SUCCESS;
1226 }
1227 // Copy some data from target window...
1228 if (address < source_segment_length_) {
1229 // ... plus some data from source segment
1230 const size_t partial_copy_size = source_segment_length_ - address;
1231 CopyBytes(&source_segment_ptr_[address],
1232 partial_copy_size,
1233 VCD_ANNOTATION_DMATCH);
1234 target_bytes_decoded += partial_copy_size;
1235 address += partial_copy_size;
1236 size -= partial_copy_size;
1237 }
1238 address -= source_segment_length_;
1239 // address is now based at start of target window
1240 const char* const target_segment_ptr = parent_->decoded_target()->data() +
1241 target_window_start_pos_;
1242 while (size > (target_bytes_decoded - address)) {
1243 // Recursive copy that extends into the yet-to-be-copied target data
1244 const size_t partial_copy_size = target_bytes_decoded - address;
1245 CopyBytes(&target_segment_ptr[address],
1246 partial_copy_size,
1247 VCD_ANNOTATION_BMATCH);
1248 target_bytes_decoded += partial_copy_size;
1249 address += partial_copy_size;
1250 size -= partial_copy_size;
1251 }
1252 CopyBytes(&target_segment_ptr[address], size, VCD_ANNOTATION_BMATCH);
1253 return RESULT_SUCCESS;
1254}
1255
1256int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1257 if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1258 != parseable_chunk->UnparsedData())) {
1259 LOG(DFATAL) << "Internal error: interleaved format is used, but the"
1260 " input pointer does not point to the instructions section"
1261 << LOG_ENDL;
1262 return RESULT_ERROR;
1263 }
1264 while (TargetBytesDecoded() < target_window_length_) {
1265 int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1266 unsigned char mode = 0;
1267 VCDiffInstructionType instruction =
1268 reader_.GetNextInstruction(&decoded_size, &mode);
1269 switch (instruction) {
1270 case VCD_INSTRUCTION_END_OF_DATA:
1271 UpdateInstructionPointer(parseable_chunk);
1272 return RESULT_END_OF_DATA;
1273 case VCD_INSTRUCTION_ERROR:
1274 return RESULT_ERROR;
1275 default:
1276 break;
1277 }
1278 const size_t size = static_cast<size_t>(decoded_size);
1279 // The value of "size" itself could be enormous (say, INT32_MAX)
1280 // so check it individually against the limit to protect against
1281 // overflow when adding it to something else.
1282 if ((size > target_window_length_) ||
1283 ((size + TargetBytesDecoded()) > target_window_length_)) {
1284 LOG(ERROR) << VCDiffInstructionName(instruction)
1285 << " with size " << size
1286 << " plus existing " << TargetBytesDecoded()
1287 << " bytes of target data exceeds length of target"
1288 " window (" << target_window_length_ << " bytes)"
1289 << LOG_ENDL;
1290 return RESULT_ERROR;
1291 }
1292 VCDiffResult result = RESULT_SUCCESS;
1293 switch (instruction) {
1294 case VCD_ADD:
1295 result = DecodeAdd(size);
1296 break;
1297 case VCD_RUN:
1298 result = DecodeRun(size);
1299 break;
1300 case VCD_COPY:
1301 result = DecodeCopy(size, mode);
1302 break;
1303 default:
1304 LOG(DFATAL) << "Unexpected instruction type " << instruction
1305 << "in opcode stream" << LOG_ENDL;
1306 return RESULT_ERROR;
1307 }
1308 switch (result) {
1309 case RESULT_END_OF_DATA:
1310 reader_.UnGetInstruction();
1311 UpdateInstructionPointer(parseable_chunk);
1312 return RESULT_END_OF_DATA;
1313 case RESULT_ERROR:
1314 return RESULT_ERROR;
1315 case RESULT_SUCCESS:
1316 break;
1317 }
1318 }
1319 if (TargetBytesDecoded() != target_window_length_) {
1320 LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
1321 << " bytes) does not match expected size ("
1322 << target_window_length_ << " bytes)" << LOG_ENDL;
1323 return RESULT_ERROR;
1324 }
1325 const char* const target_window_start =
1326 parent_->decoded_target()->data() + target_window_start_pos_;
1327 if (has_checksum_ &&
1328 (ComputeAdler32(target_window_start, target_window_length_)
1329 != expected_checksum_)) {
1330 LOG(ERROR) << "Target data does not match checksum; this could mean "
1331 "that the wrong dictionary was used" << LOG_ENDL;
1332 return RESULT_ERROR;
1333 }
1334 if (!instructions_and_sizes_.Empty()) {
1335 LOG(ERROR) << "Excess instructions and sizes left over "
1336 "after decoding target window" << LOG_ENDL;
1337 return RESULT_ERROR;
1338 }
1339 if (!IsInterleaved()) {
1340 // Standard format is being used, with three separate sections for the
1341 // instructions, data, and addresses.
1342 if (!data_for_add_and_run_.Empty()) {
1343 LOG(ERROR) << "Excess ADD/RUN data left over "
1344 "after decoding target window" << LOG_ENDL;
1345 return RESULT_ERROR;
1346 }
1347 if (!addresses_for_copy_.Empty()) {
1348 LOG(ERROR) << "Excess COPY addresses left over "
1349 "after decoding target window" << LOG_ENDL;
1350 return RESULT_ERROR;
1351 }
1352 // Reached the end of the window. Update the ParseableChunk to point to the
1353 // end of the addresses section, which is the last section in the window.
1354 parseable_chunk->SetPosition(addresses_for_copy_.End());
1355 } else {
1356 // Interleaved format is being used. The window may have been only
1357 // partially decoded.
1358 UpdateInstructionPointer(parseable_chunk);
1359 }
1360 return RESULT_SUCCESS;
1361}
1362
1363VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
1364 ParseableChunk* parseable_chunk) {
1365 if (!parent_) {
1366 LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
1367 "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
1368 return RESULT_ERROR;
1369 }
1370 while (!parseable_chunk->Empty()) {
1371 if (!found_header_) {
1372 switch (ReadHeader(parseable_chunk)) {
1373 case RESULT_END_OF_DATA:
1374 return RESULT_END_OF_DATA;
1375 case RESULT_ERROR:
1376 return RESULT_ERROR;
1377 default:
1378 // Reset address cache between windows (RFC section 5.1)
1379 if (!parent_->addr_cache()->Init()) {
1380 LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
1381 return RESULT_ERROR;
1382 }
1383 }
1384 } else {
1385 // We are resuming a window that was partially decoded before a
1386 // RESULT_END_OF_DATA was returned. This can only happen on the first
1387 // loop iteration, and only if the interleaved format is enabled and used.
1388 if (!IsInterleaved()) {
1389 LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
1390 " when interleaved format is not being used" << LOG_ENDL;
1391 return RESULT_ERROR;
1392 }
1393 UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1394 parseable_chunk->End());
1395 reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1396 instructions_and_sizes_.End());
1397 }
1398 switch (DecodeBody(parseable_chunk)) {
1399 case RESULT_END_OF_DATA:
1400 if (MoreDataExpected()) {
1401 return RESULT_END_OF_DATA;
1402 } else {
1403 LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
1404 << LOG_ENDL;
1405 // fall through to RESULT_ERROR case
1406 }
1407 case RESULT_ERROR:
1408 return RESULT_ERROR;
1409 default:
1410 break; // DecodeBody succeeded
1411 }
1412 AppendAnnotatedOutput(parent_->annotated_output());
1413 // Get ready to read a new delta window
1414 Reset();
openvcdiff28db8072008-10-10 23:29:11 +00001415 if (parent_->ReachedPlannedTargetFileSize()) {
1416 // Found exactly the length we expected. Stop decoding.
openvcdiff311c7142008-08-26 19:29:25 +00001417 return RESULT_SUCCESS;
1418 }
1419 }
1420 return RESULT_SUCCESS;
1421}
1422
1423// *** Methods for VCDiffStreamingDecoder
1424
1425VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1426: impl_(new VCDiffStreamingDecoderImpl) { }
1427
1428VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1429
1430void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1431 impl_->StartDecoding(source, len);
1432}
1433
1434bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1435 const char* data,
1436 size_t len,
1437 OutputStringInterface* output_string) {
1438 return impl_->DecodeChunk(data, len, output_string);
1439}
1440
1441bool VCDiffStreamingDecoder::FinishDecoding() {
1442 return impl_->FinishDecoding();
1443}
1444
openvcdiff28db8072008-10-10 23:29:11 +00001445bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1446 size_t new_maximum_target_file_size) {
1447 return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1448}
1449
1450bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1451 size_t new_maximum_target_window_size) {
1452 return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1453}
1454
openvcdiff311c7142008-08-26 19:29:25 +00001455void VCDiffStreamingDecoder::EnableAnnotatedOutput() {
1456 impl_->EnableAnnotatedOutput();
1457}
1458
1459void VCDiffStreamingDecoder::DisableAnnotatedOutput() {
1460 impl_->DisableAnnotatedOutput();
1461}
1462
1463void VCDiffStreamingDecoder::GetAnnotatedOutputToInterface(
1464 OutputStringInterface* annotated_output) {
1465 impl_->GetAnnotatedOutput(annotated_output);
1466}
1467
1468bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1469 size_t dictionary_size,
1470 const string& encoding,
1471 OutputStringInterface* target) {
1472 target->clear();
1473 decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1474 if (!decoder_.DecodeChunkToInterface(encoding.data(),
1475 encoding.size(),
1476 target)) {
1477 return false;
1478 }
1479 return decoder_.FinishDecoding();
1480}
1481
1482} // namespace open_vcdiff