openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2008 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | // Implements a Decoder for the format described in |
| 17 | // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. |
| 18 | // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
| 19 | // |
| 20 | // The RFC describes the possibility of using a secondary compressor |
| 21 | // to further reduce the size of each section of the VCDIFF output. |
| 22 | // That feature is not supported in this implementation of the encoder |
| 23 | // and decoder. |
| 24 | // No secondary compressor types have been publicly registered with |
| 25 | // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids |
| 26 | // in the more than five years since the registry was created, so there |
| 27 | // is no standard set of compressor IDs which would be generated by other |
| 28 | // encoders or accepted by other decoders. |
| 29 | |
| 30 | #include <config.h> |
| 31 | #include "google/vcdecoder.h" |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 32 | #include <stddef.h> // size_t, ptrdiff_t |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 33 | #include <stdint.h> // int32_t |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 34 | #include <string.h> // memcpy, memset |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 35 | #include <memory> // auto_ptr |
| 36 | #include <string> |
| 37 | #include "addrcache.h" |
| 38 | #include "checksum.h" |
| 39 | #include "codetable.h" |
| 40 | #include "decodetable.h" |
| 41 | #include "headerparser.h" |
| 42 | #include "logging.h" |
| 43 | #include "google/output_string.h" |
| 44 | #include "varint_bigendian.h" |
| 45 | #include "vcdiff_defs.h" |
| 46 | |
| 47 | namespace open_vcdiff { |
| 48 | |
namespace {

// Kinds of decoded data that can be wrapped in annotation tags.
// NOTE(review): the two tag tables below list their entries in the same
// order as these enumerators, so they are presumably indexed by a
// VCDiffAnnotationType value -- keep all three in sync when adding a type.
enum VCDiffAnnotationType {
  VCD_ANNOTATION_LITERAL,
  VCD_ANNOTATION_DMATCH,
  VCD_ANNOTATION_BMATCH
};

// Opening tag for each annotation type.  Both the characters and the
// pointers themselves are const so the table cannot be modified at runtime.
static const char* const kAnnotationStartTags[] = {
  "<literal>",
  "<dmatch>",
  "<bmatch>"
};

// Closing tag for each annotation type, in the same order as
// kAnnotationStartTags.  Const for the same reason.
static const char* const kAnnotationEndTags[] = {
  "</literal>",
  "</dmatch>",
  "</bmatch>"
};

}  // anonymous namespace
| 70 | |
| 71 | // This class is used to parse delta file windows as described |
| 72 | // in RFC sections 4.2 and 4.3. Its methods are not thread-safe. |
| 73 | // |
| 74 | // Here is the window format copied from the RFC: |
| 75 | // |
| 76 | // Window1 |
| 77 | // Win_Indicator - byte |
| 78 | // [Source segment size] - integer |
| 79 | // [Source segment position] - integer |
| 80 | // The delta encoding of the target window |
| 81 | // Length of the delta encoding - integer |
| 82 | // The delta encoding |
| 83 | // Size of the target window - integer |
| 84 | // Delta_Indicator - byte |
| 85 | // Length of data for ADDs and RUNs - integer |
| 86 | // Length of instructions and sizes - integer |
| 87 | // Length of addresses for COPYs - integer |
| 88 | // Data section for ADDs and RUNs - array of bytes |
| 89 | // Instructions and sizes section - array of bytes |
| 90 | // Addresses section for COPYs - array of bytes |
| 91 | // Window2 |
| 92 | // ... |
| 93 | // |
| 94 | // Sample usage: |
| 95 | // |
| 96 | // VCDiffDeltaFileWindow delta_window_; |
| 97 | // delta_window_.Init(parent); |
| 98 | // ParseableChunk parseable_chunk(input_buffer, |
| 99 | // input_size, |
| 100 | // leftover_unencoded_bytes); |
| 101 | // switch (delta_window_.DecodeWindows(&parseable_chunk)) { |
| 102 | // case RESULT_END_OF_DATA: |
| 103 | // <Read more input and retry DecodeWindows later.> |
| 104 | // case RESULT_ERROR: |
| 105 | // <Handle error case. An error log message has already been generated.> |
| 106 | // } |
| 107 | // |
| 108 | // DecodeWindows consumes as many windows from the input as it can. It only |
| 109 | // needs to be placed within a loop if the loop is used to obtain more input |
| 110 | // (delta file) data. |
| 111 | // |
| 112 | class VCDiffDeltaFileWindow { |
| 113 | public: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 114 | #ifndef VCDIFF_HAS_GLOBAL_STRING |
| 115 | typedef std::string string; |
| 116 | #endif // !VCDIFF_HAS_GLOBAL_STRING |
| 117 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 118 | VCDiffDeltaFileWindow(); |
| 119 | ~VCDiffDeltaFileWindow(); |
| 120 | |
| 121 | // Init() should be called immediately after constructing the |
| 122 | // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be |
| 123 | // invoked, or an error will occur. |
| 124 | void Init(VCDiffStreamingDecoderImpl* parent); |
| 125 | |
| 126 | // Resets the pointers to the data sections in the current window. |
| 127 | void Reset(); |
| 128 | |
| 129 | bool UseCodeTable(const VCDiffCodeTableData& code_table_data, |
| 130 | unsigned char max_mode) { |
| 131 | return reader_.UseCodeTable(code_table_data, max_mode); |
| 132 | } |
| 133 | |
| 134 | // Decodes as many delta windows as possible using the input data from |
| 135 | // *parseable_chunk. Appends the decoded target windows to |
| 136 | // parent_->decoded_target(). If annotated output is enabled, appends |
| 137 | // annotated output to parent_->annotated_output(). Returns RESULT_SUCCESS on |
| 138 | // success, or RESULT_END_OF_DATA if the end of input was reached before the |
| 139 | // entire window could be decoded and more input is expected (only possible if |
| 140 | // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
| 141 | // decoding. In the RESULT_ERROR case, the value of parseable_chunk->pointer_ |
| 142 | // is undefined; otherwise, parseable_chunk->Advance() is called to point to |
| 143 | // the input data position just after the data that has been decoded. |
| 144 | // |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 145 | // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 146 | // expects *exactly* this number of target bytes to be decoded from one or |
| 147 | // more delta file windows. If this number is met exactly after finishing a |
| 148 | // delta window, this function will return RESULT_SUCCESS without processing |
| 149 | // any more bytes from data_pointer. If this number is exceeded while |
| 150 | // decoding a window, but was not met before starting that window, |
| 151 | // then RESULT_ERROR will be returned. |
| 152 | // |
| 153 | VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk); |
| 154 | |
| 155 | bool FoundWindowHeader() const { |
| 156 | return found_header_; |
| 157 | } |
| 158 | |
| 159 | bool MoreDataExpected() const { |
| 160 | // When parsing an interleaved-format delta file, |
| 161 | // every time DecodeBody() exits, interleaved_bytes_expected_ |
| 162 | // will be decremented by the number of bytes parsed. If it |
| 163 | // reaches zero, then there is no more data expected because |
| 164 | // the size of the interleaved section (given in the window |
| 165 | // header) has been reached. |
| 166 | return IsInterleaved() && (interleaved_bytes_expected_ > 0); |
| 167 | } |
| 168 | |
| 169 | // Returns the number of bytes remaining to be decoded in the target window. |
| 170 | // If not in the process of decoding a window, returns 0. |
| 171 | size_t TargetBytesRemaining(); |
| 172 | |
| 173 | void EnableAnnotatedOutput() { |
| 174 | if (!annotated_output_.get()) { |
| 175 | annotated_output_.reset(new string); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | void DisableAnnotatedOutput() { |
| 180 | annotated_output_.reset(NULL); |
| 181 | } |
| 182 | |
| 183 | private: |
| 184 | // Reads the header of the window section as described in RFC sections 4.2 and |
| 185 | // 4.3, up to and including the value "Length of addresses for COPYs". If the |
| 186 | // entire header is found, this function sets up the DeltaWindowSections |
| 187 | // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so |
| 188 | // that the decoder can begin decoding the opcodes in these sections. Returns |
| 189 | // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of |
| 190 | // available data was reached before the entire header could be read. (The |
| 191 | // latter may be an error condition if there is no more data available.) |
| 192 | // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the |
| 193 | // parsed header. |
| 194 | // |
| 195 | VCDiffResult ReadHeader(ParseableChunk* parseable_chunk); |
| 196 | |
| 197 | // After the window header has been parsed as far as the Delta_Indicator, |
| 198 | // this function is called to parse the following delta window header fields: |
| 199 | // |
| 200 | // Length of data for ADDs and RUNs - integer (VarintBE format) |
| 201 | // Length of instructions and sizes - integer (VarintBE format) |
| 202 | // Length of addresses for COPYs - integer (VarintBE format) |
| 203 | // |
| 204 | // If has_checksum_ is true, it also looks for the following element: |
| 205 | // |
| 206 | // Adler32 checksum - unsigned 32-bit integer (VarintBE format) |
| 207 | // |
| 208 | // It sets up the DeltaWindowSections instructions_and_sizes_, |
| 209 | // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format |
| 210 | // is being used, all three sections will include the entire window body; if |
| 211 | // the standard format is used, three non-overlapping window sections will be |
| 212 | // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA |
| 213 | // if standard format is being used and there is not enough input data to read |
| 214 | // the entire window body. Otherwise, returns RESULT_SUCCESS. |
| 215 | VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser); |
| 216 | |
| 217 | // Decodes the body of the window section as described in RFC sections 4.3, |
| 218 | // including the sections "Data section for ADDs and RUNs", "Instructions |
| 219 | // and sizes section", and "Addresses section for COPYs". These sections |
| 220 | // must already have been set up by ReadWindowHeader(). Returns a |
| 221 | // non-negative value on success, or RESULT_END_OF_DATA if the end of input |
| 222 | // was reached before the entire window could be decoded (only possible if |
| 223 | // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
| 224 | // decoding. Appends as much of the decoded target window as possible to |
| 225 | // parent->decoded_target(). |
| 226 | // |
| 227 | int DecodeBody(ParseableChunk* parseable_chunk); |
| 228 | |
| 229 | // Returns the number of bytes already decoded into the target window. |
| 230 | size_t TargetBytesDecoded(); |
| 231 | |
| 232 | // Decodes a single ADD instruction, updating parent_->decoded_target_. |
| 233 | VCDiffResult DecodeAdd(size_t size); |
| 234 | |
| 235 | // Decodes a single RUN instruction, updating parent_->decoded_target_. |
| 236 | VCDiffResult DecodeRun(size_t size); |
| 237 | |
| 238 | // Decodes a single COPY instruction, updating parent_->decoded_target_. |
| 239 | VCDiffResult DecodeCopy(size_t size, unsigned char mode); |
| 240 | |
| 241 | // When using the interleaved format, this function is called both on parsing |
| 242 | // the header and on resuming after a RESULT_END_OF_DATA was returned from a |
| 243 | // previous call to DecodeBody(). It sets up all three section pointers to |
| 244 | // reference the same interleaved stream of instructions, sizes, addresses, |
| 245 | // and data. These pointers must be reset every time that work resumes on a |
| 246 | // delta window, because the input data string may have been changed or |
| 247 | // resized since DecodeBody() last returned. |
| 248 | void UpdateInterleavedSectionPointers(const char* data_pos, |
| 249 | const char* data_end) { |
| 250 | const ptrdiff_t available_data = data_end - data_pos; |
| 251 | // Don't read past the end of currently-available data |
| 252 | if (available_data > interleaved_bytes_expected_) { |
| 253 | instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_); |
| 254 | } else { |
| 255 | instructions_and_sizes_.Init(data_pos, available_data); |
| 256 | } |
| 257 | data_for_add_and_run_.Init(&instructions_and_sizes_); |
| 258 | addresses_for_copy_.Init(&instructions_and_sizes_); |
| 259 | } |
| 260 | |
| 261 | // If true, the interleaved format described in AllowInterleaved() is used |
| 262 | // for the current delta file. Only valid after ReadWindowHeader() has been |
| 263 | // called and returned a positive number (i.e., the whole header was parsed), |
| 264 | // but before the window has finished decoding. |
| 265 | // |
| 266 | bool IsInterleaved() const { |
| 267 | // If the sections are interleaved, both addresses_for_copy_ and |
| 268 | // data_for_add_and_run_ should point at instructions_and_sizes_. |
| 269 | return !addresses_for_copy_.IsOwned(); |
| 270 | } |
| 271 | |
| 272 | // Executes a single COPY or ADD instruction, appending data to |
| 273 | // parent_->decoded_target(). |
| 274 | void CopyBytes(const char* data, |
| 275 | size_t size, |
| 276 | VCDiffAnnotationType annotation_type); |
| 277 | |
| 278 | // Executes a single RUN instruction, appending data to |
| 279 | // parent_->decoded_target(). |
| 280 | void RunByte(unsigned char byte, size_t size); |
| 281 | |
| 282 | void AppendAnnotatedOutput(string* annotated_output) { |
| 283 | if (annotated_output_.get()) { |
| 284 | annotated_output->append(*annotated_output_.get()); |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | // Advance *parseable_chunk to point to the current position in the |
| 289 | // instructions/sizes section. If interleaved format is used, then |
| 290 | // decrement the number of expected bytes in the instructions/sizes section |
| 291 | // by the number of instruction/size bytes parsed. |
| 292 | void UpdateInstructionPointer(ParseableChunk* parseable_chunk); |
| 293 | |
| 294 | // The parent object which was passed to Init(). |
| 295 | VCDiffStreamingDecoderImpl* parent_; |
| 296 | |
| 297 | // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader() |
| 298 | // has been called and succeeded in parsing the delta window header, but the |
| 299 | // entire window has not yet been decoded. |
| 300 | bool found_header_; |
| 301 | |
| 302 | // Contents and length of the current source window. source_segment_ptr_ |
| 303 | // will be non-NULL if (a) the window section header for the current window |
| 304 | // has been read, but the window has not yet finished decoding; or |
| 305 | // (b) the window did not specify a source segment. |
| 306 | const char* source_segment_ptr_; |
| 307 | size_t source_segment_length_; |
| 308 | |
| 309 | // The delta encoding window sections as defined in RFC section 4.3. |
| 310 | // The pointer for each section will be incremented as data is consumed and |
| 311 | // decoded from that section. If the interleaved format is used, |
| 312 | // data_for_add_and_run_ and addresses_for_copy_ will both point to |
| 313 | // instructions_and_sizes_; otherwise, they will be separate data sections. |
| 314 | // |
| 315 | DeltaWindowSection instructions_and_sizes_; |
| 316 | DeltaWindowSection data_for_add_and_run_; |
| 317 | DeltaWindowSection addresses_for_copy_; |
| 318 | |
| 319 | // The expected bytes left to decode in instructions_and_sizes_. Only used |
| 320 | // for the interleaved format. |
| 321 | int interleaved_bytes_expected_; |
| 322 | |
| 323 | // The expected length of the target window once it has been decoded. |
| 324 | size_t target_window_length_; |
| 325 | |
| 326 | // The index in decoded_target at which the first byte of the current |
| 327 | // target window was/will be written. |
| 328 | size_t target_window_start_pos_; |
| 329 | |
| 330 | // If has_checksum_ is true, then expected_checksum_ contains an Adler32 |
| 331 | // checksum of the target window data. This is an extension included in the |
| 332 | // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard. |
| 333 | bool has_checksum_; |
| 334 | VCDChecksum expected_checksum_; |
| 335 | |
| 336 | VCDiffCodeTableReader reader_; |
| 337 | |
| 338 | // This value is initialized to NULL, which means that annotated output is |
| 339 | // disabled. If EnableAnnotatedOutput() is called, it will be set to point |
| 340 | // to a new string object, and annotated output will be gathered into that |
| 341 | // string. |
| 342 | std::auto_ptr<string> annotated_output_; |
| 343 | |
| 344 | // Making these private avoids implicit copy constructor & assignment operator |
| 345 | VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT |
| 346 | void operator=(const VCDiffDeltaFileWindow&); |
| 347 | }; |
| 348 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 349 | // *** Inline methods for VCDiffDeltaFileWindow |
| 350 | |
| 351 | inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) { |
| 352 | Reset(); |
| 353 | } |
| 354 | |
| 355 | inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { } |
| 356 | |
| 357 | inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) { |
| 358 | parent_ = parent; |
| 359 | } |
| 360 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 361 | class VCDiffStreamingDecoderImpl { |
| 362 | public: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 363 | #ifndef VCDIFF_HAS_GLOBAL_STRING |
| 364 | typedef std::string string; |
| 365 | #endif // !VCDIFF_HAS_GLOBAL_STRING |
| 366 | |
| 367 | // The default maximum target file size (and target window size) if |
| 368 | // SetMaximumTargetFileSize() is not called. |
| 369 | static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB |
| 370 | |
| 371 | // The largest value that can be passed to SetMaximumTargetFileSize() or |
| 372 | // SetMaximumTargetWindowSize(). Using a larger value will result in an |
| 373 | // error. |
| 374 | static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX |
| 375 | |
| 376 | // A constant that is the default value for planned_target_file_size_, |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 377 | // indicating that the decoder does not have an expected length |
| 378 | // for the target data. |
| 379 | static const size_t kUnlimitedBytes = static_cast<size_t>(-3); |
| 380 | |
| 381 | VCDiffStreamingDecoderImpl(); |
| 382 | ~VCDiffStreamingDecoderImpl(); |
| 383 | |
| 384 | // Resets all member variables to their initial states. |
| 385 | void Reset(); |
| 386 | |
| 387 | // These functions are identical to their counterparts |
| 388 | // in VCDiffStreamingDecoder. |
| 389 | // |
| 390 | void StartDecoding(const char* dictionary_ptr, size_t dictionary_size); |
| 391 | |
| 392 | bool DecodeChunk(const char* data, |
| 393 | size_t len, |
| 394 | OutputStringInterface* output_string); |
| 395 | |
| 396 | bool FinishDecoding(); |
| 397 | |
| 398 | // If true, the version of VCDIFF used in the current delta file allows |
| 399 | // for the interleaved format, in which instructions, addresses and data |
| 400 | // are all sent interleaved in the instructions section of each window |
| 401 | // rather than being sent in separate sections. This is not part of |
| 402 | // the VCDIFF draft standard, so we've defined a special version code |
| 403 | // 'S' which implies that this feature is available. Even if interleaving |
| 404 | // is supported, it is not mandatory; interleaved format will be implied |
| 405 | // if the address and data sections are both zero-length. |
| 406 | // |
| 407 | bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; } |
| 408 | |
| 409 | // If true, the version of VCDIFF used in the current delta file allows |
| 410 | // each delta window to contain an Adler32 checksum of the target window data. |
| 411 | // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then |
| 412 | // this checksum will appear as a variable-length integer, just after the |
| 413 | // "length of addresses for COPYs" value and before the window data sections. |
| 414 | // It is possible for some windows in a delta file to use the checksum feature |
| 415 | // and for others not to use it (and leave the flag bit set to 0.) |
| 416 | // Just as with AllowInterleaved(), this extension is not part of the draft |
| 417 | // standard and is only available when the version code 'S' is specified. |
| 418 | // |
| 419 | bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; } |
| 420 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 421 | bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) { |
| 422 | if (new_maximum_target_file_size > kTargetSizeLimit) { |
| 423 | LOG(ERROR) << "Specified maximum target file size " |
| 424 | << new_maximum_target_file_size << " exceeds limit of " |
| 425 | << kTargetSizeLimit << " bytes" << LOG_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 426 | return false; |
| 427 | } |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 428 | maximum_target_file_size_ = new_maximum_target_file_size; |
| 429 | return true; |
| 430 | } |
| 431 | |
| 432 | bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) { |
| 433 | if (new_maximum_target_window_size > kTargetSizeLimit) { |
| 434 | LOG(ERROR) << "Specified maximum target window size " |
| 435 | << new_maximum_target_window_size << " exceeds limit of " |
| 436 | << kTargetSizeLimit << " bytes" << LOG_ENDL; |
| 437 | return false; |
| 438 | } |
| 439 | maximum_target_window_size_ = new_maximum_target_window_size; |
| 440 | return true; |
| 441 | } |
| 442 | |
| 443 | // See description of planned_target_file_size_, below. |
| 444 | bool HasPlannedTargetFileSize() const { |
| 445 | return planned_target_file_size_ != kUnlimitedBytes; |
| 446 | } |
| 447 | |
| 448 | void SetPlannedTargetFileSize(size_t planned_target_file_size) { |
| 449 | planned_target_file_size_ = planned_target_file_size; |
| 450 | } |
| 451 | |
| 452 | // Checks to see whether the decoded target data has reached its planned size. |
| 453 | bool ReachedPlannedTargetFileSize() const { |
| 454 | if (!HasPlannedTargetFileSize()) { |
| 455 | return false; |
| 456 | } |
| 457 | // The planned target file size should not have been exceeded. |
| 458 | // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of |
| 459 | // each target window would not make the target file exceed that limit, and |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 460 | // DecodeBody() will return RESULT_ERROR if the actual decoded output ever |
| 461 | // exceeds the advertised target window size. |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 462 | if (decoded_target_.size() > planned_target_file_size_) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 463 | LOG(DFATAL) << "Internal error: Decoded data size " |
| 464 | << decoded_target_.size() |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 465 | << " exceeds planned target file size " |
| 466 | << planned_target_file_size_ << LOG_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 467 | return true; |
| 468 | } |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 469 | return decoded_target_.size() == planned_target_file_size_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 470 | } |
| 471 | |
| 472 | // Checks to see whether adding a new target window of the specified size |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 473 | // would exceed the planned target file size, the maximum target file size, |
| 474 | // or the maximum target window size. If so, logs an error and returns true; |
| 475 | // otherwise, returns false. |
| 476 | bool TargetWindowWouldExceedSizeLimits(size_t window_size) const; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 477 | |
| 478 | // Returns the amount of input data passed to the last DecodeChunk() |
| 479 | // that was not consumed by the decoder. This is essential if |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 480 | // SetPlannedTargetFileSize() is being used, in order to preserve the |
| 481 | // remaining input data stream once the planned target file has been decoded. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 482 | size_t GetUnconsumedDataSize() const { |
| 483 | return unparsed_bytes_.size(); |
| 484 | } |
| 485 | |
| 486 | // This function will return true if the decoder has parsed a complete delta |
| 487 | // file header plus zero or more delta file windows, with no data left over. |
| 488 | // It will also return true if no delta data at all was decoded. If these |
| 489 | // conditions are not met, then FinishDecoding() should not be called. |
| 490 | bool IsDecodingComplete() const { |
| 491 | if (!FoundFileHeader()) { |
| 492 | // No complete delta file header has been parsed yet. DecodeChunk() |
| 493 | // may have received some data that it hasn't yet parsed, in which case |
| 494 | // decoding is incomplete. |
| 495 | return unparsed_bytes_.empty(); |
| 496 | } else if (custom_code_table_decoder_.get()) { |
| 497 | // The decoder is in the middle of parsing a custom code table. |
| 498 | return false; |
| 499 | } else if (delta_window_.FoundWindowHeader()) { |
| 500 | // The decoder is in the middle of parsing an interleaved format delta |
| 501 | // window. |
| 502 | return false; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 503 | } else if (ReachedPlannedTargetFileSize()) { |
| 504 | // The decoder found exactly the planned number of bytes. In this case |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 505 | // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover |
| 506 | // data after the end of the delta file. |
| 507 | return true; |
| 508 | } else { |
| 509 | // No complete delta file window has been parsed yet. DecodeChunk() |
| 510 | // may have received some data that it hasn't yet parsed, in which case |
| 511 | // decoding is incomplete. |
| 512 | return unparsed_bytes_.empty(); |
| 513 | } |
| 514 | } |
| 515 | |
| 516 | const char* dictionary_ptr() const { return dictionary_ptr_; } |
| 517 | |
| 518 | size_t dictionary_size() const { return dictionary_size_; } |
| 519 | |
| 520 | VCDiffAddressCache* addr_cache() { return addr_cache_.get(); } |
| 521 | |
| 522 | string* decoded_target() { return &decoded_target_; } |
| 523 | |
| 524 | string* annotated_output() { return &annotated_output_; } |
| 525 | |
| 526 | // The variable that determines whether annotated output is enabled is |
| 527 | // delta_window_.annotated_output_. If that member is NULL, then the feature |
| 528 | // is disabled. |
| 529 | void EnableAnnotatedOutput() { |
| 530 | delta_window_.EnableAnnotatedOutput(); |
| 531 | } |
| 532 | |
| 533 | void DisableAnnotatedOutput() { |
| 534 | delta_window_.DisableAnnotatedOutput(); |
| 535 | } |
| 536 | |
| 537 | void GetAnnotatedOutput(OutputStringInterface* annotated_output) { |
| 538 | // We could use annotated_output->assign(), but that method is not defined |
| 539 | // for some output string types, so use clear() + append() to accomplish the |
| 540 | // same thing. |
| 541 | annotated_output->clear(); |
| 542 | annotated_output->append(annotated_output_.data(), |
| 543 | annotated_output_.size()); |
| 544 | } |
| 545 | |
| 546 | private: |
| 547 | // Reads the VCDiff delta file header section as described in RFC section 4.1, |
| 548 | // except the custom code table data. Returns RESULT_ERROR if an error |
| 549 | // occurred, or RESULT_END_OF_DATA if the end of available data was reached |
| 550 | // before the entire header could be read. (The latter may be an error |
| 551 | // condition if there is no more data available.) Otherwise, advances |
| 552 | // data->position_ past the header and returns RESULT_SUCCESS. |
| 553 | // |
| 554 | VCDiffResult ReadDeltaFileHeader(ParseableChunk* data); |
| 555 | |
| 556 | // Indicates whether or not the header has already been read. |
| 557 | bool FoundFileHeader() const { return addr_cache_.get() != NULL; } |
| 558 | |
| 559 | // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta |
| 560 | // file header, this function parses the custom cache sizes and initializes |
| 561 | // a nested VCDiffStreamingDecoderImpl object that will be used to parse the |
| 562 | // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an |
| 563 | // error occurred, or RESULT_END_OF_DATA if the end of available data was |
| 564 | // reached before the custom cache sizes could be read. Otherwise, returns |
| 565 | // the number of bytes read. |
| 566 | // |
| 567 | int InitCustomCodeTable(const char* data_start, const char* data_end); |
| 568 | |
| 569 | // If a custom code table was specified in the header section that was parsed |
| 570 | // by ReadDeltaFileHeader(), this function makes a recursive call to another |
| 571 | // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the |
| 572 | // custom code table is expected to be supplied as an embedded VCDIFF |
| 573 | // encoding that uses the standard code table. Returns RESULT_ERROR if an |
| 574 | // error occurs, or RESULT_END_OF_DATA if the end of available data was |
| 575 | // reached before the entire custom code table could be read. Otherwise, |
| 576 | // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded |
| 577 | // custom code table. If the function returns RESULT_SUCCESS or |
| 578 | // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes. |
| 579 | // |
| 580 | VCDiffResult ReadCustomCodeTable(ParseableChunk* data); |
| 581 | |
| 582 | // Contents and length of the source (dictionary) data. |
| 583 | const char* dictionary_ptr_; |
| 584 | size_t dictionary_size_; |
| 585 | |
| 586 | // This string will be used to store any unparsed bytes left over when |
| 587 | // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA. |
| 588 | // It will also be used to concatenate those unparsed bytes with the data |
| 589 | // supplied to the next call to DecodeChunk(), so that they appear in |
| 590 | // contiguous memory. |
| 591 | string unparsed_bytes_; |
| 592 | |
| 593 | // The portion of the target file that has been decoded so far. This will be |
| 594 | // used to fill the output string for DecodeChunk(), and will also be used to |
| 595 | // execute COPY instructions that reference target data. Since the source |
| 596 | // window can come from a range of addresses in the previously decoded target |
| 597 | // data, the entire target file needs to be available to the decoder, not just |
| 598 | // the current target window. |
| 599 | string decoded_target_; |
| 600 | |
| 601 | // The VCDIFF version byte (also known as "header4") from the |
| 602 | // delta file header. |
| 603 | unsigned char vcdiff_version_code_; |
| 604 | |
| 605 | VCDiffDeltaFileWindow delta_window_; |
| 606 | |
| 607 | std::auto_ptr<VCDiffAddressCache> addr_cache_; |
| 608 | |
| 609 | // Will be NULL unless a custom code table has been defined. |
| 610 | std::auto_ptr<VCDiffCodeTableData> custom_code_table_; |
| 611 | |
| 612 | // Used to receive the decoded custom code table. |
| 613 | string custom_code_table_string_; |
| 614 | |
| 615 | // If a custom code table is specified, it will be expressed |
| 616 | // as an embedded VCDIFF delta file which uses the default code table |
| 617 | // as the source file (dictionary). Use a child decoder object |
| 618 | // to decode that delta file. |
| 619 | std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_; |
| 620 | |
| 621 | // If set, then the decoder is expecting *exactly* this number of |
| 622 | // target bytes to be decoded from one or more delta file windows. |
| 623 | // If this number is exceeded while decoding a window, but was not met |
| 624 | // before starting on that window, an error will be reported. |
| 625 | // If FinishDecoding() is called before this number is met, an error |
| 626 | // will also be reported. This feature is used for decoding the |
| 627 | // embedded code table data within a VCDIFF delta file; we want to |
| 628 | // stop processing the embedded data once the entire code table has |
| 629 | // been decoded, and treat the rest of the available data as part |
| 630 | // of the enclosing delta file. |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 631 | size_t planned_target_file_size_; |
| 632 | |
| 633 | size_t maximum_target_file_size_; |
| 634 | |
| 635 | size_t maximum_target_window_size_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 636 | |
| 637 | // This string will always be empty until EnableAnnotatedOutput() is called, |
| 638 | // at which point it will start to accumulate annotated delta windows each |
| 639 | // time DecodeChunk() finishes a window. It will be cleared each time that |
| 640 | // StartDecoding() is called. |
| 641 | string annotated_output_; |
| 642 | |
| 643 | // This value is used to ensure the correct order of calls to the interface |
| 644 | // functions, i.e., a single call to StartDecoding(), followed by zero or |
| 645 | // more calls to DecodeChunk(), followed by a single call to |
| 646 | // FinishDecoding(). |
| 647 | bool start_decoding_was_called_; |
| 648 | |
| 649 | // Making these private avoids implicit copy constructor & assignment operator |
| 650 | VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT |
| 651 | void operator=(const VCDiffStreamingDecoderImpl&); |
| 652 | }; |
| 653 | |
| 654 | // *** Methods for VCDiffStreamingDecoderImpl |
| 655 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 656 | const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize; |
| 657 | const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes; |
| 658 | |
| 659 | VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() |
| 660 | : maximum_target_file_size_(kDefaultMaximumTargetFileSize), |
| 661 | maximum_target_window_size_(kDefaultMaximumTargetFileSize) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 662 | delta_window_.Init(this); |
| 663 | Reset(); |
| 664 | } |
| 665 | |
| 666 | // Reset() will delete the component objects without reallocating them. |
| 667 | VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } |
| 668 | |
| 669 | void VCDiffStreamingDecoderImpl::Reset() { |
| 670 | start_decoding_was_called_ = false; |
| 671 | dictionary_ptr_ = NULL; |
| 672 | dictionary_size_ = 0; |
| 673 | vcdiff_version_code_ = '\0'; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 674 | planned_target_file_size_ = kUnlimitedBytes; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 675 | addr_cache_.reset(); |
| 676 | custom_code_table_.reset(); |
| 677 | custom_code_table_decoder_.reset(); |
| 678 | delta_window_.Reset(); |
| 679 | } |
| 680 | |
| 681 | void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, |
| 682 | size_t dictionary_size) { |
| 683 | if (start_decoding_was_called_) { |
| 684 | LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()" |
| 685 | << LOG_ENDL; |
| 686 | return; |
| 687 | } |
| 688 | unparsed_bytes_.clear(); |
| 689 | decoded_target_.clear(); // delta_window_.Reset() depends on this |
| 690 | annotated_output_.clear(); |
| 691 | Reset(); |
| 692 | dictionary_ptr_ = dictionary_ptr; |
| 693 | dictionary_size_ = dictionary_size; |
| 694 | start_decoding_was_called_ = true; |
| 695 | } |
| 696 | |
| 697 | // Reads the VCDiff delta file header section as described in RFC section 4.1: |
| 698 | // |
| 699 | // Header1 - byte = 0xD6 (ASCII 'V' | 0x80) |
| 700 | // Header2 - byte = 0xC3 (ASCII 'C' | 0x80) |
| 701 | // Header3 - byte = 0xC4 (ASCII 'D' | 0x80) |
| 702 | // Header4 - byte |
| 703 | // Hdr_Indicator - byte |
| 704 | // [Secondary compressor ID] - byte |
| 705 | // [Length of code table data] - integer |
| 706 | // [Code table data] |
| 707 | // |
| 708 | // Initializes the code table and address cache objects. Returns RESULT_ERROR |
| 709 | // if an error occurred, and RESULT_END_OF_DATA if the end of available data was |
| 710 | // reached before the entire header could be read. (The latter may be an error |
| 711 | // condition if there is no more data available.) Otherwise, returns |
| 712 | // RESULT_SUCCESS, and removes the header bytes from the data string. |
| 713 | // |
| 714 | // It's relatively inefficient to expect this function to parse any number of |
| 715 | // input bytes available, down to 1 byte, but it is necessary in case the input |
| 716 | // is not a properly formatted VCDIFF delta file. If the entire input consists |
| 717 | // of two bytes "12", then we should recognize that it does not match the |
| 718 | // initial VCDIFF magic number "VCD" and report an error, rather than waiting |
| 719 | // indefinitely for more input that will never arrive. |
| 720 | // |
| 721 | VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader( |
| 722 | ParseableChunk* data) { |
| 723 | if (FoundFileHeader()) { |
| 724 | return RESULT_SUCCESS; |
| 725 | } |
| 726 | size_t data_size = data->UnparsedSize(); |
| 727 | const DeltaFileHeader* header = |
| 728 | reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData()); |
| 729 | bool wrong_magic_number = false; |
| 730 | switch (data_size) { |
| 731 | // Verify only the bytes that are available. |
| 732 | default: |
| 733 | // Found header contents up to and including VCDIFF version |
| 734 | vcdiff_version_code_ = header->header4; |
| 735 | if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284) |
| 736 | (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol |
| 737 | LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL; |
| 738 | return RESULT_ERROR; |
| 739 | } |
| 740 | // fall through |
| 741 | case 3: |
| 742 | if (header->header3 != 0xC4) { // magic value 'D' | 0x80 |
| 743 | wrong_magic_number = true; |
| 744 | } |
| 745 | // fall through |
| 746 | case 2: |
| 747 | if (header->header2 != 0xC3) { // magic value 'C' | 0x80 |
| 748 | wrong_magic_number = true; |
| 749 | } |
| 750 | // fall through |
| 751 | case 1: |
| 752 | if (header->header1 != 0xD6) { // magic value 'V' | 0x80 |
| 753 | wrong_magic_number = true; |
| 754 | } |
| 755 | // fall through |
| 756 | case 0: |
| 757 | if (wrong_magic_number) { |
| 758 | LOG(ERROR) << "Did not find VCDIFF header bytes; " |
| 759 | "input is not a VCDIFF delta file" << LOG_ENDL; |
| 760 | return RESULT_ERROR; |
| 761 | } |
| 762 | if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA; |
| 763 | } |
| 764 | // Secondary compressor not supported. |
| 765 | if (header->hdr_indicator & VCD_DECOMPRESS) { |
| 766 | LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL; |
| 767 | return RESULT_ERROR; |
| 768 | } |
| 769 | if (header->hdr_indicator & VCD_CODETABLE) { |
| 770 | int bytes_parsed = InitCustomCodeTable( |
| 771 | data->UnparsedData() + sizeof(DeltaFileHeader), |
| 772 | data->End()); |
| 773 | switch (bytes_parsed) { |
| 774 | case RESULT_ERROR: |
| 775 | return RESULT_ERROR; |
| 776 | case RESULT_END_OF_DATA: |
| 777 | return RESULT_END_OF_DATA; |
| 778 | default: |
| 779 | data->Advance(sizeof(DeltaFileHeader) + bytes_parsed); |
| 780 | } |
| 781 | } else { |
| 782 | addr_cache_.reset(new VCDiffAddressCache); |
| 783 | // addr_cache_->Init() will be called |
| 784 | // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| 785 | data->Advance(sizeof(DeltaFileHeader)); |
| 786 | } |
| 787 | return RESULT_SUCCESS; |
| 788 | } |
| 789 | |
| 790 | int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start, |
| 791 | const char* data_end) { |
| 792 | // A custom code table is being specified. Parse the variable-length |
| 793 | // cache sizes and begin parsing the encoded custom code table. |
| 794 | int32_t near_cache_size = 0, same_cache_size = 0; |
| 795 | VCDiffHeaderParser header_parser(data_start, data_end); |
| 796 | if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) { |
| 797 | return header_parser.GetResult(); |
| 798 | } |
| 799 | if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) { |
| 800 | return header_parser.GetResult(); |
| 801 | } |
| 802 | custom_code_table_.reset(new struct VCDiffCodeTableData); |
| 803 | memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData)); |
| 804 | custom_code_table_string_.clear(); |
| 805 | addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size)); |
| 806 | // addr_cache_->Init() will be called |
| 807 | // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| 808 | |
| 809 | // If we reach this point (the start of the custom code table) |
| 810 | // without encountering a RESULT_END_OF_DATA condition, then we won't call |
| 811 | // ReadDeltaFileHeader() again for this delta file. |
| 812 | // |
| 813 | // Instantiate a recursive decoder to interpret the custom code table |
| 814 | // as a VCDIFF encoding of the default code table. |
| 815 | custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); |
| 816 | custom_code_table_decoder_->StartDecoding( |
| 817 | reinterpret_cast<const char*>( |
| 818 | &VCDiffCodeTableData::kDefaultCodeTableData), |
| 819 | sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 820 | custom_code_table_decoder_->SetPlannedTargetFileSize( |
| 821 | sizeof(*custom_code_table_)); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 822 | return static_cast<int>(header_parser.ParsedSize()); |
| 823 | } |
| 824 | |
| 825 | VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( |
| 826 | ParseableChunk* data) { |
| 827 | if (!custom_code_table_decoder_.get()) { |
| 828 | return RESULT_SUCCESS; |
| 829 | } |
| 830 | if (!custom_code_table_.get()) { |
| 831 | LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set," |
| 832 | " but custom_code_table_ is NULL" << LOG_ENDL; |
| 833 | return RESULT_ERROR; |
| 834 | } |
| 835 | OutputString<string> output_string(&custom_code_table_string_); |
| 836 | if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), |
| 837 | data->UnparsedSize(), |
| 838 | &output_string)) { |
| 839 | return RESULT_ERROR; |
| 840 | } |
| 841 | if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { |
| 842 | // Skip over the consumed data. |
| 843 | data->Finish(); |
| 844 | return RESULT_END_OF_DATA; |
| 845 | } |
| 846 | if (!custom_code_table_decoder_->FinishDecoding()) { |
| 847 | return RESULT_ERROR; |
| 848 | } |
| 849 | if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 850 | LOG(DFATAL) << "Decoded custom code table size (" |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 851 | << custom_code_table_string_.length() |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 852 | << ") does not match size of a code table (" |
| 853 | << sizeof(*custom_code_table_) << ")" << LOG_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 854 | return RESULT_ERROR; |
| 855 | } |
| 856 | memcpy(custom_code_table_.get(), |
| 857 | custom_code_table_string_.data(), |
| 858 | sizeof(*custom_code_table_)); |
| 859 | custom_code_table_string_.clear(); |
| 860 | // Skip over the consumed data. |
| 861 | data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); |
| 862 | custom_code_table_decoder_.reset(); |
| 863 | delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); |
| 864 | return RESULT_SUCCESS; |
| 865 | } |
| 866 | |
| 867 | namespace { |
| 868 | |
| 869 | class TrackNewOutputText { |
| 870 | public: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 871 | #ifndef VCDIFF_HAS_GLOBAL_STRING |
| 872 | typedef std::string string; |
| 873 | #endif // !VCDIFF_HAS_GLOBAL_STRING |
| 874 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 875 | explicit TrackNewOutputText(const string& decoded_target) |
| 876 | : decoded_target_(decoded_target), |
| 877 | initial_decoded_target_size_(decoded_target.size()) { } |
| 878 | |
| 879 | void AppendNewOutputText(size_t target_bytes_remaining, |
| 880 | OutputStringInterface* output_string) { |
| 881 | const size_t bytes_decoded_this_chunk = |
| 882 | decoded_target_.size() - initial_decoded_target_size_; |
| 883 | if (bytes_decoded_this_chunk > 0) { |
| 884 | if (target_bytes_remaining > 0) { |
| 885 | // The decoder is midway through decoding a target window. Resize |
| 886 | // output_string to match the expected length. The interface guarantees |
| 887 | // not to resize the output_string more than once per target window |
| 888 | // decoded. |
| 889 | output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk |
| 890 | + target_bytes_remaining); |
| 891 | } |
| 892 | output_string->append( |
| 893 | decoded_target_.data() + initial_decoded_target_size_, |
| 894 | bytes_decoded_this_chunk); |
| 895 | } |
| 896 | } |
| 897 | |
| 898 | private: |
| 899 | const string& decoded_target_; |
| 900 | size_t initial_decoded_target_size_; |
| 901 | }; |
| 902 | |
| 903 | } // anonymous namespace |
| 904 | |
| 905 | bool VCDiffStreamingDecoderImpl::DecodeChunk( |
| 906 | const char* data, |
| 907 | size_t len, |
| 908 | OutputStringInterface* output_string) { |
| 909 | if (!start_decoding_was_called_) { |
| 910 | LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL; |
| 911 | Reset(); |
| 912 | return false; |
| 913 | } |
| 914 | ParseableChunk parseable_chunk(data, len); |
| 915 | if (!unparsed_bytes_.empty()) { |
| 916 | unparsed_bytes_.append(data, len); |
| 917 | parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), |
| 918 | unparsed_bytes_.size()); |
| 919 | } |
| 920 | TrackNewOutputText output_tracker(decoded_target_); |
| 921 | VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); |
| 922 | if (RESULT_SUCCESS == result) { |
| 923 | result = ReadCustomCodeTable(&parseable_chunk); |
| 924 | } |
| 925 | if (RESULT_SUCCESS == result) { |
| 926 | result = delta_window_.DecodeWindows(&parseable_chunk); |
| 927 | } |
| 928 | if (RESULT_ERROR == result) { |
| 929 | Reset(); // Don't allow further DecodeChunk calls |
| 930 | return false; |
| 931 | } |
| 932 | unparsed_bytes_.assign(parseable_chunk.UnparsedData(), |
| 933 | parseable_chunk.UnparsedSize()); |
| 934 | output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(), |
| 935 | output_string); |
| 936 | return true; |
| 937 | } |
| 938 | |
| 939 | // Finishes decoding after all data has been received. Returns true |
| 940 | // if decoding of the entire stream was successful. |
| 941 | bool VCDiffStreamingDecoderImpl::FinishDecoding() { |
| 942 | bool success = true; |
| 943 | if (!start_decoding_was_called_) { |
| 944 | LOG(WARNING) << "FinishDecoding() called before StartDecoding()," |
| 945 | " or called after DecodeChunk() returned false" |
| 946 | << LOG_ENDL; |
| 947 | success = false; |
| 948 | } else if (!IsDecodingComplete()) { |
| 949 | LOG(ERROR) << "FinishDecoding() called before parsing entire" |
| 950 | " delta file window" << LOG_ENDL; |
| 951 | success = false; |
| 952 | } |
| 953 | // Reset the object state for the next decode operation |
| 954 | Reset(); |
| 955 | return success; |
| 956 | } |
| 957 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 958 | bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits( |
| 959 | size_t window_size) const { |
| 960 | if (window_size > maximum_target_window_size_) { |
| 961 | LOG(ERROR) << "Length of target window (" << window_size |
| 962 | << ") exceeds limit of " << maximum_target_window_size_ |
| 963 | << " bytes" << LOG_ENDL; |
| 964 | return true; |
| 965 | } |
| 966 | if (HasPlannedTargetFileSize()) { |
| 967 | // The logical expression to check would be: |
| 968 | // |
| 969 | // decoded_target_.size() + window_size > planned_target_file_size_ |
| 970 | // |
| 971 | // but the addition might cause an integer overflow if target_bytes_to_add |
| 972 | // is very large. So it is better to check target_bytes_to_add against |
| 973 | // the remaining planned target bytes. |
| 974 | size_t remaining_planned_target_file_size = |
| 975 | planned_target_file_size_ - decoded_target_.size(); |
| 976 | if (window_size > remaining_planned_target_file_size) { |
| 977 | LOG(ERROR) << "Length of target window (" << window_size |
| 978 | << " bytes) plus previous windows (" << decoded_target_.size() |
| 979 | << " bytes) would exceed planned size of " |
| 980 | << planned_target_file_size_ << " bytes" << LOG_ENDL; |
| 981 | return true; |
| 982 | } |
| 983 | } |
| 984 | size_t remaining_maximum_target_bytes = |
| 985 | maximum_target_file_size_ - decoded_target_.size(); |
| 986 | if (window_size > remaining_maximum_target_bytes) { |
| 987 | LOG(ERROR) << "Length of target window (" << window_size |
| 988 | << " bytes) plus previous windows (" << decoded_target_.size() |
| 989 | << " bytes) would exceed maximum target file size of " |
| 990 | << maximum_target_file_size_ << " bytes" << LOG_ENDL; |
| 991 | return true; |
| 992 | } |
| 993 | return false; |
| 994 | } |
| 995 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 996 | // *** Methods for VCDiffDeltaFileWindow |
| 997 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 998 | void VCDiffDeltaFileWindow::Reset() { |
| 999 | found_header_ = false; |
| 1000 | |
| 1001 | // Mark the start of the current target window. |
| 1002 | target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U; |
| 1003 | target_window_length_ = 0; |
| 1004 | |
| 1005 | source_segment_ptr_ = NULL; |
| 1006 | source_segment_length_ = 0; |
| 1007 | |
| 1008 | instructions_and_sizes_.Invalidate(); |
| 1009 | data_for_add_and_run_.Invalidate(); |
| 1010 | addresses_for_copy_.Invalidate(); |
| 1011 | |
| 1012 | interleaved_bytes_expected_ = 0; |
| 1013 | |
| 1014 | has_checksum_ = false; |
| 1015 | expected_checksum_ = 0; |
| 1016 | if (annotated_output_.get()) { |
| 1017 | annotated_output_->clear(); |
| 1018 | } |
| 1019 | } |
| 1020 | |
| 1021 | VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( |
| 1022 | VCDiffHeaderParser* header_parser) { |
| 1023 | size_t add_and_run_data_length = 0; |
| 1024 | size_t instructions_and_sizes_length = 0; |
| 1025 | size_t addresses_length = 0; |
| 1026 | if (!header_parser->ParseSectionLengths(has_checksum_, |
| 1027 | &add_and_run_data_length, |
| 1028 | &instructions_and_sizes_length, |
| 1029 | &addresses_length, |
| 1030 | &expected_checksum_)) { |
| 1031 | return header_parser->GetResult(); |
| 1032 | } |
| 1033 | if (parent_->AllowInterleaved() && |
| 1034 | (add_and_run_data_length == 0) && |
| 1035 | (addresses_length == 0)) { |
| 1036 | // The interleaved format is being used. |
| 1037 | interleaved_bytes_expected_ = |
| 1038 | static_cast<int>(instructions_and_sizes_length); |
| 1039 | UpdateInterleavedSectionPointers(header_parser->UnparsedData(), |
| 1040 | header_parser->End()); |
| 1041 | } else { |
| 1042 | // If interleaved format is not used, then the whole window contents |
| 1043 | // must be available before decoding can begin. If only part of |
| 1044 | // the current window is available, then report end of data |
| 1045 | // and re-parse the whole header when DecodeChunk() is called again. |
| 1046 | if (header_parser->UnparsedSize() < (add_and_run_data_length + |
| 1047 | instructions_and_sizes_length + |
| 1048 | addresses_length)) { |
| 1049 | return RESULT_END_OF_DATA; |
| 1050 | } |
| 1051 | data_for_add_and_run_.Init(header_parser->UnparsedData(), |
| 1052 | add_and_run_data_length); |
| 1053 | instructions_and_sizes_.Init(data_for_add_and_run_.End(), |
| 1054 | instructions_and_sizes_length); |
| 1055 | addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); |
| 1056 | if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { |
| 1057 | LOG(ERROR) << "The end of the instructions section " |
| 1058 | "does not match the end of the delta window" << LOG_ENDL; |
| 1059 | return RESULT_ERROR; |
| 1060 | } |
| 1061 | } |
| 1062 | reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), |
| 1063 | instructions_and_sizes_.End()); |
| 1064 | return RESULT_SUCCESS; |
| 1065 | } |
| 1066 | |
| 1067 | // Here are the elements of the delta window header to be parsed, |
| 1068 | // from section 4 of the RFC: |
| 1069 | // |
| 1070 | // Window1 |
| 1071 | // Win_Indicator - byte |
| 1072 | // [Source segment size] - integer |
| 1073 | // [Source segment position] - integer |
| 1074 | // The delta encoding of the target window |
| 1075 | // Length of the delta encoding - integer |
| 1076 | // The delta encoding |
| 1077 | // Size of the target window - integer |
| 1078 | // Delta_Indicator - byte |
| 1079 | // Length of data for ADDs and RUNs - integer |
| 1080 | // Length of instructions and sizes - integer |
| 1081 | // Length of addresses for COPYs - integer |
| 1082 | // Data section for ADDs and RUNs - array of bytes |
| 1083 | // Instructions and sizes section - array of bytes |
| 1084 | // Addresses section for COPYs - array of bytes |
| 1085 | // |
| 1086 | VCDiffResult VCDiffDeltaFileWindow::ReadHeader( |
| 1087 | ParseableChunk* parseable_chunk) { |
| 1088 | string* decoded_target = parent_->decoded_target(); |
| 1089 | VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), |
| 1090 | parseable_chunk->End()); |
| 1091 | size_t source_segment_position = 0; |
| 1092 | unsigned char win_indicator = 0; |
| 1093 | if (!header_parser.ParseWinIndicatorAndSourceSegment( |
| 1094 | parent_->dictionary_size(), |
| 1095 | decoded_target->size(), |
| 1096 | &win_indicator, |
| 1097 | &source_segment_length_, |
| 1098 | &source_segment_position)) { |
| 1099 | return header_parser.GetResult(); |
| 1100 | } |
| 1101 | has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); |
| 1102 | if (!header_parser.ParseWindowLengths(&target_window_length_)) { |
| 1103 | return header_parser.GetResult(); |
| 1104 | } |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 1105 | if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) { |
| 1106 | // An error has been logged by TargetWindowWouldExceedSizeLimits(). |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1107 | return RESULT_ERROR; |
| 1108 | } |
| 1109 | header_parser.ParseDeltaIndicator(); |
| 1110 | VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); |
| 1111 | if (RESULT_SUCCESS != setup_return_code) { |
| 1112 | return setup_return_code; |
| 1113 | } |
| 1114 | // Reserve enough space in the output string for the current target window. |
| 1115 | decoded_target->reserve(target_window_start_pos_ + target_window_length_); |
| 1116 | // Get a pointer to the start of the source segment. |
| 1117 | if (win_indicator & VCD_SOURCE) { |
| 1118 | source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; |
| 1119 | } else if (win_indicator & VCD_TARGET) { |
| 1120 | // This assignment must happen after the reserve(). |
| 1121 | // decoded_target should not be resized again while processing this window, |
| 1122 | // so source_segment_ptr_ should remain valid. |
| 1123 | source_segment_ptr_ = decoded_target->data() + source_segment_position; |
| 1124 | } |
| 1125 | // The whole window header was found and parsed successfully. |
| 1126 | found_header_ = true; |
| 1127 | parseable_chunk->Advance(header_parser.ParsedSize()); |
| 1128 | return RESULT_SUCCESS; |
| 1129 | } |
| 1130 | |
| 1131 | void VCDiffDeltaFileWindow::UpdateInstructionPointer( |
| 1132 | ParseableChunk* parseable_chunk) { |
| 1133 | if (IsInterleaved()) { |
| 1134 | size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); |
| 1135 | // Reduce expected instruction segment length by bytes parsed |
| 1136 | interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); |
| 1137 | parseable_chunk->Advance(bytes_parsed); |
| 1138 | } |
| 1139 | } |
| 1140 | |
| 1141 | size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { |
| 1142 | return parent_->decoded_target()->size() - target_window_start_pos_; |
| 1143 | } |
| 1144 | |
| 1145 | size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { |
| 1146 | if (target_window_length_ == 0) { |
| 1147 | // There is no window being decoded at present |
| 1148 | return 0; |
| 1149 | } else { |
| 1150 | return target_window_length_ - TargetBytesDecoded(); |
| 1151 | } |
| 1152 | } |
| 1153 | |
| 1154 | void VCDiffDeltaFileWindow::CopyBytes(const char* data, |
| 1155 | size_t size, |
| 1156 | VCDiffAnnotationType annotation_type) { |
| 1157 | parent_->decoded_target()->append(data, size); |
| 1158 | if (annotated_output_.get()) { |
| 1159 | annotated_output_->append(kAnnotationStartTags[annotation_type]); |
| 1160 | annotated_output_->append(data, size); |
| 1161 | annotated_output_->append(kAnnotationEndTags[annotation_type]); |
| 1162 | } |
| 1163 | } |
| 1164 | |
| 1165 | void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { |
| 1166 | parent_->decoded_target()->append(size, byte); |
| 1167 | if (annotated_output_.get()) { |
| 1168 | annotated_output_->append(kAnnotationStartTags[VCD_ANNOTATION_LITERAL]); |
| 1169 | annotated_output_->append(size, byte); |
| 1170 | annotated_output_->append(kAnnotationEndTags[VCD_ANNOTATION_LITERAL]); |
| 1171 | } |
| 1172 | } |
| 1173 | |
| 1174 | VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { |
| 1175 | if (size > data_for_add_and_run_.UnparsedSize()) { |
| 1176 | return RESULT_END_OF_DATA; |
| 1177 | } |
| 1178 | // Write the next "size" data bytes |
| 1179 | CopyBytes(data_for_add_and_run_.UnparsedData(), size, VCD_ANNOTATION_LITERAL); |
| 1180 | data_for_add_and_run_.Advance(size); |
| 1181 | return RESULT_SUCCESS; |
| 1182 | } |
| 1183 | |
| 1184 | VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { |
| 1185 | if (data_for_add_and_run_.Empty()) { |
| 1186 | return RESULT_END_OF_DATA; |
| 1187 | } |
| 1188 | // Write "size" copies of the next data byte |
| 1189 | RunByte(*data_for_add_and_run_.UnparsedData(), size); |
| 1190 | data_for_add_and_run_.Advance(1); |
| 1191 | return RESULT_SUCCESS; |
| 1192 | } |
| 1193 | |
| 1194 | VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size, |
| 1195 | unsigned char mode) { |
| 1196 | // Keep track of the number of target bytes decoded as a local variable |
| 1197 | // to avoid recalculating it each time it is needed. |
| 1198 | size_t target_bytes_decoded = TargetBytesDecoded(); |
| 1199 | const VCDAddress here_address = |
| 1200 | static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); |
| 1201 | const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( |
| 1202 | here_address, |
| 1203 | mode, |
| 1204 | addresses_for_copy_.UnparsedDataAddr(), |
| 1205 | addresses_for_copy_.End()); |
| 1206 | switch (decoded_address) { |
| 1207 | case RESULT_ERROR: |
| 1208 | LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL; |
| 1209 | return RESULT_ERROR; |
| 1210 | case RESULT_END_OF_DATA: |
| 1211 | return RESULT_END_OF_DATA; |
| 1212 | default: |
| 1213 | if ((decoded_address < 0) || (decoded_address > here_address)) { |
| 1214 | LOG(DFATAL) << "Internal error: unexpected address " << decoded_address |
| 1215 | << " returned from DecodeAddress, with here_address = " |
| 1216 | << here_address << LOG_ENDL; |
| 1217 | return RESULT_ERROR; |
| 1218 | } |
| 1219 | break; |
| 1220 | } |
| 1221 | size_t address = static_cast<size_t>(decoded_address); |
| 1222 | if ((address + size) <= source_segment_length_) { |
| 1223 | // Copy all data from source segment |
| 1224 | CopyBytes(&source_segment_ptr_[address], size, VCD_ANNOTATION_DMATCH); |
| 1225 | return RESULT_SUCCESS; |
| 1226 | } |
| 1227 | // Copy some data from target window... |
| 1228 | if (address < source_segment_length_) { |
| 1229 | // ... plus some data from source segment |
| 1230 | const size_t partial_copy_size = source_segment_length_ - address; |
| 1231 | CopyBytes(&source_segment_ptr_[address], |
| 1232 | partial_copy_size, |
| 1233 | VCD_ANNOTATION_DMATCH); |
| 1234 | target_bytes_decoded += partial_copy_size; |
| 1235 | address += partial_copy_size; |
| 1236 | size -= partial_copy_size; |
| 1237 | } |
| 1238 | address -= source_segment_length_; |
| 1239 | // address is now based at start of target window |
| 1240 | const char* const target_segment_ptr = parent_->decoded_target()->data() + |
| 1241 | target_window_start_pos_; |
| 1242 | while (size > (target_bytes_decoded - address)) { |
| 1243 | // Recursive copy that extends into the yet-to-be-copied target data |
| 1244 | const size_t partial_copy_size = target_bytes_decoded - address; |
| 1245 | CopyBytes(&target_segment_ptr[address], |
| 1246 | partial_copy_size, |
| 1247 | VCD_ANNOTATION_BMATCH); |
| 1248 | target_bytes_decoded += partial_copy_size; |
| 1249 | address += partial_copy_size; |
| 1250 | size -= partial_copy_size; |
| 1251 | } |
| 1252 | CopyBytes(&target_segment_ptr[address], size, VCD_ANNOTATION_BMATCH); |
| 1253 | return RESULT_SUCCESS; |
| 1254 | } |
| 1255 | |
| 1256 | int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { |
| 1257 | if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() |
| 1258 | != parseable_chunk->UnparsedData())) { |
| 1259 | LOG(DFATAL) << "Internal error: interleaved format is used, but the" |
| 1260 | " input pointer does not point to the instructions section" |
| 1261 | << LOG_ENDL; |
| 1262 | return RESULT_ERROR; |
| 1263 | } |
| 1264 | while (TargetBytesDecoded() < target_window_length_) { |
| 1265 | int32_t decoded_size = VCD_INSTRUCTION_ERROR; |
| 1266 | unsigned char mode = 0; |
| 1267 | VCDiffInstructionType instruction = |
| 1268 | reader_.GetNextInstruction(&decoded_size, &mode); |
| 1269 | switch (instruction) { |
| 1270 | case VCD_INSTRUCTION_END_OF_DATA: |
| 1271 | UpdateInstructionPointer(parseable_chunk); |
| 1272 | return RESULT_END_OF_DATA; |
| 1273 | case VCD_INSTRUCTION_ERROR: |
| 1274 | return RESULT_ERROR; |
| 1275 | default: |
| 1276 | break; |
| 1277 | } |
| 1278 | const size_t size = static_cast<size_t>(decoded_size); |
| 1279 | // The value of "size" itself could be enormous (say, INT32_MAX) |
| 1280 | // so check it individually against the limit to protect against |
| 1281 | // overflow when adding it to something else. |
| 1282 | if ((size > target_window_length_) || |
| 1283 | ((size + TargetBytesDecoded()) > target_window_length_)) { |
| 1284 | LOG(ERROR) << VCDiffInstructionName(instruction) |
| 1285 | << " with size " << size |
| 1286 | << " plus existing " << TargetBytesDecoded() |
| 1287 | << " bytes of target data exceeds length of target" |
| 1288 | " window (" << target_window_length_ << " bytes)" |
| 1289 | << LOG_ENDL; |
| 1290 | return RESULT_ERROR; |
| 1291 | } |
| 1292 | VCDiffResult result = RESULT_SUCCESS; |
| 1293 | switch (instruction) { |
| 1294 | case VCD_ADD: |
| 1295 | result = DecodeAdd(size); |
| 1296 | break; |
| 1297 | case VCD_RUN: |
| 1298 | result = DecodeRun(size); |
| 1299 | break; |
| 1300 | case VCD_COPY: |
| 1301 | result = DecodeCopy(size, mode); |
| 1302 | break; |
| 1303 | default: |
| 1304 | LOG(DFATAL) << "Unexpected instruction type " << instruction |
| 1305 | << "in opcode stream" << LOG_ENDL; |
| 1306 | return RESULT_ERROR; |
| 1307 | } |
| 1308 | switch (result) { |
| 1309 | case RESULT_END_OF_DATA: |
| 1310 | reader_.UnGetInstruction(); |
| 1311 | UpdateInstructionPointer(parseable_chunk); |
| 1312 | return RESULT_END_OF_DATA; |
| 1313 | case RESULT_ERROR: |
| 1314 | return RESULT_ERROR; |
| 1315 | case RESULT_SUCCESS: |
| 1316 | break; |
| 1317 | } |
| 1318 | } |
| 1319 | if (TargetBytesDecoded() != target_window_length_) { |
| 1320 | LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded() |
| 1321 | << " bytes) does not match expected size (" |
| 1322 | << target_window_length_ << " bytes)" << LOG_ENDL; |
| 1323 | return RESULT_ERROR; |
| 1324 | } |
| 1325 | const char* const target_window_start = |
| 1326 | parent_->decoded_target()->data() + target_window_start_pos_; |
| 1327 | if (has_checksum_ && |
| 1328 | (ComputeAdler32(target_window_start, target_window_length_) |
| 1329 | != expected_checksum_)) { |
| 1330 | LOG(ERROR) << "Target data does not match checksum; this could mean " |
| 1331 | "that the wrong dictionary was used" << LOG_ENDL; |
| 1332 | return RESULT_ERROR; |
| 1333 | } |
| 1334 | if (!instructions_and_sizes_.Empty()) { |
| 1335 | LOG(ERROR) << "Excess instructions and sizes left over " |
| 1336 | "after decoding target window" << LOG_ENDL; |
| 1337 | return RESULT_ERROR; |
| 1338 | } |
| 1339 | if (!IsInterleaved()) { |
| 1340 | // Standard format is being used, with three separate sections for the |
| 1341 | // instructions, data, and addresses. |
| 1342 | if (!data_for_add_and_run_.Empty()) { |
| 1343 | LOG(ERROR) << "Excess ADD/RUN data left over " |
| 1344 | "after decoding target window" << LOG_ENDL; |
| 1345 | return RESULT_ERROR; |
| 1346 | } |
| 1347 | if (!addresses_for_copy_.Empty()) { |
| 1348 | LOG(ERROR) << "Excess COPY addresses left over " |
| 1349 | "after decoding target window" << LOG_ENDL; |
| 1350 | return RESULT_ERROR; |
| 1351 | } |
| 1352 | // Reached the end of the window. Update the ParseableChunk to point to the |
| 1353 | // end of the addresses section, which is the last section in the window. |
| 1354 | parseable_chunk->SetPosition(addresses_for_copy_.End()); |
| 1355 | } else { |
| 1356 | // Interleaved format is being used. The window may have been only |
| 1357 | // partially decoded. |
| 1358 | UpdateInstructionPointer(parseable_chunk); |
| 1359 | } |
| 1360 | return RESULT_SUCCESS; |
| 1361 | } |
| 1362 | |
| 1363 | VCDiffResult VCDiffDeltaFileWindow::DecodeWindows( |
| 1364 | ParseableChunk* parseable_chunk) { |
| 1365 | if (!parent_) { |
| 1366 | LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() " |
| 1367 | "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL; |
| 1368 | return RESULT_ERROR; |
| 1369 | } |
| 1370 | while (!parseable_chunk->Empty()) { |
| 1371 | if (!found_header_) { |
| 1372 | switch (ReadHeader(parseable_chunk)) { |
| 1373 | case RESULT_END_OF_DATA: |
| 1374 | return RESULT_END_OF_DATA; |
| 1375 | case RESULT_ERROR: |
| 1376 | return RESULT_ERROR; |
| 1377 | default: |
| 1378 | // Reset address cache between windows (RFC section 5.1) |
| 1379 | if (!parent_->addr_cache()->Init()) { |
| 1380 | LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL; |
| 1381 | return RESULT_ERROR; |
| 1382 | } |
| 1383 | } |
| 1384 | } else { |
| 1385 | // We are resuming a window that was partially decoded before a |
| 1386 | // RESULT_END_OF_DATA was returned. This can only happen on the first |
| 1387 | // loop iteration, and only if the interleaved format is enabled and used. |
| 1388 | if (!IsInterleaved()) { |
| 1389 | LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window" |
| 1390 | " when interleaved format is not being used" << LOG_ENDL; |
| 1391 | return RESULT_ERROR; |
| 1392 | } |
| 1393 | UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), |
| 1394 | parseable_chunk->End()); |
| 1395 | reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), |
| 1396 | instructions_and_sizes_.End()); |
| 1397 | } |
| 1398 | switch (DecodeBody(parseable_chunk)) { |
| 1399 | case RESULT_END_OF_DATA: |
| 1400 | if (MoreDataExpected()) { |
| 1401 | return RESULT_END_OF_DATA; |
| 1402 | } else { |
| 1403 | LOG(ERROR) << "End of data reached while decoding VCDIFF delta file" |
| 1404 | << LOG_ENDL; |
| 1405 | // fall through to RESULT_ERROR case |
| 1406 | } |
| 1407 | case RESULT_ERROR: |
| 1408 | return RESULT_ERROR; |
| 1409 | default: |
| 1410 | break; // DecodeBody succeeded |
| 1411 | } |
| 1412 | AppendAnnotatedOutput(parent_->annotated_output()); |
| 1413 | // Get ready to read a new delta window |
| 1414 | Reset(); |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 1415 | if (parent_->ReachedPlannedTargetFileSize()) { |
| 1416 | // Found exactly the length we expected. Stop decoding. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1417 | return RESULT_SUCCESS; |
| 1418 | } |
| 1419 | } |
| 1420 | return RESULT_SUCCESS; |
| 1421 | } |
| 1422 | |
| 1423 | // *** Methods for VCDiffStreamingDecoder |
| 1424 | |
| 1425 | VCDiffStreamingDecoder::VCDiffStreamingDecoder() |
| 1426 | : impl_(new VCDiffStreamingDecoderImpl) { } |
| 1427 | |
| 1428 | VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } |
| 1429 | |
| 1430 | void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { |
| 1431 | impl_->StartDecoding(source, len); |
| 1432 | } |
| 1433 | |
| 1434 | bool VCDiffStreamingDecoder::DecodeChunkToInterface( |
| 1435 | const char* data, |
| 1436 | size_t len, |
| 1437 | OutputStringInterface* output_string) { |
| 1438 | return impl_->DecodeChunk(data, len, output_string); |
| 1439 | } |
| 1440 | |
| 1441 | bool VCDiffStreamingDecoder::FinishDecoding() { |
| 1442 | return impl_->FinishDecoding(); |
| 1443 | } |
| 1444 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame^] | 1445 | bool VCDiffStreamingDecoder::SetMaximumTargetFileSize( |
| 1446 | size_t new_maximum_target_file_size) { |
| 1447 | return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size); |
| 1448 | } |
| 1449 | |
| 1450 | bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize( |
| 1451 | size_t new_maximum_target_window_size) { |
| 1452 | return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size); |
| 1453 | } |
| 1454 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1455 | void VCDiffStreamingDecoder::EnableAnnotatedOutput() { |
| 1456 | impl_->EnableAnnotatedOutput(); |
| 1457 | } |
| 1458 | |
| 1459 | void VCDiffStreamingDecoder::DisableAnnotatedOutput() { |
| 1460 | impl_->DisableAnnotatedOutput(); |
| 1461 | } |
| 1462 | |
| 1463 | void VCDiffStreamingDecoder::GetAnnotatedOutputToInterface( |
| 1464 | OutputStringInterface* annotated_output) { |
| 1465 | impl_->GetAnnotatedOutput(annotated_output); |
| 1466 | } |
| 1467 | |
| 1468 | bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, |
| 1469 | size_t dictionary_size, |
| 1470 | const string& encoding, |
| 1471 | OutputStringInterface* target) { |
| 1472 | target->clear(); |
| 1473 | decoder_.StartDecoding(dictionary_ptr, dictionary_size); |
| 1474 | if (!decoder_.DecodeChunkToInterface(encoding.data(), |
| 1475 | encoding.size(), |
| 1476 | target)) { |
| 1477 | return false; |
| 1478 | } |
| 1479 | return decoder_.FinishDecoding(); |
| 1480 | } |
| 1481 | |
| 1482 | } // namespace open_vcdiff |