openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2008 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | // Implements a Decoder for the format described in |
| 17 | // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. |
| 18 | // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
| 19 | // |
| 20 | // The RFC describes the possibility of using a secondary compressor |
| 21 | // to further reduce the size of each section of the VCDIFF output. |
| 22 | // That feature is not supported in this implementation of the encoder |
| 23 | // and decoder. |
| 24 | // No secondary compressor types have been publicly registered with |
| 25 | // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids |
| 26 | // in the more than five years since the registry was created, so there |
| 27 | // is no standard set of compressor IDs which would be generated by other |
| 28 | // encoders or accepted by other decoders. |
| 29 | |
| 30 | #include <config.h> |
| 31 | #include "google/vcdecoder.h" |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 32 | #include <stddef.h> // size_t, ptrdiff_t |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 33 | #include <stdint.h> // int32_t |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 34 | #include <string.h> // memcpy, memset |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 35 | #include <memory> // auto_ptr |
| 36 | #include <string> |
| 37 | #include "addrcache.h" |
| 38 | #include "checksum.h" |
| 39 | #include "codetable.h" |
| 40 | #include "decodetable.h" |
| 41 | #include "headerparser.h" |
| 42 | #include "logging.h" |
| 43 | #include "google/output_string.h" |
| 44 | #include "varint_bigendian.h" |
| 45 | #include "vcdiff_defs.h" |
| 46 | |
| 47 | namespace open_vcdiff { |
| 48 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 49 | // This class is used to parse delta file windows as described |
| 50 | // in RFC sections 4.2 and 4.3. Its methods are not thread-safe. |
| 51 | // |
| 52 | // Here is the window format copied from the RFC: |
| 53 | // |
| 54 | // Window1 |
| 55 | // Win_Indicator - byte |
| 56 | // [Source segment size] - integer |
| 57 | // [Source segment position] - integer |
| 58 | // The delta encoding of the target window |
| 59 | // Length of the delta encoding - integer |
| 60 | // The delta encoding |
| 61 | // Size of the target window - integer |
| 62 | // Delta_Indicator - byte |
| 63 | // Length of data for ADDs and RUNs - integer |
| 64 | // Length of instructions and sizes - integer |
| 65 | // Length of addresses for COPYs - integer |
| 66 | // Data section for ADDs and RUNs - array of bytes |
| 67 | // Instructions and sizes section - array of bytes |
| 68 | // Addresses section for COPYs - array of bytes |
| 69 | // Window2 |
| 70 | // ... |
| 71 | // |
| 72 | // Sample usage: |
| 73 | // |
| 74 | // VCDiffDeltaFileWindow delta_window_; |
| 75 | // delta_window_.Init(parent); |
| 76 | // ParseableChunk parseable_chunk(input_buffer, |
| 77 | // input_size, |
| 78 | // leftover_unencoded_bytes); |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 79 | // while (!parseable_chunk.Empty()) { |
| 80 | // switch (delta_window_.DecodeWindow(&parseable_chunk)) { |
| 81 | // case RESULT_END_OF_DATA: |
| 82 | // <Read more input and retry DecodeWindow later.> |
| 83 | // case RESULT_ERROR: |
| 84 | // <Handle error case. An error log message has already been generated.> |
| 85 | // } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 86 | // } |
| 87 | // |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 88 | // DecodeWindow consumes only a single window, and needs to be placed within |
| 89 | // a loop if multiple windows are to be processed. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 90 | // |
| 91 | class VCDiffDeltaFileWindow { |
| 92 | public: |
| 93 | VCDiffDeltaFileWindow(); |
| 94 | ~VCDiffDeltaFileWindow(); |
| 95 | |
| 96 | // Init() should be called immediately after constructing the |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 97 | // VCDiffDeltaFileWindow(). It must be called before DecodeWindow() can be |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 98 | // invoked, or an error will occur. |
| 99 | void Init(VCDiffStreamingDecoderImpl* parent); |
| 100 | |
| 101 | // Resets the pointers to the data sections in the current window. |
| 102 | void Reset(); |
| 103 | |
| 104 | bool UseCodeTable(const VCDiffCodeTableData& code_table_data, |
| 105 | unsigned char max_mode) { |
| 106 | return reader_.UseCodeTable(code_table_data, max_mode); |
| 107 | } |
| 108 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 109 | // Decodes a single delta window using the input data from *parseable_chunk. |
| 110 | // Appends the decoded target window to parent_->decoded_target(). Returns |
| 111 | // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if |
| 112 | // the end of input was reached before the entire window could be decoded and |
| 113 | // more input is expected (only possible if IsInterleaved() is true), or |
| 114 | // RESULT_ERROR if an error occurred during decoding. In the RESULT_ERROR |
| 115 | // case, the value of parseable_chunk->pointer_ is undefined; otherwise, |
| 116 | // parseable_chunk->Advance() is called to point to the input data position |
| 117 | // just after the data that has been decoded. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 118 | // |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 119 | VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 120 | |
| 121 | bool FoundWindowHeader() const { |
| 122 | return found_header_; |
| 123 | } |
| 124 | |
| 125 | bool MoreDataExpected() const { |
| 126 | // When parsing an interleaved-format delta file, |
| 127 | // every time DecodeBody() exits, interleaved_bytes_expected_ |
| 128 | // will be decremented by the number of bytes parsed. If it |
| 129 | // reaches zero, then there is no more data expected because |
| 130 | // the size of the interleaved section (given in the window |
| 131 | // header) has been reached. |
| 132 | return IsInterleaved() && (interleaved_bytes_expected_ > 0); |
| 133 | } |
| 134 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 135 | size_t target_window_start_pos() const { return target_window_start_pos_; } |
| 136 | |
| 137 | void set_target_window_start_pos(size_t new_start_pos) { |
| 138 | target_window_start_pos_ = new_start_pos; |
| 139 | } |
| 140 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 141 | // Returns the number of bytes remaining to be decoded in the target window. |
| 142 | // If not in the process of decoding a window, returns 0. |
| 143 | size_t TargetBytesRemaining(); |
| 144 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 145 | private: |
| 146 | // Reads the header of the window section as described in RFC sections 4.2 and |
| 147 | // 4.3, up to and including the value "Length of addresses for COPYs". If the |
| 148 | // entire header is found, this function sets up the DeltaWindowSections |
| 149 | // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so |
| 150 | // that the decoder can begin decoding the opcodes in these sections. Returns |
| 151 | // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of |
| 152 | // available data was reached before the entire header could be read. (The |
| 153 | // latter may be an error condition if there is no more data available.) |
| 154 | // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the |
| 155 | // parsed header. |
| 156 | // |
| 157 | VCDiffResult ReadHeader(ParseableChunk* parseable_chunk); |
| 158 | |
| 159 | // After the window header has been parsed as far as the Delta_Indicator, |
| 160 | // this function is called to parse the following delta window header fields: |
| 161 | // |
| 162 | // Length of data for ADDs and RUNs - integer (VarintBE format) |
| 163 | // Length of instructions and sizes - integer (VarintBE format) |
| 164 | // Length of addresses for COPYs - integer (VarintBE format) |
| 165 | // |
| 166 | // If has_checksum_ is true, it also looks for the following element: |
| 167 | // |
| 168 | // Adler32 checksum - unsigned 32-bit integer (VarintBE format) |
| 169 | // |
| 170 | // It sets up the DeltaWindowSections instructions_and_sizes_, |
| 171 | // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format |
| 172 | // is being used, all three sections will include the entire window body; if |
| 173 | // the standard format is used, three non-overlapping window sections will be |
| 174 | // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA |
| 175 | // if standard format is being used and there is not enough input data to read |
| 176 | // the entire window body. Otherwise, returns RESULT_SUCCESS. |
| 177 | VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser); |
| 178 | |
| 179 | // Decodes the body of the window section as described in RFC sections 4.3, |
| 180 | // including the sections "Data section for ADDs and RUNs", "Instructions |
| 181 | // and sizes section", and "Addresses section for COPYs". These sections |
| 182 | // must already have been set up by ReadWindowHeader(). Returns a |
| 183 | // non-negative value on success, or RESULT_END_OF_DATA if the end of input |
| 184 | // was reached before the entire window could be decoded (only possible if |
| 185 | // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
| 186 | // decoding. Appends as much of the decoded target window as possible to |
| 187 | // parent->decoded_target(). |
| 188 | // |
| 189 | int DecodeBody(ParseableChunk* parseable_chunk); |
| 190 | |
| 191 | // Returns the number of bytes already decoded into the target window. |
| 192 | size_t TargetBytesDecoded(); |
| 193 | |
| 194 | // Decodes a single ADD instruction, updating parent_->decoded_target_. |
| 195 | VCDiffResult DecodeAdd(size_t size); |
| 196 | |
| 197 | // Decodes a single RUN instruction, updating parent_->decoded_target_. |
| 198 | VCDiffResult DecodeRun(size_t size); |
| 199 | |
| 200 | // Decodes a single COPY instruction, updating parent_->decoded_target_. |
| 201 | VCDiffResult DecodeCopy(size_t size, unsigned char mode); |
| 202 | |
| 203 | // When using the interleaved format, this function is called both on parsing |
| 204 | // the header and on resuming after a RESULT_END_OF_DATA was returned from a |
| 205 | // previous call to DecodeBody(). It sets up all three section pointers to |
| 206 | // reference the same interleaved stream of instructions, sizes, addresses, |
| 207 | // and data. These pointers must be reset every time that work resumes on a |
| 208 | // delta window, because the input data string may have been changed or |
| 209 | // resized since DecodeBody() last returned. |
| 210 | void UpdateInterleavedSectionPointers(const char* data_pos, |
| 211 | const char* data_end) { |
| 212 | const ptrdiff_t available_data = data_end - data_pos; |
| 213 | // Don't read past the end of currently-available data |
| 214 | if (available_data > interleaved_bytes_expected_) { |
| 215 | instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_); |
| 216 | } else { |
| 217 | instructions_and_sizes_.Init(data_pos, available_data); |
| 218 | } |
| 219 | data_for_add_and_run_.Init(&instructions_and_sizes_); |
| 220 | addresses_for_copy_.Init(&instructions_and_sizes_); |
| 221 | } |
| 222 | |
| 223 | // If true, the interleaved format described in AllowInterleaved() is used |
| 224 | // for the current delta file. Only valid after ReadWindowHeader() has been |
| 225 | // called and returned a positive number (i.e., the whole header was parsed), |
| 226 | // but before the window has finished decoding. |
| 227 | // |
| 228 | bool IsInterleaved() const { |
| 229 | // If the sections are interleaved, both addresses_for_copy_ and |
| 230 | // data_for_add_and_run_ should point at instructions_and_sizes_. |
| 231 | return !addresses_for_copy_.IsOwned(); |
| 232 | } |
| 233 | |
| 234 | // Executes a single COPY or ADD instruction, appending data to |
| 235 | // parent_->decoded_target(). |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 236 | void CopyBytes(const char* data, size_t size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 237 | |
| 238 | // Executes a single RUN instruction, appending data to |
| 239 | // parent_->decoded_target(). |
| 240 | void RunByte(unsigned char byte, size_t size); |
| 241 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 242 | // Advance *parseable_chunk to point to the current position in the |
| 243 | // instructions/sizes section. If interleaved format is used, then |
| 244 | // decrement the number of expected bytes in the instructions/sizes section |
| 245 | // by the number of instruction/size bytes parsed. |
| 246 | void UpdateInstructionPointer(ParseableChunk* parseable_chunk); |
| 247 | |
| 248 | // The parent object which was passed to Init(). |
| 249 | VCDiffStreamingDecoderImpl* parent_; |
| 250 | |
| 251 | // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader() |
| 252 | // has been called and succeeded in parsing the delta window header, but the |
| 253 | // entire window has not yet been decoded. |
| 254 | bool found_header_; |
| 255 | |
| 256 | // Contents and length of the current source window. source_segment_ptr_ |
| 257 | // will be non-NULL if (a) the window section header for the current window |
| 258 | // has been read, but the window has not yet finished decoding; or |
| 259 | // (b) the window did not specify a source segment. |
| 260 | const char* source_segment_ptr_; |
| 261 | size_t source_segment_length_; |
| 262 | |
| 263 | // The delta encoding window sections as defined in RFC section 4.3. |
| 264 | // The pointer for each section will be incremented as data is consumed and |
| 265 | // decoded from that section. If the interleaved format is used, |
| 266 | // data_for_add_and_run_ and addresses_for_copy_ will both point to |
| 267 | // instructions_and_sizes_; otherwise, they will be separate data sections. |
| 268 | // |
| 269 | DeltaWindowSection instructions_and_sizes_; |
| 270 | DeltaWindowSection data_for_add_and_run_; |
| 271 | DeltaWindowSection addresses_for_copy_; |
| 272 | |
| 273 | // The expected bytes left to decode in instructions_and_sizes_. Only used |
| 274 | // for the interleaved format. |
| 275 | int interleaved_bytes_expected_; |
| 276 | |
| 277 | // The expected length of the target window once it has been decoded. |
| 278 | size_t target_window_length_; |
| 279 | |
| 280 | // The index in decoded_target at which the first byte of the current |
| 281 | // target window was/will be written. |
| 282 | size_t target_window_start_pos_; |
| 283 | |
| 284 | // If has_checksum_ is true, then expected_checksum_ contains an Adler32 |
| 285 | // checksum of the target window data. This is an extension included in the |
| 286 | // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard. |
| 287 | bool has_checksum_; |
| 288 | VCDChecksum expected_checksum_; |
| 289 | |
| 290 | VCDiffCodeTableReader reader_; |
| 291 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 292 | // Making these private avoids implicit copy constructor & assignment operator |
| 293 | VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT |
| 294 | void operator=(const VCDiffDeltaFileWindow&); |
| 295 | }; |
| 296 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 297 | // *** Inline methods for VCDiffDeltaFileWindow |
| 298 | |
| 299 | inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) { |
| 300 | Reset(); |
| 301 | } |
| 302 | |
| 303 | inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { } |
| 304 | |
| 305 | inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) { |
| 306 | parent_ = parent; |
| 307 | } |
| 308 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 309 | class VCDiffStreamingDecoderImpl { |
| 310 | public: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 311 | typedef std::string string; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 312 | |
| 313 | // The default maximum target file size (and target window size) if |
| 314 | // SetMaximumTargetFileSize() is not called. |
| 315 | static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB |
| 316 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 317 | // The largest value that can be passed to SetMaximumTargetWindowSize(). |
| 318 | // Using a larger value will result in an error. |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 319 | static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX |
| 320 | |
| 321 | // A constant that is the default value for planned_target_file_size_, |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 322 | // indicating that the decoder does not have an expected length |
| 323 | // for the target data. |
| 324 | static const size_t kUnlimitedBytes = static_cast<size_t>(-3); |
| 325 | |
| 326 | VCDiffStreamingDecoderImpl(); |
| 327 | ~VCDiffStreamingDecoderImpl(); |
| 328 | |
| 329 | // Resets all member variables to their initial states. |
| 330 | void Reset(); |
| 331 | |
| 332 | // These functions are identical to their counterparts |
| 333 | // in VCDiffStreamingDecoder. |
| 334 | // |
| 335 | void StartDecoding(const char* dictionary_ptr, size_t dictionary_size); |
| 336 | |
| 337 | bool DecodeChunk(const char* data, |
| 338 | size_t len, |
| 339 | OutputStringInterface* output_string); |
| 340 | |
| 341 | bool FinishDecoding(); |
| 342 | |
| 343 | // If true, the version of VCDIFF used in the current delta file allows |
| 344 | // for the interleaved format, in which instructions, addresses and data |
| 345 | // are all sent interleaved in the instructions section of each window |
| 346 | // rather than being sent in separate sections. This is not part of |
| 347 | // the VCDIFF draft standard, so we've defined a special version code |
| 348 | // 'S' which implies that this feature is available. Even if interleaving |
| 349 | // is supported, it is not mandatory; interleaved format will be implied |
| 350 | // if the address and data sections are both zero-length. |
| 351 | // |
| 352 | bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; } |
| 353 | |
| 354 | // If true, the version of VCDIFF used in the current delta file allows |
| 355 | // each delta window to contain an Adler32 checksum of the target window data. |
| 356 | // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then |
| 357 | // this checksum will appear as a variable-length integer, just after the |
| 358 | // "length of addresses for COPYs" value and before the window data sections. |
| 359 | // It is possible for some windows in a delta file to use the checksum feature |
| 360 | // and for others not to use it (and leave the flag bit set to 0.) |
| 361 | // Just as with AllowInterleaved(), this extension is not part of the draft |
| 362 | // standard and is only available when the version code 'S' is specified. |
| 363 | // |
| 364 | bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; } |
| 365 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 366 | bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) { |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 367 | maximum_target_file_size_ = new_maximum_target_file_size; |
| 368 | return true; |
| 369 | } |
| 370 | |
| 371 | bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) { |
| 372 | if (new_maximum_target_window_size > kTargetSizeLimit) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 373 | VCD_ERROR << "Specified maximum target window size " |
| 374 | << new_maximum_target_window_size << " exceeds limit of " |
| 375 | << kTargetSizeLimit << " bytes" << VCD_ENDL; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 376 | return false; |
| 377 | } |
| 378 | maximum_target_window_size_ = new_maximum_target_window_size; |
| 379 | return true; |
| 380 | } |
| 381 | |
| 382 | // See description of planned_target_file_size_, below. |
| 383 | bool HasPlannedTargetFileSize() const { |
| 384 | return planned_target_file_size_ != kUnlimitedBytes; |
| 385 | } |
| 386 | |
| 387 | void SetPlannedTargetFileSize(size_t planned_target_file_size) { |
| 388 | planned_target_file_size_ = planned_target_file_size; |
| 389 | } |
| 390 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 391 | void AddToTotalTargetWindowSize(size_t window_size) { |
| 392 | total_of_target_window_sizes_ += window_size; |
| 393 | } |
| 394 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 395 | // Checks to see whether the decoded target data has reached its planned size. |
| 396 | bool ReachedPlannedTargetFileSize() const { |
| 397 | if (!HasPlannedTargetFileSize()) { |
| 398 | return false; |
| 399 | } |
| 400 | // The planned target file size should not have been exceeded. |
| 401 | // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of |
| 402 | // each target window would not make the target file exceed that limit, and |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 403 | // DecodeBody() will return RESULT_ERROR if the actual decoded output ever |
| 404 | // exceeds the advertised target window size. |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 405 | if (total_of_target_window_sizes_ > planned_target_file_size_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 406 | VCD_DFATAL << "Internal error: Decoded data size " |
| 407 | << total_of_target_window_sizes_ |
| 408 | << " exceeds planned target file size " |
| 409 | << planned_target_file_size_ << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 410 | return true; |
| 411 | } |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 412 | return total_of_target_window_sizes_ == planned_target_file_size_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 413 | } |
| 414 | |
| 415 | // Checks to see whether adding a new target window of the specified size |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 416 | // would exceed the planned target file size, the maximum target file size, |
| 417 | // or the maximum target window size. If so, logs an error and returns true; |
| 418 | // otherwise, returns false. |
| 419 | bool TargetWindowWouldExceedSizeLimits(size_t window_size) const; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 420 | |
| 421 | // Returns the amount of input data passed to the last DecodeChunk() |
| 422 | // that was not consumed by the decoder. This is essential if |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 423 | // SetPlannedTargetFileSize() is being used, in order to preserve the |
| 424 | // remaining input data stream once the planned target file has been decoded. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 425 | size_t GetUnconsumedDataSize() const { |
| 426 | return unparsed_bytes_.size(); |
| 427 | } |
| 428 | |
| 429 | // This function will return true if the decoder has parsed a complete delta |
| 430 | // file header plus zero or more delta file windows, with no data left over. |
| 431 | // It will also return true if no delta data at all was decoded. If these |
| 432 | // conditions are not met, then FinishDecoding() should not be called. |
| 433 | bool IsDecodingComplete() const { |
| 434 | if (!FoundFileHeader()) { |
| 435 | // No complete delta file header has been parsed yet. DecodeChunk() |
| 436 | // may have received some data that it hasn't yet parsed, in which case |
| 437 | // decoding is incomplete. |
| 438 | return unparsed_bytes_.empty(); |
| 439 | } else if (custom_code_table_decoder_.get()) { |
| 440 | // The decoder is in the middle of parsing a custom code table. |
| 441 | return false; |
| 442 | } else if (delta_window_.FoundWindowHeader()) { |
| 443 | // The decoder is in the middle of parsing an interleaved format delta |
| 444 | // window. |
| 445 | return false; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 446 | } else if (ReachedPlannedTargetFileSize()) { |
| 447 | // The decoder found exactly the planned number of bytes. In this case |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 448 | // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover |
| 449 | // data after the end of the delta file. |
| 450 | return true; |
| 451 | } else { |
| 452 | // No complete delta file window has been parsed yet. DecodeChunk() |
| 453 | // may have received some data that it hasn't yet parsed, in which case |
| 454 | // decoding is incomplete. |
| 455 | return unparsed_bytes_.empty(); |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | const char* dictionary_ptr() const { return dictionary_ptr_; } |
| 460 | |
| 461 | size_t dictionary_size() const { return dictionary_size_; } |
| 462 | |
| 463 | VCDiffAddressCache* addr_cache() { return addr_cache_.get(); } |
| 464 | |
| 465 | string* decoded_target() { return &decoded_target_; } |
| 466 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 467 | bool allow_vcd_target() const { return allow_vcd_target_; } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 468 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 469 | void SetAllowVcdTarget(bool allow_vcd_target) { |
| 470 | if (start_decoding_was_called_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 471 | VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()" |
| 472 | << VCD_ENDL; |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 473 | return; |
| 474 | } |
| 475 | allow_vcd_target_ = allow_vcd_target; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 476 | } |
| 477 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 478 | private: |
| 479 | // Reads the VCDiff delta file header section as described in RFC section 4.1, |
| 480 | // except the custom code table data. Returns RESULT_ERROR if an error |
| 481 | // occurred, or RESULT_END_OF_DATA if the end of available data was reached |
| 482 | // before the entire header could be read. (The latter may be an error |
| 483 | // condition if there is no more data available.) Otherwise, advances |
| 484 | // data->position_ past the header and returns RESULT_SUCCESS. |
| 485 | // |
| 486 | VCDiffResult ReadDeltaFileHeader(ParseableChunk* data); |
| 487 | |
| 488 | // Indicates whether or not the header has already been read. |
| 489 | bool FoundFileHeader() const { return addr_cache_.get() != NULL; } |
| 490 | |
| 491 | // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta |
| 492 | // file header, this function parses the custom cache sizes and initializes |
| 493 | // a nested VCDiffStreamingDecoderImpl object that will be used to parse the |
| 494 | // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an |
| 495 | // error occurred, or RESULT_END_OF_DATA if the end of available data was |
| 496 | // reached before the custom cache sizes could be read. Otherwise, returns |
| 497 | // the number of bytes read. |
| 498 | // |
| 499 | int InitCustomCodeTable(const char* data_start, const char* data_end); |
| 500 | |
| 501 | // If a custom code table was specified in the header section that was parsed |
| 502 | // by ReadDeltaFileHeader(), this function makes a recursive call to another |
| 503 | // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the |
| 504 | // custom code table is expected to be supplied as an embedded VCDIFF |
| 505 | // encoding that uses the standard code table. Returns RESULT_ERROR if an |
| 506 | // error occurs, or RESULT_END_OF_DATA if the end of available data was |
| 507 | // reached before the entire custom code table could be read. Otherwise, |
| 508 | // returns RESULT_SUCCESS with *data positioned just after the encoded |
| 509 | // custom code table. If the function returns RESULT_SUCCESS or |
| 510 | // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes. |
| 511 | // |
| 512 | VCDiffResult ReadCustomCodeTable(ParseableChunk* data); |
| 513 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 514 | // Called after the decoder exhausts all input data. This function |
| 515 | // copies from decoded_target_ into output_string all the data that |
| 516 | // has not yet been output. It sets decoded_target_output_position_ |
| 517 | // to mark the start of the next data that needs to be output. |
| 518 | void AppendNewOutputText(OutputStringInterface* output_string); |
| 519 | |
| 520 | // Appends to output_string the portion of decoded_target_ that has |
| 521 | // not yet been output, then clears decoded_target_. This function is |
| 522 | // called after each complete target window has been decoded if |
| 523 | // allow_vcd_target is false. In that case, there is no need to retain |
| 524 | // target data from any window except the current window. |
| 525 | void FlushDecodedTarget(OutputStringInterface* output_string); |
| 526 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 527 | // Contents and length of the source (dictionary) data. |
| 528 | const char* dictionary_ptr_; |
| 529 | size_t dictionary_size_; |
| 530 | |
| 531 | // This string will be used to store any unparsed bytes left over when |
| 532 | // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA. |
| 533 | // It will also be used to concatenate those unparsed bytes with the data |
| 534 | // supplied to the next call to DecodeChunk(), so that they appear in |
| 535 | // contiguous memory. |
| 536 | string unparsed_bytes_; |
| 537 | |
| 538 | // The portion of the target file that has been decoded so far. This will be |
| 539 | // used to fill the output string for DecodeChunk(), and will also be used to |
| 540 | // execute COPY instructions that reference target data. Since the source |
| 541 | // window can come from a range of addresses in the previously decoded target |
| 542 | // data, the entire target file needs to be available to the decoder, not just |
| 543 | // the current target window. |
| 544 | string decoded_target_; |
| 545 | |
| 546 | // The VCDIFF version byte (also known as "header4") from the |
| 547 | // delta file header. |
| 548 | unsigned char vcdiff_version_code_; |
| 549 | |
| 550 | VCDiffDeltaFileWindow delta_window_; |
| 551 | |
| 552 | std::auto_ptr<VCDiffAddressCache> addr_cache_; |
| 553 | |
| 554 | // Will be NULL unless a custom code table has been defined. |
| 555 | std::auto_ptr<VCDiffCodeTableData> custom_code_table_; |
| 556 | |
| 557 | // Used to receive the decoded custom code table. |
| 558 | string custom_code_table_string_; |
| 559 | |
| 560 | // If a custom code table is specified, it will be expressed |
| 561 | // as an embedded VCDIFF delta file which uses the default code table |
| 562 | // as the source file (dictionary). Use a child decoder object |
| 563 | // to decode that delta file. |
| 564 | std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_; |
| 565 | |
| 566 | // If set, then the decoder is expecting *exactly* this number of |
| 567 | // target bytes to be decoded from one or more delta file windows. |
| 568 | // If this number is exceeded while decoding a window, but was not met |
| 569 | // before starting on that window, an error will be reported. |
| 570 | // If FinishDecoding() is called before this number is met, an error |
| 571 | // will also be reported. This feature is used for decoding the |
| 572 | // embedded code table data within a VCDIFF delta file; we want to |
| 573 | // stop processing the embedded data once the entire code table has |
| 574 | // been decoded, and treat the rest of the available data as part |
| 575 | // of the enclosing delta file. |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 576 | size_t planned_target_file_size_; |
| 577 | |
| 578 | size_t maximum_target_file_size_; |
| 579 | |
| 580 | size_t maximum_target_window_size_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 581 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 582 | // Contains the sum of the decoded sizes of all target windows seen so far, |
| 583 | // including the expected total size of the current target window in progress |
| 584 | // (even if some of the current target window has not yet been decoded.) |
| 585 | size_t total_of_target_window_sizes_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 586 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 587 | // Contains the byte position within decoded_target_ of the first data that |
| 588 | // has not yet been output by AppendNewOutputText(). |
| 589 | size_t decoded_target_output_position_; |
| 590 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 591 | // This value is used to ensure the correct order of calls to the interface |
| 592 | // functions, i.e., a single call to StartDecoding(), followed by zero or |
| 593 | // more calls to DecodeChunk(), followed by a single call to |
| 594 | // FinishDecoding(). |
| 595 | bool start_decoding_was_called_; |
| 596 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 597 | // If this value is true then the VCD_TARGET flag can be specified to allow |
| 598 | // the source segment to be chosen from the previously-decoded target data. |
| 599 | // (This is the default behavior.) If it is false, then specifying the |
| 600 | // VCD_TARGET flag is considered an error, and the decoder does not need to |
| 601 | // keep in memory any decoded target data prior to the current window. |
| 602 | bool allow_vcd_target_; |
| 603 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 604 | // Making these private avoids implicit copy constructor & assignment operator |
| 605 | VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT |
| 606 | void operator=(const VCDiffStreamingDecoderImpl&); |
| 607 | }; |
| 608 | |
| 609 | // *** Methods for VCDiffStreamingDecoderImpl |
| 610 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 611 | const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize; |
| 612 | const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes; |
| 613 | |
| 614 | VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() |
| 615 | : maximum_target_file_size_(kDefaultMaximumTargetFileSize), |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 616 | maximum_target_window_size_(kDefaultMaximumTargetFileSize), |
| 617 | allow_vcd_target_(true) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 618 | delta_window_.Init(this); |
| 619 | Reset(); |
| 620 | } |
| 621 | |
| 622 | // Reset() will delete the component objects without reallocating them. |
| 623 | VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } |
| 624 | |
| 625 | void VCDiffStreamingDecoderImpl::Reset() { |
| 626 | start_decoding_was_called_ = false; |
| 627 | dictionary_ptr_ = NULL; |
| 628 | dictionary_size_ = 0; |
| 629 | vcdiff_version_code_ = '\0'; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 630 | planned_target_file_size_ = kUnlimitedBytes; |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 631 | total_of_target_window_sizes_ = 0; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 632 | addr_cache_.reset(); |
| 633 | custom_code_table_.reset(); |
| 634 | custom_code_table_decoder_.reset(); |
| 635 | delta_window_.Reset(); |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 636 | decoded_target_output_position_ = 0; |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 637 | } |
| 638 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 639 | void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, |
| 640 | size_t dictionary_size) { |
| 641 | if (start_decoding_was_called_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 642 | VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()" |
| 643 | << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 644 | return; |
| 645 | } |
| 646 | unparsed_bytes_.clear(); |
| 647 | decoded_target_.clear(); // delta_window_.Reset() depends on this |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 648 | Reset(); |
| 649 | dictionary_ptr_ = dictionary_ptr; |
| 650 | dictionary_size_ = dictionary_size; |
| 651 | start_decoding_was_called_ = true; |
| 652 | } |
| 653 | |
| 654 | // Reads the VCDiff delta file header section as described in RFC section 4.1: |
| 655 | // |
| 656 | // Header1 - byte = 0xD6 (ASCII 'V' | 0x80) |
| 657 | // Header2 - byte = 0xC3 (ASCII 'C' | 0x80) |
| 658 | // Header3 - byte = 0xC4 (ASCII 'D' | 0x80) |
| 659 | // Header4 - byte |
| 660 | // Hdr_Indicator - byte |
| 661 | // [Secondary compressor ID] - byte |
| 662 | // [Length of code table data] - integer |
| 663 | // [Code table data] |
| 664 | // |
| 665 | // Initializes the code table and address cache objects. Returns RESULT_ERROR |
| 666 | // if an error occurred, and RESULT_END_OF_DATA if the end of available data was |
| 667 | // reached before the entire header could be read. (The latter may be an error |
| 668 | // condition if there is no more data available.) Otherwise, returns |
| 669 | // RESULT_SUCCESS, and removes the header bytes from the data string. |
| 670 | // |
| 671 | // It's relatively inefficient to expect this function to parse any number of |
| 672 | // input bytes available, down to 1 byte, but it is necessary in case the input |
| 673 | // is not a properly formatted VCDIFF delta file. If the entire input consists |
| 674 | // of two bytes "12", then we should recognize that it does not match the |
| 675 | // initial VCDIFF magic number "VCD" and report an error, rather than waiting |
| 676 | // indefinitely for more input that will never arrive. |
| 677 | // |
| 678 | VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader( |
| 679 | ParseableChunk* data) { |
| 680 | if (FoundFileHeader()) { |
| 681 | return RESULT_SUCCESS; |
| 682 | } |
| 683 | size_t data_size = data->UnparsedSize(); |
| 684 | const DeltaFileHeader* header = |
| 685 | reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData()); |
| 686 | bool wrong_magic_number = false; |
| 687 | switch (data_size) { |
| 688 | // Verify only the bytes that are available. |
| 689 | default: |
| 690 | // Found header contents up to and including VCDIFF version |
| 691 | vcdiff_version_code_ = header->header4; |
| 692 | if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284) |
| 693 | (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 694 | VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 695 | return RESULT_ERROR; |
| 696 | } |
| 697 | // fall through |
| 698 | case 3: |
| 699 | if (header->header3 != 0xC4) { // magic value 'D' | 0x80 |
| 700 | wrong_magic_number = true; |
| 701 | } |
| 702 | // fall through |
| 703 | case 2: |
| 704 | if (header->header2 != 0xC3) { // magic value 'C' | 0x80 |
| 705 | wrong_magic_number = true; |
| 706 | } |
| 707 | // fall through |
| 708 | case 1: |
| 709 | if (header->header1 != 0xD6) { // magic value 'V' | 0x80 |
| 710 | wrong_magic_number = true; |
| 711 | } |
| 712 | // fall through |
| 713 | case 0: |
| 714 | if (wrong_magic_number) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 715 | VCD_ERROR << "Did not find VCDIFF header bytes; " |
| 716 | "input is not a VCDIFF delta file" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 717 | return RESULT_ERROR; |
| 718 | } |
| 719 | if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA; |
| 720 | } |
| 721 | // Secondary compressor not supported. |
| 722 | if (header->hdr_indicator & VCD_DECOMPRESS) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 723 | VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 724 | return RESULT_ERROR; |
| 725 | } |
| 726 | if (header->hdr_indicator & VCD_CODETABLE) { |
| 727 | int bytes_parsed = InitCustomCodeTable( |
| 728 | data->UnparsedData() + sizeof(DeltaFileHeader), |
| 729 | data->End()); |
| 730 | switch (bytes_parsed) { |
| 731 | case RESULT_ERROR: |
| 732 | return RESULT_ERROR; |
| 733 | case RESULT_END_OF_DATA: |
| 734 | return RESULT_END_OF_DATA; |
| 735 | default: |
| 736 | data->Advance(sizeof(DeltaFileHeader) + bytes_parsed); |
| 737 | } |
| 738 | } else { |
| 739 | addr_cache_.reset(new VCDiffAddressCache); |
| 740 | // addr_cache_->Init() will be called |
| 741 | // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| 742 | data->Advance(sizeof(DeltaFileHeader)); |
| 743 | } |
| 744 | return RESULT_SUCCESS; |
| 745 | } |
| 746 | |
| 747 | int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start, |
| 748 | const char* data_end) { |
| 749 | // A custom code table is being specified. Parse the variable-length |
| 750 | // cache sizes and begin parsing the encoded custom code table. |
| 751 | int32_t near_cache_size = 0, same_cache_size = 0; |
| 752 | VCDiffHeaderParser header_parser(data_start, data_end); |
| 753 | if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) { |
| 754 | return header_parser.GetResult(); |
| 755 | } |
| 756 | if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) { |
| 757 | return header_parser.GetResult(); |
| 758 | } |
| 759 | custom_code_table_.reset(new struct VCDiffCodeTableData); |
| 760 | memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData)); |
| 761 | custom_code_table_string_.clear(); |
| 762 | addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size)); |
| 763 | // addr_cache_->Init() will be called |
| 764 | // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| 765 | |
| 766 | // If we reach this point (the start of the custom code table) |
| 767 | // without encountering a RESULT_END_OF_DATA condition, then we won't call |
| 768 | // ReadDeltaFileHeader() again for this delta file. |
| 769 | // |
| 770 | // Instantiate a recursive decoder to interpret the custom code table |
| 771 | // as a VCDIFF encoding of the default code table. |
| 772 | custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); |
| 773 | custom_code_table_decoder_->StartDecoding( |
| 774 | reinterpret_cast<const char*>( |
| 775 | &VCDiffCodeTableData::kDefaultCodeTableData), |
| 776 | sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 777 | custom_code_table_decoder_->SetPlannedTargetFileSize( |
| 778 | sizeof(*custom_code_table_)); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 779 | return static_cast<int>(header_parser.ParsedSize()); |
| 780 | } |
| 781 | |
| 782 | VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( |
| 783 | ParseableChunk* data) { |
| 784 | if (!custom_code_table_decoder_.get()) { |
| 785 | return RESULT_SUCCESS; |
| 786 | } |
| 787 | if (!custom_code_table_.get()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 788 | VCD_DFATAL << "Internal error: custom_code_table_decoder_ is set," |
| 789 | " but custom_code_table_ is NULL" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 790 | return RESULT_ERROR; |
| 791 | } |
| 792 | OutputString<string> output_string(&custom_code_table_string_); |
| 793 | if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), |
| 794 | data->UnparsedSize(), |
| 795 | &output_string)) { |
| 796 | return RESULT_ERROR; |
| 797 | } |
| 798 | if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { |
| 799 | // Skip over the consumed data. |
| 800 | data->Finish(); |
| 801 | return RESULT_END_OF_DATA; |
| 802 | } |
| 803 | if (!custom_code_table_decoder_->FinishDecoding()) { |
| 804 | return RESULT_ERROR; |
| 805 | } |
| 806 | if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 807 | VCD_DFATAL << "Decoded custom code table size (" |
| 808 | << custom_code_table_string_.length() |
| 809 | << ") does not match size of a code table (" |
| 810 | << sizeof(*custom_code_table_) << ")" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 811 | return RESULT_ERROR; |
| 812 | } |
| 813 | memcpy(custom_code_table_.get(), |
| 814 | custom_code_table_string_.data(), |
| 815 | sizeof(*custom_code_table_)); |
| 816 | custom_code_table_string_.clear(); |
| 817 | // Skip over the consumed data. |
| 818 | data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); |
| 819 | custom_code_table_decoder_.reset(); |
| 820 | delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); |
| 821 | return RESULT_SUCCESS; |
| 822 | } |
| 823 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 824 | void VCDiffStreamingDecoderImpl::FlushDecodedTarget( |
| 825 | OutputStringInterface* output_string) { |
| 826 | output_string->append( |
| 827 | decoded_target_.data() + decoded_target_output_position_, |
| 828 | decoded_target_.size() - decoded_target_output_position_); |
| 829 | decoded_target_.clear(); |
| 830 | delta_window_.set_target_window_start_pos(0); |
| 831 | decoded_target_output_position_ = 0; |
| 832 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 833 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 834 | void VCDiffStreamingDecoderImpl::AppendNewOutputText( |
| 835 | OutputStringInterface* output_string) { |
| 836 | const size_t bytes_decoded_this_chunk = |
| 837 | decoded_target_.size() - decoded_target_output_position_; |
| 838 | if (bytes_decoded_this_chunk > 0) { |
| 839 | size_t target_bytes_remaining = delta_window_.TargetBytesRemaining(); |
| 840 | if (target_bytes_remaining > 0) { |
| 841 | // The decoder is midway through decoding a target window. Resize |
| 842 | // output_string to match the expected length. The interface guarantees |
| 843 | // not to resize output_string more than once per target window decoded. |
| 844 | output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk |
| 845 | + target_bytes_remaining); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 846 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 847 | output_string->append( |
| 848 | decoded_target_.data() + decoded_target_output_position_, |
| 849 | bytes_decoded_this_chunk); |
| 850 | decoded_target_output_position_ = decoded_target_.size(); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 851 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 852 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 853 | |
| 854 | bool VCDiffStreamingDecoderImpl::DecodeChunk( |
| 855 | const char* data, |
| 856 | size_t len, |
| 857 | OutputStringInterface* output_string) { |
| 858 | if (!start_decoding_was_called_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 859 | VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 860 | Reset(); |
| 861 | return false; |
| 862 | } |
| 863 | ParseableChunk parseable_chunk(data, len); |
| 864 | if (!unparsed_bytes_.empty()) { |
| 865 | unparsed_bytes_.append(data, len); |
| 866 | parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), |
| 867 | unparsed_bytes_.size()); |
| 868 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 869 | VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); |
| 870 | if (RESULT_SUCCESS == result) { |
| 871 | result = ReadCustomCodeTable(&parseable_chunk); |
| 872 | } |
| 873 | if (RESULT_SUCCESS == result) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 874 | while (!parseable_chunk.Empty()) { |
| 875 | result = delta_window_.DecodeWindow(&parseable_chunk); |
| 876 | if (RESULT_SUCCESS != result) { |
| 877 | break; |
| 878 | } |
| 879 | if (ReachedPlannedTargetFileSize()) { |
| 880 | // Found exactly the length we expected. Stop decoding. |
| 881 | break; |
| 882 | } |
| 883 | if (!allow_vcd_target()) { |
| 884 | // VCD_TARGET will never be used to reference target data before the |
| 885 | // start of the current window, so flush and clear the contents of |
| 886 | // decoded_target_. |
| 887 | FlushDecodedTarget(output_string); |
| 888 | } |
| 889 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 890 | } |
| 891 | if (RESULT_ERROR == result) { |
| 892 | Reset(); // Don't allow further DecodeChunk calls |
| 893 | return false; |
| 894 | } |
| 895 | unparsed_bytes_.assign(parseable_chunk.UnparsedData(), |
| 896 | parseable_chunk.UnparsedSize()); |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 897 | AppendNewOutputText(output_string); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 898 | return true; |
| 899 | } |
| 900 | |
| 901 | // Finishes decoding after all data has been received. Returns true |
| 902 | // if decoding of the entire stream was successful. |
| 903 | bool VCDiffStreamingDecoderImpl::FinishDecoding() { |
| 904 | bool success = true; |
| 905 | if (!start_decoding_was_called_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 906 | VCD_WARNING << "FinishDecoding() called before StartDecoding()," |
| 907 | " or called after DecodeChunk() returned false" |
| 908 | << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 909 | success = false; |
| 910 | } else if (!IsDecodingComplete()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 911 | VCD_ERROR << "FinishDecoding() called before parsing entire" |
| 912 | " delta file window" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 913 | success = false; |
| 914 | } |
| 915 | // Reset the object state for the next decode operation |
| 916 | Reset(); |
| 917 | return success; |
| 918 | } |
| 919 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 920 | bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits( |
| 921 | size_t window_size) const { |
| 922 | if (window_size > maximum_target_window_size_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 923 | VCD_ERROR << "Length of target window (" << window_size |
| 924 | << ") exceeds limit of " << maximum_target_window_size_ |
| 925 | << " bytes" << VCD_ENDL; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 926 | return true; |
| 927 | } |
| 928 | if (HasPlannedTargetFileSize()) { |
| 929 | // The logical expression to check would be: |
| 930 | // |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 931 | // total_of_target_window_sizes_ + window_size > planned_target_file_size_ |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 932 | // |
| 933 | // but the addition might cause an integer overflow if target_bytes_to_add |
| 934 | // is very large. So it is better to check target_bytes_to_add against |
| 935 | // the remaining planned target bytes. |
| 936 | size_t remaining_planned_target_file_size = |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 937 | planned_target_file_size_ - total_of_target_window_sizes_; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 938 | if (window_size > remaining_planned_target_file_size) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 939 | VCD_ERROR << "Length of target window (" << window_size |
| 940 | << " bytes) plus previous windows (" |
| 941 | << total_of_target_window_sizes_ |
| 942 | << " bytes) would exceed planned size of " |
| 943 | << planned_target_file_size_ << " bytes" << VCD_ENDL; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 944 | return true; |
| 945 | } |
| 946 | } |
| 947 | size_t remaining_maximum_target_bytes = |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 948 | maximum_target_file_size_ - total_of_target_window_sizes_; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 949 | if (window_size > remaining_maximum_target_bytes) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 950 | VCD_ERROR << "Length of target window (" << window_size |
| 951 | << " bytes) plus previous windows (" |
| 952 | << total_of_target_window_sizes_ |
| 953 | << " bytes) would exceed maximum target file size of " |
| 954 | << maximum_target_file_size_ << " bytes" << VCD_ENDL; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 955 | return true; |
| 956 | } |
| 957 | return false; |
| 958 | } |
| 959 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 960 | // *** Methods for VCDiffDeltaFileWindow |
| 961 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 962 | void VCDiffDeltaFileWindow::Reset() { |
| 963 | found_header_ = false; |
| 964 | |
| 965 | // Mark the start of the current target window. |
| 966 | target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U; |
| 967 | target_window_length_ = 0; |
| 968 | |
| 969 | source_segment_ptr_ = NULL; |
| 970 | source_segment_length_ = 0; |
| 971 | |
| 972 | instructions_and_sizes_.Invalidate(); |
| 973 | data_for_add_and_run_.Invalidate(); |
| 974 | addresses_for_copy_.Invalidate(); |
| 975 | |
| 976 | interleaved_bytes_expected_ = 0; |
| 977 | |
| 978 | has_checksum_ = false; |
| 979 | expected_checksum_ = 0; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 980 | } |
| 981 | |
| 982 | VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( |
| 983 | VCDiffHeaderParser* header_parser) { |
| 984 | size_t add_and_run_data_length = 0; |
| 985 | size_t instructions_and_sizes_length = 0; |
| 986 | size_t addresses_length = 0; |
| 987 | if (!header_parser->ParseSectionLengths(has_checksum_, |
| 988 | &add_and_run_data_length, |
| 989 | &instructions_and_sizes_length, |
| 990 | &addresses_length, |
| 991 | &expected_checksum_)) { |
| 992 | return header_parser->GetResult(); |
| 993 | } |
| 994 | if (parent_->AllowInterleaved() && |
| 995 | (add_and_run_data_length == 0) && |
| 996 | (addresses_length == 0)) { |
| 997 | // The interleaved format is being used. |
| 998 | interleaved_bytes_expected_ = |
| 999 | static_cast<int>(instructions_and_sizes_length); |
| 1000 | UpdateInterleavedSectionPointers(header_parser->UnparsedData(), |
| 1001 | header_parser->End()); |
| 1002 | } else { |
| 1003 | // If interleaved format is not used, then the whole window contents |
| 1004 | // must be available before decoding can begin. If only part of |
| 1005 | // the current window is available, then report end of data |
| 1006 | // and re-parse the whole header when DecodeChunk() is called again. |
| 1007 | if (header_parser->UnparsedSize() < (add_and_run_data_length + |
| 1008 | instructions_and_sizes_length + |
| 1009 | addresses_length)) { |
| 1010 | return RESULT_END_OF_DATA; |
| 1011 | } |
| 1012 | data_for_add_and_run_.Init(header_parser->UnparsedData(), |
| 1013 | add_and_run_data_length); |
| 1014 | instructions_and_sizes_.Init(data_for_add_and_run_.End(), |
| 1015 | instructions_and_sizes_length); |
| 1016 | addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); |
| 1017 | if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1018 | VCD_ERROR << "The end of the instructions section " |
| 1019 | "does not match the end of the delta window" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1020 | return RESULT_ERROR; |
| 1021 | } |
| 1022 | } |
| 1023 | reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), |
| 1024 | instructions_and_sizes_.End()); |
| 1025 | return RESULT_SUCCESS; |
| 1026 | } |
| 1027 | |
| 1028 | // Here are the elements of the delta window header to be parsed, |
| 1029 | // from section 4 of the RFC: |
| 1030 | // |
| 1031 | // Window1 |
| 1032 | // Win_Indicator - byte |
| 1033 | // [Source segment size] - integer |
| 1034 | // [Source segment position] - integer |
| 1035 | // The delta encoding of the target window |
| 1036 | // Length of the delta encoding - integer |
| 1037 | // The delta encoding |
| 1038 | // Size of the target window - integer |
| 1039 | // Delta_Indicator - byte |
| 1040 | // Length of data for ADDs and RUNs - integer |
| 1041 | // Length of instructions and sizes - integer |
| 1042 | // Length of addresses for COPYs - integer |
| 1043 | // Data section for ADDs and RUNs - array of bytes |
| 1044 | // Instructions and sizes section - array of bytes |
| 1045 | // Addresses section for COPYs - array of bytes |
| 1046 | // |
| 1047 | VCDiffResult VCDiffDeltaFileWindow::ReadHeader( |
| 1048 | ParseableChunk* parseable_chunk) { |
openvcdiff@gmail.com | f1dd933 | 2009-10-09 22:40:32 +0000 | [diff] [blame] | 1049 | std::string* decoded_target = parent_->decoded_target(); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1050 | VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), |
| 1051 | parseable_chunk->End()); |
| 1052 | size_t source_segment_position = 0; |
| 1053 | unsigned char win_indicator = 0; |
| 1054 | if (!header_parser.ParseWinIndicatorAndSourceSegment( |
| 1055 | parent_->dictionary_size(), |
| 1056 | decoded_target->size(), |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1057 | parent_->allow_vcd_target(), |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1058 | &win_indicator, |
| 1059 | &source_segment_length_, |
| 1060 | &source_segment_position)) { |
| 1061 | return header_parser.GetResult(); |
| 1062 | } |
| 1063 | has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); |
| 1064 | if (!header_parser.ParseWindowLengths(&target_window_length_)) { |
| 1065 | return header_parser.GetResult(); |
| 1066 | } |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 1067 | if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) { |
| 1068 | // An error has been logged by TargetWindowWouldExceedSizeLimits(). |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1069 | return RESULT_ERROR; |
| 1070 | } |
| 1071 | header_parser.ParseDeltaIndicator(); |
| 1072 | VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); |
| 1073 | if (RESULT_SUCCESS != setup_return_code) { |
| 1074 | return setup_return_code; |
| 1075 | } |
| 1076 | // Reserve enough space in the output string for the current target window. |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1077 | const size_t wanted_capacity = |
| 1078 | target_window_start_pos_ + target_window_length_; |
| 1079 | if (decoded_target->capacity() < wanted_capacity) { |
| 1080 | decoded_target->reserve(wanted_capacity); |
| 1081 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1082 | // Get a pointer to the start of the source segment. |
| 1083 | if (win_indicator & VCD_SOURCE) { |
| 1084 | source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; |
| 1085 | } else if (win_indicator & VCD_TARGET) { |
| 1086 | // This assignment must happen after the reserve(). |
| 1087 | // decoded_target should not be resized again while processing this window, |
| 1088 | // so source_segment_ptr_ should remain valid. |
| 1089 | source_segment_ptr_ = decoded_target->data() + source_segment_position; |
| 1090 | } |
| 1091 | // The whole window header was found and parsed successfully. |
| 1092 | found_header_ = true; |
| 1093 | parseable_chunk->Advance(header_parser.ParsedSize()); |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1094 | parent_->AddToTotalTargetWindowSize(target_window_length_); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1095 | return RESULT_SUCCESS; |
| 1096 | } |
| 1097 | |
| 1098 | void VCDiffDeltaFileWindow::UpdateInstructionPointer( |
| 1099 | ParseableChunk* parseable_chunk) { |
| 1100 | if (IsInterleaved()) { |
| 1101 | size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); |
| 1102 | // Reduce expected instruction segment length by bytes parsed |
| 1103 | interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); |
| 1104 | parseable_chunk->Advance(bytes_parsed); |
| 1105 | } |
| 1106 | } |
| 1107 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1108 | inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1109 | return parent_->decoded_target()->size() - target_window_start_pos_; |
| 1110 | } |
| 1111 | |
| 1112 | size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { |
| 1113 | if (target_window_length_ == 0) { |
| 1114 | // There is no window being decoded at present |
| 1115 | return 0; |
| 1116 | } else { |
| 1117 | return target_window_length_ - TargetBytesDecoded(); |
| 1118 | } |
| 1119 | } |
| 1120 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1121 | inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1122 | parent_->decoded_target()->append(data, size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1123 | } |
| 1124 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1125 | inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1126 | parent_->decoded_target()->append(size, byte); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1127 | } |
| 1128 | |
| 1129 | VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { |
| 1130 | if (size > data_for_add_and_run_.UnparsedSize()) { |
| 1131 | return RESULT_END_OF_DATA; |
| 1132 | } |
| 1133 | // Write the next "size" data bytes |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1134 | CopyBytes(data_for_add_and_run_.UnparsedData(), size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1135 | data_for_add_and_run_.Advance(size); |
| 1136 | return RESULT_SUCCESS; |
| 1137 | } |
| 1138 | |
| 1139 | VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { |
| 1140 | if (data_for_add_and_run_.Empty()) { |
| 1141 | return RESULT_END_OF_DATA; |
| 1142 | } |
| 1143 | // Write "size" copies of the next data byte |
| 1144 | RunByte(*data_for_add_and_run_.UnparsedData(), size); |
| 1145 | data_for_add_and_run_.Advance(1); |
| 1146 | return RESULT_SUCCESS; |
| 1147 | } |
| 1148 | |
| 1149 | VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size, |
| 1150 | unsigned char mode) { |
| 1151 | // Keep track of the number of target bytes decoded as a local variable |
| 1152 | // to avoid recalculating it each time it is needed. |
| 1153 | size_t target_bytes_decoded = TargetBytesDecoded(); |
| 1154 | const VCDAddress here_address = |
| 1155 | static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); |
| 1156 | const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( |
| 1157 | here_address, |
| 1158 | mode, |
| 1159 | addresses_for_copy_.UnparsedDataAddr(), |
| 1160 | addresses_for_copy_.End()); |
| 1161 | switch (decoded_address) { |
| 1162 | case RESULT_ERROR: |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1163 | VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1164 | return RESULT_ERROR; |
| 1165 | case RESULT_END_OF_DATA: |
| 1166 | return RESULT_END_OF_DATA; |
| 1167 | default: |
| 1168 | if ((decoded_address < 0) || (decoded_address > here_address)) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1169 | VCD_DFATAL << "Internal error: unexpected address " << decoded_address |
| 1170 | << " returned from DecodeAddress, with here_address = " |
| 1171 | << here_address << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1172 | return RESULT_ERROR; |
| 1173 | } |
| 1174 | break; |
| 1175 | } |
| 1176 | size_t address = static_cast<size_t>(decoded_address); |
| 1177 | if ((address + size) <= source_segment_length_) { |
| 1178 | // Copy all data from source segment |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1179 | CopyBytes(&source_segment_ptr_[address], size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1180 | return RESULT_SUCCESS; |
| 1181 | } |
| 1182 | // Copy some data from target window... |
| 1183 | if (address < source_segment_length_) { |
| 1184 | // ... plus some data from source segment |
| 1185 | const size_t partial_copy_size = source_segment_length_ - address; |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1186 | CopyBytes(&source_segment_ptr_[address], partial_copy_size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1187 | target_bytes_decoded += partial_copy_size; |
| 1188 | address += partial_copy_size; |
| 1189 | size -= partial_copy_size; |
| 1190 | } |
| 1191 | address -= source_segment_length_; |
| 1192 | // address is now based at start of target window |
| 1193 | const char* const target_segment_ptr = parent_->decoded_target()->data() + |
| 1194 | target_window_start_pos_; |
| 1195 | while (size > (target_bytes_decoded - address)) { |
| 1196 | // Recursive copy that extends into the yet-to-be-copied target data |
| 1197 | const size_t partial_copy_size = target_bytes_decoded - address; |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1198 | CopyBytes(&target_segment_ptr[address], partial_copy_size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1199 | target_bytes_decoded += partial_copy_size; |
| 1200 | address += partial_copy_size; |
| 1201 | size -= partial_copy_size; |
| 1202 | } |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1203 | CopyBytes(&target_segment_ptr[address], size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1204 | return RESULT_SUCCESS; |
| 1205 | } |
| 1206 | |
| 1207 | int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { |
| 1208 | if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() |
| 1209 | != parseable_chunk->UnparsedData())) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1210 | VCD_DFATAL << "Internal error: interleaved format is used, but the" |
| 1211 | " input pointer does not point to the instructions section" |
| 1212 | << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1213 | return RESULT_ERROR; |
| 1214 | } |
| 1215 | while (TargetBytesDecoded() < target_window_length_) { |
| 1216 | int32_t decoded_size = VCD_INSTRUCTION_ERROR; |
| 1217 | unsigned char mode = 0; |
| 1218 | VCDiffInstructionType instruction = |
| 1219 | reader_.GetNextInstruction(&decoded_size, &mode); |
| 1220 | switch (instruction) { |
| 1221 | case VCD_INSTRUCTION_END_OF_DATA: |
| 1222 | UpdateInstructionPointer(parseable_chunk); |
| 1223 | return RESULT_END_OF_DATA; |
| 1224 | case VCD_INSTRUCTION_ERROR: |
| 1225 | return RESULT_ERROR; |
| 1226 | default: |
| 1227 | break; |
| 1228 | } |
| 1229 | const size_t size = static_cast<size_t>(decoded_size); |
| 1230 | // The value of "size" itself could be enormous (say, INT32_MAX) |
| 1231 | // so check it individually against the limit to protect against |
| 1232 | // overflow when adding it to something else. |
| 1233 | if ((size > target_window_length_) || |
| 1234 | ((size + TargetBytesDecoded()) > target_window_length_)) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1235 | VCD_ERROR << VCDiffInstructionName(instruction) |
| 1236 | << " with size " << size |
| 1237 | << " plus existing " << TargetBytesDecoded() |
| 1238 | << " bytes of target data exceeds length of target" |
| 1239 | " window (" << target_window_length_ << " bytes)" |
| 1240 | << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1241 | return RESULT_ERROR; |
| 1242 | } |
| 1243 | VCDiffResult result = RESULT_SUCCESS; |
| 1244 | switch (instruction) { |
| 1245 | case VCD_ADD: |
| 1246 | result = DecodeAdd(size); |
| 1247 | break; |
| 1248 | case VCD_RUN: |
| 1249 | result = DecodeRun(size); |
| 1250 | break; |
| 1251 | case VCD_COPY: |
| 1252 | result = DecodeCopy(size, mode); |
| 1253 | break; |
| 1254 | default: |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1255 | VCD_DFATAL << "Unexpected instruction type " << instruction |
| 1256 | << "in opcode stream" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1257 | return RESULT_ERROR; |
| 1258 | } |
| 1259 | switch (result) { |
| 1260 | case RESULT_END_OF_DATA: |
| 1261 | reader_.UnGetInstruction(); |
| 1262 | UpdateInstructionPointer(parseable_chunk); |
| 1263 | return RESULT_END_OF_DATA; |
| 1264 | case RESULT_ERROR: |
| 1265 | return RESULT_ERROR; |
| 1266 | case RESULT_SUCCESS: |
| 1267 | break; |
| 1268 | } |
| 1269 | } |
| 1270 | if (TargetBytesDecoded() != target_window_length_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1271 | VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded() |
| 1272 | << " bytes) does not match expected size (" |
| 1273 | << target_window_length_ << " bytes)" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1274 | return RESULT_ERROR; |
| 1275 | } |
| 1276 | const char* const target_window_start = |
| 1277 | parent_->decoded_target()->data() + target_window_start_pos_; |
| 1278 | if (has_checksum_ && |
| 1279 | (ComputeAdler32(target_window_start, target_window_length_) |
| 1280 | != expected_checksum_)) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1281 | VCD_ERROR << "Target data does not match checksum; this could mean " |
| 1282 | "that the wrong dictionary was used" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1283 | return RESULT_ERROR; |
| 1284 | } |
| 1285 | if (!instructions_and_sizes_.Empty()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1286 | VCD_ERROR << "Excess instructions and sizes left over " |
| 1287 | "after decoding target window" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1288 | return RESULT_ERROR; |
| 1289 | } |
| 1290 | if (!IsInterleaved()) { |
| 1291 | // Standard format is being used, with three separate sections for the |
| 1292 | // instructions, data, and addresses. |
| 1293 | if (!data_for_add_and_run_.Empty()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1294 | VCD_ERROR << "Excess ADD/RUN data left over " |
| 1295 | "after decoding target window" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1296 | return RESULT_ERROR; |
| 1297 | } |
| 1298 | if (!addresses_for_copy_.Empty()) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1299 | VCD_ERROR << "Excess COPY addresses left over " |
| 1300 | "after decoding target window" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1301 | return RESULT_ERROR; |
| 1302 | } |
| 1303 | // Reached the end of the window. Update the ParseableChunk to point to the |
| 1304 | // end of the addresses section, which is the last section in the window. |
| 1305 | parseable_chunk->SetPosition(addresses_for_copy_.End()); |
| 1306 | } else { |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1307 | // Interleaved format is being used. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1308 | UpdateInstructionPointer(parseable_chunk); |
| 1309 | } |
| 1310 | return RESULT_SUCCESS; |
| 1311 | } |
| 1312 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1313 | VCDiffResult VCDiffDeltaFileWindow::DecodeWindow( |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1314 | ParseableChunk* parseable_chunk) { |
| 1315 | if (!parent_) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1316 | VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() " |
| 1317 | "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1318 | return RESULT_ERROR; |
| 1319 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1320 | if (!found_header_) { |
| 1321 | switch (ReadHeader(parseable_chunk)) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1322 | case RESULT_END_OF_DATA: |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1323 | return RESULT_END_OF_DATA; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1324 | case RESULT_ERROR: |
| 1325 | return RESULT_ERROR; |
| 1326 | default: |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1327 | // Reset address cache between windows (RFC section 5.1) |
| 1328 | if (!parent_->addr_cache()->Init()) { |
| 1329 | VCD_DFATAL << "Error initializing address cache" << VCD_ENDL; |
| 1330 | return RESULT_ERROR; |
| 1331 | } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1332 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1333 | } else { |
| 1334 | // We are resuming a window that was partially decoded before a |
| 1335 | // RESULT_END_OF_DATA was returned. This can only happen on the first |
| 1336 | // loop iteration, and only if the interleaved format is enabled and used. |
| 1337 | if (!IsInterleaved()) { |
| 1338 | VCD_DFATAL << "Internal error: Resumed decoding of a delta file window" |
| 1339 | " when interleaved format is not being used" << VCD_ENDL; |
| 1340 | return RESULT_ERROR; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1341 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1342 | UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), |
| 1343 | parseable_chunk->End()); |
| 1344 | reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), |
| 1345 | instructions_and_sizes_.End()); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1346 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 1347 | switch (DecodeBody(parseable_chunk)) { |
| 1348 | case RESULT_END_OF_DATA: |
| 1349 | if (MoreDataExpected()) { |
| 1350 | return RESULT_END_OF_DATA; |
| 1351 | } else { |
| 1352 | VCD_ERROR << "End of data reached while decoding VCDIFF delta file" |
| 1353 | << VCD_ENDL; |
| 1354 | // fall through to RESULT_ERROR case |
| 1355 | } |
| 1356 | case RESULT_ERROR: |
| 1357 | return RESULT_ERROR; |
| 1358 | default: |
| 1359 | break; // DecodeBody succeeded |
| 1360 | } |
| 1361 | // Get ready to read a new delta window |
| 1362 | Reset(); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1363 | return RESULT_SUCCESS; |
| 1364 | } |
| 1365 | |
| 1366 | // *** Methods for VCDiffStreamingDecoder |
| 1367 | |
| 1368 | VCDiffStreamingDecoder::VCDiffStreamingDecoder() |
| 1369 | : impl_(new VCDiffStreamingDecoderImpl) { } |
| 1370 | |
| 1371 | VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } |
| 1372 | |
| 1373 | void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { |
| 1374 | impl_->StartDecoding(source, len); |
| 1375 | } |
| 1376 | |
| 1377 | bool VCDiffStreamingDecoder::DecodeChunkToInterface( |
| 1378 | const char* data, |
| 1379 | size_t len, |
| 1380 | OutputStringInterface* output_string) { |
| 1381 | return impl_->DecodeChunk(data, len, output_string); |
| 1382 | } |
| 1383 | |
| 1384 | bool VCDiffStreamingDecoder::FinishDecoding() { |
| 1385 | return impl_->FinishDecoding(); |
| 1386 | } |
| 1387 | |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 1388 | bool VCDiffStreamingDecoder::SetMaximumTargetFileSize( |
| 1389 | size_t new_maximum_target_file_size) { |
| 1390 | return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size); |
| 1391 | } |
| 1392 | |
| 1393 | bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize( |
| 1394 | size_t new_maximum_target_window_size) { |
| 1395 | return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size); |
| 1396 | } |
| 1397 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 1398 | void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) { |
| 1399 | impl_->SetAllowVcdTarget(allow_vcd_target); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1400 | } |
| 1401 | |
| 1402 | bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, |
| 1403 | size_t dictionary_size, |
| 1404 | const string& encoding, |
| 1405 | OutputStringInterface* target) { |
| 1406 | target->clear(); |
| 1407 | decoder_.StartDecoding(dictionary_ptr, dictionary_size); |
| 1408 | if (!decoder_.DecodeChunkToInterface(encoding.data(), |
| 1409 | encoding.size(), |
| 1410 | target)) { |
| 1411 | return false; |
| 1412 | } |
| 1413 | return decoder_.FinishDecoding(); |
| 1414 | } |
| 1415 | |
| 1416 | } // namespace open_vcdiff |