openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2008 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | |
| 16 | #ifndef OPEN_VCDIFF_HEADERPARSER_H_ |
| 17 | #define OPEN_VCDIFF_HEADERPARSER_H_ |
| 18 | |
| 19 | #include <config.h> |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 20 | #include <stddef.h> // NULL |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 21 | #include <stdint.h> // int32_t, uint32_t |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 22 | #include "checksum.h" // VCDChecksum |
| 23 | #include "vcdiff_defs.h" // VCDiffResult |
| 24 | |
| 25 | namespace open_vcdiff { |
| 26 | |
// A ParseableChunk describes one contiguous memory buffer by its start and end
// pointers, together with a position pointer that separates the bytes already
// parsed from the bytes still waiting to be parsed.  Only pointers into the
// buffer are stored; the bytes themselves are not copied.
//
// ParseableChunk does not define a virtual destructor.  An object of a class
// derived from ParseableChunk must therefore be destroyed through its own
// specific type, never through a ParseableChunk*.
class ParseableChunk {
 public:
  // Describes the data_size bytes beginning at data_start; parsing begins at
  // data_start.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // The position just past the last byte of the buffer.
  const char* End() const { return end_; }

  // How many bytes are still waiting to be parsed.  This starts out equal to
  // the size of the buffer and changes whenever the parsing position moves,
  // e.g. with each call to Advance().
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // How many bytes have been parsed so far.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True once the parsing position has reached the end of the buffer.
  bool Empty() const { return position_ == end_; }

  // The first byte that has not been parsed yet.
  const char* UnparsedData() const { return position_; }

  // The address of the position pointer itself.  Writing through the returned
  // pointer moves the parsing position of this chunk.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Marks the whole chunk as parsed by jumping straight to its end.
  void Finish() { position_ = end_; }

  // Jumps the parsing position so that exactly number_of_bytes bytes are left
  // to parse.  number_of_bytes should be smaller than the amount of unparsed
  // data that was available before the call.
  void FinishExcept(size_t number_of_bytes);

  // Points this chunk at a different buffer and restarts parsing from the
  // first byte of that buffer.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    position_ = start_ = data_start;
    end_ = start_ + data_size;
  }

 private:
  const char* start_;
  const char* end_;

  // The current parsing position within the data chunk.
  // Invariant: start_ <= position_ <= end_.
  const char* position_;

  // Declared private and left undefined so that the compiler does not supply
  // an implicit copy constructor or assignment operator.
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};
| 96 | |
| 97 | // Represents one of the three sections in the delta window, as described in |
| 98 | // RFC section 4.3: |
| 99 | // * Data section for ADDs and RUNs |
| 100 | // * Instructions and sizes section |
| 101 | // * Addresses section for COPYs |
| 102 | // When using the interleaved format, data and addresses are pulled from the |
| 103 | // instructions and sizes section rather than being stored in separate sections. |
| 104 | // For that reason, this class allows one DeltaWindowSection to be based on |
| 105 | // another, such that the same position pointer is shared by both sections; |
| 106 | // i.e., UnparsedDataAddr() returns the same value for both objects. |
| 107 | // To achieve this end, one extra level of indirection (a pointer to a |
| 108 | // ParseableChunk object) is added. |
| 109 | class DeltaWindowSection { |
| 110 | public: |
| 111 | DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { } |
| 112 | |
| 113 | ~DeltaWindowSection() { |
| 114 | FreeChunk(); |
| 115 | } |
| 116 | |
| 117 | void Init(const char* data_start, size_t data_size) { |
| 118 | if (owned_ && parseable_chunk_) { |
| 119 | // Reuse the already-allocated ParseableChunk object. |
| 120 | parseable_chunk_->SetDataBuffer(data_start, data_size); |
| 121 | } else { |
| 122 | parseable_chunk_ = new ParseableChunk(data_start, data_size); |
| 123 | owned_ = true; |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | void Init(DeltaWindowSection* original) { |
| 128 | FreeChunk(); |
| 129 | parseable_chunk_ = original->parseable_chunk_; |
| 130 | owned_ = false; |
| 131 | } |
| 132 | |
| 133 | void Invalidate() { FreeChunk(); } |
| 134 | |
| 135 | bool IsOwned() const { return owned_; } |
| 136 | |
| 137 | // The following functions just pass their arguments to the underlying |
| 138 | // ParseableChunk object. |
| 139 | |
| 140 | const char* End() const { |
| 141 | return parseable_chunk_->End(); |
| 142 | } |
| 143 | |
| 144 | size_t UnparsedSize() const { |
| 145 | return parseable_chunk_->UnparsedSize(); |
| 146 | } |
| 147 | |
| 148 | size_t ParsedSize() const { |
| 149 | return parseable_chunk_->ParsedSize(); |
| 150 | } |
| 151 | |
| 152 | bool Empty() const { |
| 153 | return parseable_chunk_->Empty(); |
| 154 | } |
| 155 | |
| 156 | const char* UnparsedData() const { |
| 157 | return parseable_chunk_->UnparsedData(); |
| 158 | } |
| 159 | |
| 160 | const char** UnparsedDataAddr() { |
| 161 | return parseable_chunk_->UnparsedDataAddr(); |
| 162 | } |
| 163 | |
| 164 | void Advance(size_t number_of_bytes) { |
| 165 | return parseable_chunk_->Advance(number_of_bytes); |
| 166 | } |
| 167 | private: |
| 168 | void FreeChunk() { |
| 169 | if (owned_) { |
| 170 | delete parseable_chunk_; |
| 171 | } |
| 172 | parseable_chunk_ = NULL; |
| 173 | } |
| 174 | |
| 175 | // Will be NULL until Init() has been called. If owned_ is true, this will |
| 176 | // point to a ParseableChunk object that has been allocated with "new" and |
| 177 | // must be deleted by this DeltaWindowSection object. If owned_ is false, |
| 178 | // this points at the parseable_chunk_ owned by a different DeltaWindowSection |
| 179 | // object. In this case, it is important to free the DeltaWindowSection which |
| 180 | // does not own the ParseableChunk before (or simultaneously to) freeing the |
| 181 | // DeltaWindowSection that owns it, or else deleted memory may be accessed. |
| 182 | ParseableChunk* parseable_chunk_; |
| 183 | bool owned_; |
| 184 | |
| 185 | // Making these private avoids implicit copy constructor & assignment operator |
| 186 | DeltaWindowSection(const DeltaWindowSection&); |
| 187 | void operator=(const DeltaWindowSection&); |
| 188 | }; |
| 189 | |
| 190 | // Used to parse the bytes and Varints that make up the delta file header |
| 191 | // or delta window header. |
| 192 | class VCDiffHeaderParser { |
| 193 | public: |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 194 | // header_start should be the start of the header to be parsed; |
| 195 | // data_end is the position just after the last byte of available data |
| 196 | // (which may extend far past the end of the header.) |
| 197 | VCDiffHeaderParser(const char* header_start, const char* data_end); |
| 198 | |
| 199 | // One of these functions should be called for each element of the header. |
| 200 | // variable_description is a description of the value that we are attempting |
| 201 | // to parse, and will only be used to create descriptive error messages. |
| 202 | // If the function returns true, then the element was parsed successfully |
| 203 | // and its value has been placed in *value. If the function returns false, |
| 204 | // then *value is unchanged, and GetResult() can be called to return the |
| 205 | // reason that the element could not be parsed, which will be either |
| 206 | // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end |
| 207 | // was reached before the end of the element to be parsed.) Once one of these |
| 208 | // functions has returned false, further calls to any of the Parse... |
| 209 | // functions will also return false without performing any additional actions. |
| 210 | // Typical usage is as follows: |
| 211 | // int32_t segment_length = 0; |
| 212 | // if (!header_parser.ParseInt32("segment length", &segment_length)) { |
| 213 | // return header_parser.GetResult(); |
| 214 | // } |
| 215 | // |
| 216 | // The following example takes advantage of the fact that calling a Parse... |
| 217 | // function after an error or end-of-data condition is legal and does nothing. |
| 218 | // It can thus parse more than one element in a row and check the status |
| 219 | // afterwards. If the first call to ParseInt32() fails, the second will have |
| 220 | // no effect: |
| 221 | // |
| 222 | // int32_t segment_length = 0, segment_position = 0; |
| 223 | // header_parser.ParseInt32("segment length", &segment_length)); |
| 224 | // header_parser.ParseInt32("segment position", &segment_position)); |
| 225 | // if (RESULT_SUCCESS != header_parser.GetResult()) { |
| 226 | // return header_parser.GetResult(); |
| 227 | // } |
| 228 | // |
| 229 | bool ParseByte(unsigned char* value); |
| 230 | bool ParseInt32(const char* variable_description, int32_t* value); |
| 231 | bool ParseUInt32(const char* variable_description, uint32_t* value); |
| 232 | bool ParseChecksum(const char* variable_description, VCDChecksum* value); |
| 233 | bool ParseSize(const char* variable_description, size_t* value); |
| 234 | |
| 235 | // Parses the first three elements of the delta window header: |
| 236 | // |
| 237 | // Win_Indicator - byte |
| 238 | // [Source segment size] - integer (VarintBE format) |
| 239 | // [Source segment position] - integer (VarintBE format) |
| 240 | // |
| 241 | // Returns true if the values were parsed successfully and the values were |
| 242 | // found to be acceptable. Returns false otherwise, in which case |
| 243 | // GetResult() can be called to return the reason that the two values |
| 244 | // could not be validated. This will be either RESULT_ERROR (an error |
| 245 | // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was |
| 246 | // reached before the end of the values to be parsed.) If return value is |
| 247 | // true, then *win_indicator, *source_segment_length, and |
| 248 | // *source_segment_position are populated with the parsed values. Otherwise, |
| 249 | // the values of these output arguments are undefined. |
| 250 | // |
| 251 | // dictionary_size: The size of the dictionary (source) file. Used to |
| 252 | // validate the limits of source_segment_length and |
| 253 | // source_segment_position if the source segment is taken from the |
| 254 | // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.) |
| 255 | // decoded_target_size: The size of the target data that has been decoded |
| 256 | // so far, including all target windows. Used to validate the limits of |
| 257 | // source_segment_length and source_segment_position if the source segment |
| 258 | // is taken from the target (i.e., if the parsed *win_indicator equals |
| 259 | // VCD_TARGET.) |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 260 | // allow_vcd_target: If this argument is false, and the parsed *win_indicator |
| 261 | // is VCD_TARGET, then an error is produced; if true, VCD_TARGET is |
| 262 | // allowed. |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 263 | // win_indicator (output): Points to a single unsigned char (not an array) |
| 264 | // that will receive the parsed value of Win_Indicator. |
| 265 | // source_segment_length (output): The parsed length of the source segment. |
| 266 | // source_segment_position (output): The parsed zero-based index in the |
| 267 | // source/target file from which the source segment is to be taken. |
| 268 | bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size, |
| 269 | size_t decoded_target_size, |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 270 | bool allow_vcd_target, |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 271 | unsigned char* win_indicator, |
| 272 | size_t* source_segment_length, |
| 273 | size_t* source_segment_position); |
| 274 | |
| 275 | // Parses the following two elements of the delta window header: |
| 276 | // |
| 277 | // Length of the delta encoding - integer (VarintBE format) |
| 278 | // Size of the target window - integer (VarintBE format) |
| 279 | // |
| 280 | // Return conditions and values are the same as for |
| 281 | // ParseWinIndicatorAndSourceSegment(), above. |
| 282 | // |
| 283 | bool ParseWindowLengths(size_t* target_window_length); |
| 284 | |
| 285 | // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS. |
| 286 | // Returns a pointer to the end of the delta window (which might not point to |
| 287 | // a valid memory location if there is insufficient input data.) |
| 288 | // |
| 289 | const char* EndOfDeltaWindow() const; |
| 290 | |
| 291 | // Parses the following element of the delta window header: |
| 292 | // |
| 293 | // Delta_Indicator - byte |
| 294 | // |
| 295 | // Because none of the bits in Delta_Indicator are used by this implementation |
| 296 | // of VCDIFF, this function does not have an output argument to return the |
| 297 | // value of that field. It may return RESULT_SUCCESS, RESULT_ERROR, or |
| 298 | // RESULT_END_OF_DATA as with the other Parse...() functions. |
| 299 | // |
| 300 | bool ParseDeltaIndicator(); |
| 301 | |
| 302 | // Parses the following 3 elements of the delta window header: |
| 303 | // |
| 304 | // Length of data for ADDs and RUNs - integer (VarintBE format) |
| 305 | // Length of instructions and sizes - integer (VarintBE format) |
| 306 | // Length of addresses for COPYs - integer (VarintBE format) |
| 307 | // |
| 308 | // If has_checksum is true, it also looks for the following element: |
| 309 | // |
| 310 | // Adler32 checksum - unsigned 32-bit integer (VarintBE format) |
| 311 | // |
| 312 | // Return conditions and values are the same as for |
| 313 | // ParseWinIndicatorAndSourceSegment(), above. |
| 314 | // |
| 315 | bool ParseSectionLengths(bool has_checksum, |
| 316 | size_t* add_and_run_data_length, |
| 317 | size_t* instructions_and_sizes_length, |
| 318 | size_t* addresses_length, |
| 319 | VCDChecksum* checksum); |
| 320 | |
| 321 | // If one of the Parse... functions returned false, this function |
| 322 | // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA) |
| 323 | // describing the reason for the most recent parse failure. If none of the |
| 324 | // Parse... functions has returned false, returns RESULT_SUCCESS. |
| 325 | VCDiffResult GetResult() const { |
| 326 | return return_code_; |
| 327 | } |
| 328 | |
| 329 | // The following functions just pass their arguments to the underlying |
| 330 | // ParseableChunk object. |
| 331 | |
| 332 | const char* End() const { |
| 333 | return parseable_chunk_.End(); |
| 334 | } |
| 335 | |
| 336 | size_t UnparsedSize() const { |
| 337 | return parseable_chunk_.UnparsedSize(); |
| 338 | } |
| 339 | |
| 340 | size_t ParsedSize() const { |
| 341 | return parseable_chunk_.ParsedSize(); |
| 342 | } |
| 343 | |
| 344 | const char* UnparsedData() const { |
| 345 | return parseable_chunk_.UnparsedData(); |
| 346 | } |
| 347 | |
| 348 | private: |
| 349 | // Parses two variable-length integers representing the source segment length |
| 350 | // and source segment position (== offset.) Checks whether the source segment |
| 351 | // length and position would cause it to exceed the size of the source file or |
| 352 | // target file. Returns true if the values were parsed successfully and the |
| 353 | // values were found to be acceptable. Returns false otherwise, in which case |
| 354 | // GetResult() can be called to return the reason that the two values could |
| 355 | // not be validated, which will be either RESULT_ERROR (an error occurred and |
| 356 | // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before |
| 357 | // the end of the integers to be parsed.) |
| 358 | // from_size: The requested size of the source segment. |
| 359 | // from_boundary_name: A NULL-terminated string naming the end of the |
| 360 | // source or target file, used in error messages. |
| 361 | // from_name: A NULL-terminated string naming the source or target file, |
| 362 | // also used in error messages. |
| 363 | // source_segment_length (output): The parsed length of the source segment. |
| 364 | // source_segment_position (output): The parsed zero-based index in the |
| 365 | // source/target file from which the source segment is to be taken. |
| 366 | // |
| 367 | bool ParseSourceSegmentLengthAndPosition(size_t from_size, |
| 368 | const char* from_boundary_name, |
| 369 | const char* from_name, |
| 370 | size_t* source_segment_length, |
| 371 | size_t* source_segment_position); |
| 372 | |
| 373 | ParseableChunk parseable_chunk_; |
| 374 | |
| 375 | // Contains the result code of the last Parse...() operation that failed |
| 376 | // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been |
| 377 | // called, or if all calls to Parse...() were successful, then this contains |
| 378 | // RESULT_SUCCESS. |
| 379 | VCDiffResult return_code_; |
| 380 | |
| 381 | // Will be zero until ParseWindowLengths() has been called. After |
| 382 | // ParseWindowLengths() has been called successfully, this contains the |
| 383 | // parsed length of the delta encoding. |
| 384 | size_t delta_encoding_length_; |
| 385 | |
| 386 | // Will be NULL until ParseWindowLengths() has been called. After |
| 387 | // ParseWindowLengths() has been called successfully, this points to the |
| 388 | // beginning of the section of the current window titled "The delta encoding" |
| 389 | // in the RFC, i.e., to the position just after the length of the delta |
| 390 | // encoding. |
| 391 | const char* delta_encoding_start_; |
| 392 | |
| 393 | // Making these private avoids implicit copy constructor & assignment operator |
| 394 | VCDiffHeaderParser(const VCDiffHeaderParser&); |
| 395 | void operator=(const VCDiffHeaderParser&); |
| 396 | }; |
| 397 | |
| 398 | } // namespace open_vcdiff |
| 399 | |
| 400 | #endif // OPEN_VCDIFF_HEADERPARSER_H_ |