blob: eb0ea8e171ff00b5256d1ffeabe621ae08529d15 [file] [log] [blame]
openvcdiff311c7142008-08-26 19:29:25 +00001// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef OPEN_VCDIFF_HEADERPARSER_H_
17#define OPEN_VCDIFF_HEADERPARSER_H_
18
19#include <config.h>
openvcdiff28db8072008-10-10 23:29:11 +000020#include <stddef.h> // NULL
openvcdiff311c7142008-08-26 19:29:25 +000021#include <stdint.h> // int32_t, uint32_t
openvcdiff311c7142008-08-26 19:29:25 +000022#include "checksum.h" // VCDChecksum
23#include "vcdiff_defs.h" // VCDiffResult
24
25namespace open_vcdiff {
26
// This class contains a contiguous memory buffer with start and end pointers,
// as well as a position pointer which shows how much of the buffer has been
// parsed and how much remains.
//
// Because no virtual destructor is defined for ParseableChunk, a pointer to
// a child class of ParseableChunk must be destroyed using its specific type,
// rather than as a ParseableChunk*.
class ParseableChunk {
 public:
  // Creates a chunk covering the data_size bytes that begin at data_start.
  // The parsing position starts out at data_start, i.e. nothing is parsed yet.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // The position just past the last byte of the buffer.
  const char* End() const { return end_; }

  // How many bytes are still waiting to be parsed.  This shrinks as the
  // position moves forward (for example through Advance()), so it generally
  // differs from the size the buffer was created with.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // How many bytes lie between the start of the buffer and the current
  // parsing position, i.e. how much has been parsed so far.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True when the parsing position has reached the end of the buffer.
  bool Empty() const { return position_ == end_; }

  // The first byte that has not been parsed yet.
  const char* UnparsedData() const { return position_; }

  // The address of the internal position pointer itself.  Writing through
  // the returned pointer moves the parsing position of this chunk.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Jumps the parsing position to the end of the data chunk, leaving nothing
  // left to parse.
  void Finish() {
    position_ = end_;
  }

  // Jumps the parsing position so that exactly number_of_bytes bytes are left
  // to parse.  number_of_bytes should be smaller than the amount of unparsed
  // data that was available before the call.
  void FinishExcept(size_t number_of_bytes);

  // Points this chunk at a different buffer of data_size bytes beginning at
  // data_start, and rewinds the parsing position to the start of that buffer.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    start_ = data_start;
    position_ = data_start;
    end_ = data_start + data_size;
  }

 private:
  // First byte of the buffer and the position just past its last byte.
  const char* start_;
  const char* end_;

  // The current parsing position within the data chunk.
  // Must always respect start_ <= position_ <= end_.
  const char* position_;

  // Declared private and left undefined so that the chunk cannot be copied
  // through an implicit copy constructor or assignment operator.
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};
96
// Represents one of the three sections in the delta window, as described in
// RFC section 4.3:
//     * Data section for ADDs and RUNs
//     * Instructions and sizes section
//     * Addresses section for COPYs
// When using the interleaved format, data and addresses are pulled from the
// instructions and sizes section rather than being stored in separate sections.
// For that reason, this class allows one DeltaWindowSection to be based on
// another, such that the same position pointer is shared by both sections;
// i.e., UnparsedDataAddr() returns the same value for both objects.
// To achieve this end, one extra level of indirection (a pointer to a
// ParseableChunk object) is added.
109class DeltaWindowSection {
110 public:
111 DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { }
112
113 ~DeltaWindowSection() {
114 FreeChunk();
115 }
116
117 void Init(const char* data_start, size_t data_size) {
118 if (owned_ && parseable_chunk_) {
119 // Reuse the already-allocated ParseableChunk object.
120 parseable_chunk_->SetDataBuffer(data_start, data_size);
121 } else {
122 parseable_chunk_ = new ParseableChunk(data_start, data_size);
123 owned_ = true;
124 }
125 }
126
127 void Init(DeltaWindowSection* original) {
128 FreeChunk();
129 parseable_chunk_ = original->parseable_chunk_;
130 owned_ = false;
131 }
132
133 void Invalidate() { FreeChunk(); }
134
135 bool IsOwned() const { return owned_; }
136
137 // The following functions just pass their arguments to the underlying
138 // ParseableChunk object.
139
140 const char* End() const {
141 return parseable_chunk_->End();
142 }
143
144 size_t UnparsedSize() const {
145 return parseable_chunk_->UnparsedSize();
146 }
147
148 size_t ParsedSize() const {
149 return parseable_chunk_->ParsedSize();
150 }
151
152 bool Empty() const {
153 return parseable_chunk_->Empty();
154 }
155
156 const char* UnparsedData() const {
157 return parseable_chunk_->UnparsedData();
158 }
159
160 const char** UnparsedDataAddr() {
161 return parseable_chunk_->UnparsedDataAddr();
162 }
163
164 void Advance(size_t number_of_bytes) {
165 return parseable_chunk_->Advance(number_of_bytes);
166 }
167 private:
168 void FreeChunk() {
169 if (owned_) {
170 delete parseable_chunk_;
171 }
172 parseable_chunk_ = NULL;
173 }
174
175 // Will be NULL until Init() has been called. If owned_ is true, this will
176 // point to a ParseableChunk object that has been allocated with "new" and
177 // must be deleted by this DeltaWindowSection object. If owned_ is false,
178 // this points at the parseable_chunk_ owned by a different DeltaWindowSection
179 // object. In this case, it is important to free the DeltaWindowSection which
180 // does not own the ParseableChunk before (or simultaneously to) freeing the
181 // DeltaWindowSection that owns it, or else deleted memory may be accessed.
182 ParseableChunk* parseable_chunk_;
183 bool owned_;
184
185 // Making these private avoids implicit copy constructor & assignment operator
186 DeltaWindowSection(const DeltaWindowSection&);
187 void operator=(const DeltaWindowSection&);
188};
189
// Used to parse the bytes and Varints that make up the delta file header
// or delta window header.
192class VCDiffHeaderParser {
193 public:
openvcdiff311c7142008-08-26 19:29:25 +0000194 // header_start should be the start of the header to be parsed;
195 // data_end is the position just after the last byte of available data
196 // (which may extend far past the end of the header.)
197 VCDiffHeaderParser(const char* header_start, const char* data_end);
198
199 // One of these functions should be called for each element of the header.
200 // variable_description is a description of the value that we are attempting
201 // to parse, and will only be used to create descriptive error messages.
202 // If the function returns true, then the element was parsed successfully
203 // and its value has been placed in *value. If the function returns false,
204 // then *value is unchanged, and GetResult() can be called to return the
205 // reason that the element could not be parsed, which will be either
206 // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end
207 // was reached before the end of the element to be parsed.) Once one of these
208 // functions has returned false, further calls to any of the Parse...
209 // functions will also return false without performing any additional actions.
210 // Typical usage is as follows:
211 // int32_t segment_length = 0;
212 // if (!header_parser.ParseInt32("segment length", &segment_length)) {
213 // return header_parser.GetResult();
214 // }
215 //
216 // The following example takes advantage of the fact that calling a Parse...
217 // function after an error or end-of-data condition is legal and does nothing.
218 // It can thus parse more than one element in a row and check the status
219 // afterwards. If the first call to ParseInt32() fails, the second will have
220 // no effect:
221 //
222 // int32_t segment_length = 0, segment_position = 0;
223 // header_parser.ParseInt32("segment length", &segment_length));
224 // header_parser.ParseInt32("segment position", &segment_position));
225 // if (RESULT_SUCCESS != header_parser.GetResult()) {
226 // return header_parser.GetResult();
227 // }
228 //
229 bool ParseByte(unsigned char* value);
230 bool ParseInt32(const char* variable_description, int32_t* value);
231 bool ParseUInt32(const char* variable_description, uint32_t* value);
232 bool ParseChecksum(const char* variable_description, VCDChecksum* value);
233 bool ParseSize(const char* variable_description, size_t* value);
234
235 // Parses the first three elements of the delta window header:
236 //
237 // Win_Indicator - byte
238 // [Source segment size] - integer (VarintBE format)
239 // [Source segment position] - integer (VarintBE format)
240 //
241 // Returns true if the values were parsed successfully and the values were
242 // found to be acceptable. Returns false otherwise, in which case
243 // GetResult() can be called to return the reason that the two values
244 // could not be validated. This will be either RESULT_ERROR (an error
245 // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was
246 // reached before the end of the values to be parsed.) If return value is
247 // true, then *win_indicator, *source_segment_length, and
248 // *source_segment_position are populated with the parsed values. Otherwise,
249 // the values of these output arguments are undefined.
250 //
251 // dictionary_size: The size of the dictionary (source) file. Used to
252 // validate the limits of source_segment_length and
253 // source_segment_position if the source segment is taken from the
254 // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.)
255 // decoded_target_size: The size of the target data that has been decoded
256 // so far, including all target windows. Used to validate the limits of
257 // source_segment_length and source_segment_position if the source segment
258 // is taken from the target (i.e., if the parsed *win_indicator equals
259 // VCD_TARGET.)
openvcdiffbaf44ea2009-04-09 19:20:49 +0000260 // allow_vcd_target: If this argument is false, and the parsed *win_indicator
261 // is VCD_TARGET, then an error is produced; if true, VCD_TARGET is
262 // allowed.
openvcdiff311c7142008-08-26 19:29:25 +0000263 // win_indicator (output): Points to a single unsigned char (not an array)
264 // that will receive the parsed value of Win_Indicator.
265 // source_segment_length (output): The parsed length of the source segment.
266 // source_segment_position (output): The parsed zero-based index in the
267 // source/target file from which the source segment is to be taken.
268 bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size,
269 size_t decoded_target_size,
openvcdiffbaf44ea2009-04-09 19:20:49 +0000270 bool allow_vcd_target,
openvcdiff311c7142008-08-26 19:29:25 +0000271 unsigned char* win_indicator,
272 size_t* source_segment_length,
273 size_t* source_segment_position);
274
275 // Parses the following two elements of the delta window header:
276 //
277 // Length of the delta encoding - integer (VarintBE format)
278 // Size of the target window - integer (VarintBE format)
279 //
280 // Return conditions and values are the same as for
281 // ParseWinIndicatorAndSourceSegment(), above.
282 //
283 bool ParseWindowLengths(size_t* target_window_length);
284
285 // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS.
286 // Returns a pointer to the end of the delta window (which might not point to
287 // a valid memory location if there is insufficient input data.)
288 //
289 const char* EndOfDeltaWindow() const;
290
291 // Parses the following element of the delta window header:
292 //
293 // Delta_Indicator - byte
294 //
295 // Because none of the bits in Delta_Indicator are used by this implementation
296 // of VCDIFF, this function does not have an output argument to return the
297 // value of that field. It may return RESULT_SUCCESS, RESULT_ERROR, or
298 // RESULT_END_OF_DATA as with the other Parse...() functions.
299 //
300 bool ParseDeltaIndicator();
301
302 // Parses the following 3 elements of the delta window header:
303 //
304 // Length of data for ADDs and RUNs - integer (VarintBE format)
305 // Length of instructions and sizes - integer (VarintBE format)
306 // Length of addresses for COPYs - integer (VarintBE format)
307 //
308 // If has_checksum is true, it also looks for the following element:
309 //
310 // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
311 //
312 // Return conditions and values are the same as for
313 // ParseWinIndicatorAndSourceSegment(), above.
314 //
315 bool ParseSectionLengths(bool has_checksum,
316 size_t* add_and_run_data_length,
317 size_t* instructions_and_sizes_length,
318 size_t* addresses_length,
319 VCDChecksum* checksum);
320
321 // If one of the Parse... functions returned false, this function
322 // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA)
323 // describing the reason for the most recent parse failure. If none of the
324 // Parse... functions has returned false, returns RESULT_SUCCESS.
325 VCDiffResult GetResult() const {
326 return return_code_;
327 }
328
329 // The following functions just pass their arguments to the underlying
330 // ParseableChunk object.
331
332 const char* End() const {
333 return parseable_chunk_.End();
334 }
335
336 size_t UnparsedSize() const {
337 return parseable_chunk_.UnparsedSize();
338 }
339
340 size_t ParsedSize() const {
341 return parseable_chunk_.ParsedSize();
342 }
343
344 const char* UnparsedData() const {
345 return parseable_chunk_.UnparsedData();
346 }
347
348 private:
349 // Parses two variable-length integers representing the source segment length
350 // and source segment position (== offset.) Checks whether the source segment
351 // length and position would cause it to exceed the size of the source file or
352 // target file. Returns true if the values were parsed successfully and the
353 // values were found to be acceptable. Returns false otherwise, in which case
354 // GetResult() can be called to return the reason that the two values could
355 // not be validated, which will be either RESULT_ERROR (an error occurred and
356 // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before
357 // the end of the integers to be parsed.)
358 // from_size: The requested size of the source segment.
359 // from_boundary_name: A NULL-terminated string naming the end of the
360 // source or target file, used in error messages.
361 // from_name: A NULL-terminated string naming the source or target file,
362 // also used in error messages.
363 // source_segment_length (output): The parsed length of the source segment.
364 // source_segment_position (output): The parsed zero-based index in the
365 // source/target file from which the source segment is to be taken.
366 //
367 bool ParseSourceSegmentLengthAndPosition(size_t from_size,
368 const char* from_boundary_name,
369 const char* from_name,
370 size_t* source_segment_length,
371 size_t* source_segment_position);
372
373 ParseableChunk parseable_chunk_;
374
375 // Contains the result code of the last Parse...() operation that failed
376 // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been
377 // called, or if all calls to Parse...() were successful, then this contains
378 // RESULT_SUCCESS.
379 VCDiffResult return_code_;
380
381 // Will be zero until ParseWindowLengths() has been called. After
382 // ParseWindowLengths() has been called successfully, this contains the
383 // parsed length of the delta encoding.
384 size_t delta_encoding_length_;
385
386 // Will be NULL until ParseWindowLengths() has been called. After
387 // ParseWindowLengths() has been called successfully, this points to the
388 // beginning of the section of the current window titled "The delta encoding"
389 // in the RFC, i.e., to the position just after the length of the delta
390 // encoding.
391 const char* delta_encoding_start_;
392
393 // Making these private avoids implicit copy constructor & assignment operator
394 VCDiffHeaderParser(const VCDiffHeaderParser&);
395 void operator=(const VCDiffHeaderParser&);
396};
397
398} // namespace open_vcdiff
399
400#endif // OPEN_VCDIFF_HEADERPARSER_H_