// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

| 16 | #ifndef OPEN_VCDIFF_ENCODETABLE_H_ |
| 17 | #define OPEN_VCDIFF_ENCODETABLE_H_ |
| 18 | |
| 19 | #include <config.h> |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 20 | #include <stddef.h> // size_t |
| 21 | #include <stdint.h> // int32_t |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 22 | #include <string> |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 23 | #include "addrcache.h" |
| 24 | #include "checksum.h" |
| 25 | #include "codetable.h" |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 26 | #include "codetablewriter_interface.h" |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 27 | |
| 28 | namespace open_vcdiff { |
| 29 | |
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class OutputStringInterface;
class VCDiffInstructionMap;
| 32 | |
| 33 | // The method calls after construction *must* conform |
| 34 | // to the following pattern: |
| 35 | // {{Add|Copy|Run}* [AddChecksum] Output}* |
| 36 | // |
| 37 | // When Output has been called in this sequence, a complete target window |
| 38 | // (as defined in RFC 3284 section 4.3) will have been appended to |
| 39 | // out (unless no calls to Add, Run, or Copy were made, in which |
| 40 | // case Output will do nothing.) The output will not be available for use |
| 41 | // until after each call to Output(). |
| 42 | // |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 43 | // NOT threadsafe. |
| 44 | // |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 45 | class VCDiffCodeTableWriter : public CodeTableWriterInterface { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 46 | public: |
| 47 | // This constructor uses the default code table. |
| 48 | // If interleaved is true, the encoder writes each delta file window |
| 49 | // by interleaving instructions and sizes with their corresponding |
| 50 | // addresses and data, rather than placing these elements into three |
| 51 | // separate sections. This facilitates providing partially |
| 52 | // decoded results when only a portion of a delta file window |
| 53 | // is received (e.g. when HTTP over TCP is used as the |
| 54 | // transmission protocol.) The interleaved format is |
| 55 | // not consistent with the VCDIFF draft standard. |
| 56 | // |
| 57 | explicit VCDiffCodeTableWriter(bool interleaved); |
| 58 | |
| 59 | // Uses a non-standard code table and non-standard cache sizes. The caller |
| 60 | // must guarantee that code_table_data remains allocated for the lifetime of |
| 61 | // the VCDiffCodeTableWriter object. Note that this is different from how |
| 62 | // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given |
| 63 | // encoder will use either the default code table or a statically-defined |
| 64 | // non-standard code table, whereas the decoder must have the ability to read |
| 65 | // an arbitrary non-standard code table from a delta file and discard it once |
| 66 | // the file has been decoded. |
| 67 | // |
| 68 | VCDiffCodeTableWriter(bool interleaved, |
| 69 | int near_cache_size, |
| 70 | int same_cache_size, |
| 71 | const VCDiffCodeTableData& code_table_data, |
| 72 | unsigned char max_mode); |
| 73 | |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 74 | virtual ~VCDiffCodeTableWriter(); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 75 | |
| 76 | // Initializes the constructed object for use. |
| 77 | // This method must be called after a VCDiffCodeTableWriter is constructed |
| 78 | // and before any of its other methods can be called. It will return |
| 79 | // false if there was an error initializing the object, or true if it |
| 80 | // was successful. After the object has been initialized and used, |
| 81 | // Init() can be called again to restore the initial state of the object. |
| 82 | // |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 83 | virtual bool Init(size_t dictionary_size); |
| 84 | |
| 85 | // Write the header (as defined in section 4.1 of the RFC) to *out. |
| 86 | // This includes information that can be gathered |
| 87 | // before the first chunk of input is available. |
| 88 | virtual void WriteHeader(OutputStringInterface* out, |
| 89 | VCDiffFormatExtensionFlags format_extensions); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 90 | |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 91 | virtual size_t target_length() const { return target_length_; } |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 92 | |
| 93 | // Encode an ADD opcode with the "size" bytes starting at data |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 94 | virtual void Add(const char* data, size_t size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 95 | |
| 96 | // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes. |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 97 | virtual void Copy(int32_t offset, size_t size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 98 | |
| 99 | // Encode a RUN opcode for "size" copies of the value "byte". |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 100 | virtual void Run(size_t size, unsigned char byte); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 101 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 102 | virtual void AddChecksum(VCDChecksum checksum) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 103 | add_checksum_ = true; |
| 104 | checksum_ = checksum; |
| 105 | } |
| 106 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 107 | // Appends the encoded delta window to the output |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 108 | // string. The output string is not null-terminated and may contain embedded |
| 109 | // '\0' characters. |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 110 | virtual void Output(OutputStringInterface* out); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 111 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 112 | // There should not be any need to output more data |
| 113 | // since EncodeChunk() encodes a complete target window |
| 114 | // and there is no end-of-delta-file marker. |
| 115 | virtual void FinishEncoding(OutputStringInterface* /*out*/) {} |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 116 | |
| 117 | private: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 118 | typedef std::string string; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 119 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 120 | // The maximum value for the mode of a COPY instruction. |
| 121 | const unsigned char max_mode_; |
| 122 | |
| 123 | // If interleaved is true, sets data_for_add_and_run_ and |
| 124 | // addresses_for_copy_ to point at instructions_and_sizes_, |
| 125 | // so that instructions, sizes, addresses and data will be |
| 126 | // combined into a single interleaved stream. |
| 127 | // If interleaved is false, sets data_for_add_and_run_ and |
| 128 | // addresses_for_copy_ to point at their corresponding |
| 129 | // separate_... strings, so that the three sections will |
| 130 | // be generated separately from one another. |
| 131 | // |
| 132 | void InitSectionPointers(bool interleaved); |
| 133 | |
| 134 | // Determines the best opcode to encode an instruction, and appends |
| 135 | // or substitutes that opcode and its size into the |
| 136 | // instructions_and_sizes_ string. |
| 137 | // |
| 138 | void EncodeInstruction(VCDiffInstructionType inst, |
| 139 | size_t size, |
| 140 | unsigned char mode); |
| 141 | |
| 142 | void EncodeInstruction(VCDiffInstructionType inst, size_t size) { |
| 143 | return EncodeInstruction(inst, size, 0); |
| 144 | } |
| 145 | |
| 146 | // Calculates the number of bytes needed to store the given size value as a |
| 147 | // variable-length integer (VarintBE). |
| 148 | static size_t CalculateLengthOfSizeAsVarint(size_t size); |
| 149 | |
| 150 | // Appends the size value to the string as a variable-length integer. |
| 151 | static void AppendSizeToString(size_t size, string* out); |
| 152 | |
| 153 | // Appends the size value to the output string as a variable-length integer. |
| 154 | static void AppendSizeToOutputString(size_t size, OutputStringInterface* out); |
| 155 | |
| 156 | // Calculates the "Length of the delta encoding" field for the delta window |
| 157 | // header, based on the sizes of the sections and of the other header |
| 158 | // elements. |
| 159 | size_t CalculateLengthOfTheDeltaEncoding() const; |
| 160 | |
| 161 | // None of the following 'string' objects are null-terminated. |
| 162 | |
| 163 | // A series of instruction opcodes, each of which may be followed |
| 164 | // by one or two Varint values representing the size parameters |
| 165 | // of the first and second instruction in the opcode. |
| 166 | string instructions_and_sizes_; |
| 167 | |
| 168 | // A series of data arguments (byte values) used for ADD and RUN |
| 169 | // instructions. Depending on whether interleaved output is used |
| 170 | // for streaming or not, the pointer may point to |
| 171 | // separate_data_for_add_and_run_ or to instructions_and_sizes_. |
| 172 | string *data_for_add_and_run_; |
| 173 | string separate_data_for_add_and_run_; |
| 174 | |
| 175 | // A series of Varint addresses used for COPY instructions. |
| 176 | // For the SAME mode, a byte value is stored instead of a Varint. |
| 177 | // Depending on whether interleaved output is used |
| 178 | // for streaming or not, the pointer may point to |
| 179 | // separate_addresses_for_copy_ or to instructions_and_sizes_. |
| 180 | string *addresses_for_copy_; |
| 181 | string separate_addresses_for_copy_; |
| 182 | |
| 183 | VCDiffAddressCache address_cache_; |
| 184 | |
| 185 | size_t dictionary_size_; |
| 186 | |
| 187 | // The number of bytes of target data that has been encoded so far. |
| 188 | // Each time Add(), Copy(), or Run() is called, this will be incremented. |
| 189 | // The target length is used to compute HERE mode addresses |
| 190 | // for COPY instructions, and is also written into the header |
| 191 | // of the delta window when Output() is called. |
| 192 | // |
| 193 | size_t target_length_; |
| 194 | |
| 195 | const VCDiffCodeTableData* code_table_data_; |
| 196 | |
| 197 | // The instruction map facilitates finding an opcode quickly given an |
| 198 | // instruction inst, size, and mode. This is an alternate representation |
| 199 | // of the same information that is found in code_table_data_. |
| 200 | // |
| 201 | const VCDiffInstructionMap* instruction_map_; |
| 202 | |
| 203 | // The zero-based index within instructions_and_sizes_ of the byte |
| 204 | // that contains the last single-instruction opcode generated by |
| 205 | // EncodeInstruction(). (See that function for exhaustive details.) |
| 206 | // It is necessary to use an index rather than a pointer for this value |
| 207 | // because instructions_and_sizes_ may be resized, which would invalidate |
| 208 | // any pointers into its data buffer. The value -1 is reserved to mean that |
| 209 | // either no opcodes have been generated yet, or else the last opcode |
| 210 | // generated was a double-instruction opcode. |
| 211 | // |
| 212 | int last_opcode_index_; |
| 213 | |
| 214 | // If true, an Adler32 checksum of the target window data will be written as |
| 215 | // a variable-length integer, just after the size of the addresses section. |
| 216 | // |
| 217 | bool add_checksum_; |
| 218 | |
| 219 | // The checksum to be written to the current target window, |
| 220 | // if add_checksum_ is true. |
| 221 | // This will not be calculated based on the individual calls to Add(), Run(), |
| 222 | // and Copy(), which would be unnecessarily expensive. Instead, the code |
| 223 | // that uses the VCDiffCodeTableWriter object is expected to calculate |
| 224 | // the checksum all at once and to call AddChecksum() with that value. |
| 225 | // Must be called sometime before calling Output(), though it can be called |
| 226 | // either before or after the calls to Add(), Run(), and Copy(). |
| 227 | // |
| 228 | VCDChecksum checksum_; |
| 229 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 230 | // Making these private avoids implicit copy constructor & assignment operator |
| 231 | VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT |
| 232 | void operator=(const VCDiffCodeTableWriter&); |
| 233 | }; |
| 234 | |
| 235 | }; // namespace open_vcdiff |
| 236 | |
| 237 | #endif // OPEN_VCDIFF_ENCODETABLE_H_ |