openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2007 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | // Classes to implement the Address Cache and Address Encoding |
| 17 | // algorithms described in sections 5.1 - 5.4 of RFC 3284 - |
| 18 | // The VCDIFF Generic Differencing and Compression Data Format. |
| 19 | // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
| 20 | |
| 21 | #ifndef OPEN_VCDIFF_ADDRCACHE_H_ |
| 22 | #define OPEN_VCDIFF_ADDRCACHE_H_ |
| 23 | |
| 24 | #include <config.h> |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 25 | #include <vector> |
| 26 | #include "vcdiff_defs.h" // VCDAddress |
| 27 | |
| 28 | namespace open_vcdiff { |
| 29 | |
| 30 | // Implements the "same" and "near" caches |
| 31 | // as described in RFC 3284, section 5. The "near" cache allows |
| 32 | // efficient reuse of one of the last four referenced addresses |
| 33 | // plus a small offset, and the "same" cache allows efficient reuse |
| 34 | // of an exact recent address distinguished by its lowest-order bits. |
| 35 | // |
| 36 | // NOT threadsafe. |
| 37 | // |
| 38 | class VCDiffAddressCache { |
| 39 | public: |
| 40 | // The default cache sizes specified in the RFC |
| 41 | static const int kDefaultNearCacheSize = 4; |
| 42 | static const int kDefaultSameCacheSize = 3; |
| 43 | |
| 44 | VCDiffAddressCache(int near_cache_size, int same_cache_size); |
| 45 | |
| 46 | // This version of the constructor uses the default values |
| 47 | // kDefaultNearCacheSize and kDefaultSameCacheSize. |
| 48 | VCDiffAddressCache(); |
| 49 | |
| 50 | // Initializes the object before use. This method must be called after |
| 51 | // constructing a VCDiffAddressCache/ object, before any other method may be |
| 52 | // called. This is because Init() validates near_cache_size_ and |
| 53 | // same_cache_size_ before initializing the same and near caches. After the |
| 54 | // object has been initialized and used, Init() can be called again to reset |
| 55 | // it to its initial state. |
| 56 | // |
| 57 | bool Init(); |
| 58 | |
| 59 | int near_cache_size() const { return near_cache_size_; } |
| 60 | |
| 61 | int same_cache_size() const { return same_cache_size_; } |
| 62 | |
| 63 | // Returns the first mode number that represents one of the NEAR modes. |
| 64 | // The number of NEAR modes is near_cache_size. Each NEAR mode refers to |
| 65 | // an element of the near_addresses_ array, where a recently-referenced |
| 66 | // address is stored. |
| 67 | // |
openvcdiff | 83bbde0 | 2008-10-23 23:43:46 +0000 | [diff] [blame] | 68 | static unsigned char FirstNearMode() { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 69 | return VCD_FIRST_NEAR_MODE; |
| 70 | } |
| 71 | |
| 72 | // Returns the first mode number that represents one of the SAME modes. |
| 73 | // The number of SAME modes is same_cache_size. Each SAME mode refers to |
| 74 | // a block of 256 elements of the same_addresses_ array; the lowest-order |
| 75 | // 8 bits of the address are used to find the element of this block that |
| 76 | // may match the desired address value. |
| 77 | // |
openvcdiff | 83bbde0 | 2008-10-23 23:43:46 +0000 | [diff] [blame] | 78 | unsigned char FirstSameMode() const { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 79 | return VCD_FIRST_NEAR_MODE + near_cache_size(); |
| 80 | } |
| 81 | |
| 82 | // Returns the maximum valid mode number, which happens to be |
| 83 | // the last SAME mode. |
| 84 | // |
openvcdiff | 83bbde0 | 2008-10-23 23:43:46 +0000 | [diff] [blame] | 85 | unsigned char LastMode() const { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 86 | return FirstSameMode() + same_cache_size() - 1; |
| 87 | } |
| 88 | |
openvcdiff | 83bbde0 | 2008-10-23 23:43:46 +0000 | [diff] [blame] | 89 | static unsigned char DefaultLastMode() { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 90 | return VCD_FIRST_NEAR_MODE |
| 91 | + kDefaultNearCacheSize + kDefaultSameCacheSize - 1; |
| 92 | } |
| 93 | |
| 94 | // See the definition of enum VCDiffModes in vcdiff_defs.h, |
| 95 | // as well as section 5.3 of the RFC, for a description of |
| 96 | // each address mode type (SELF, HERE, NEAR, and SAME). |
| 97 | static bool IsSelfMode(unsigned char mode) { |
| 98 | return mode == VCD_SELF_MODE; |
| 99 | } |
| 100 | |
| 101 | static bool IsHereMode(unsigned char mode) { |
| 102 | return mode == VCD_HERE_MODE; |
| 103 | } |
| 104 | |
| 105 | bool IsNearMode(unsigned char mode) const { |
| 106 | return (mode >= FirstNearMode()) && (mode < FirstSameMode()); |
| 107 | } |
| 108 | |
| 109 | bool IsSameMode(unsigned char mode) const { |
| 110 | return (mode >= FirstSameMode()) && (mode <= LastMode()); |
| 111 | } |
| 112 | |
| 113 | static VCDAddress DecodeSelfAddress(int32_t encoded_address) { |
| 114 | return encoded_address; |
| 115 | } |
| 116 | |
| 117 | static VCDAddress DecodeHereAddress(int32_t encoded_address, |
| 118 | VCDAddress here_address) { |
| 119 | return here_address - encoded_address; |
| 120 | } |
| 121 | |
| 122 | VCDAddress DecodeNearAddress(unsigned char mode, |
| 123 | int32_t encoded_address) const { |
| 124 | return NearAddress(mode - FirstNearMode()) + encoded_address; |
| 125 | } |
| 126 | |
| 127 | VCDAddress DecodeSameAddress(unsigned char mode, |
| 128 | unsigned char encoded_address) const { |
| 129 | return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address); |
| 130 | } |
| 131 | |
| 132 | // Returns true if, when using the given mode, an encoded address |
| 133 | // should be written to the delta file as a variable-length integer; |
| 134 | // returns false if the encoded address should be written |
| 135 | // as a byte value (unsigned char). |
| 136 | bool WriteAddressAsVarintForMode(unsigned char mode) const { |
| 137 | return !IsSameMode(mode); |
| 138 | } |
| 139 | |
| 140 | // An accessor for an element of the near_addresses_ array. |
| 141 | // No bounds checking is performed; the caller must ensure that |
| 142 | // Init() has already been called, and that |
| 143 | // 0 <= pos < near_cache_size_ |
| 144 | // |
| 145 | VCDAddress NearAddress(int pos) const { |
| 146 | return near_addresses_[pos]; |
| 147 | } |
| 148 | |
| 149 | // An accessor for an element of the same_addresses_ array. |
| 150 | // No bounds checking is performed; the caller must ensure that |
| 151 | // Init() has already been called, and that |
| 152 | // 0 <= pos < (same_cache_size_ * 256) |
| 153 | // |
| 154 | VCDAddress SameAddress(int pos) const { |
| 155 | return same_addresses_[pos]; |
| 156 | } |
| 157 | |
| 158 | // This method will be called whenever an address is calculated for an |
| 159 | // encoded or decoded COPY instruction, and will update the contents |
| 160 | // of the SAME and NEAR caches. |
| 161 | // |
| 162 | void UpdateCache(VCDAddress address); |
| 163 | |
| 164 | // Determines the address mode that yields the most compact encoding |
| 165 | // of the given address value. The most compact encoding |
| 166 | // is found by looking for the numerically lowest encoded address. |
| 167 | // Sets *encoded_addr to the encoded representation of the address |
| 168 | // and returns the mode used. |
| 169 | // |
| 170 | // The caller should pass the return value to the method |
| 171 | // WriteAddressAsVarintForMode() to determine whether encoded_addr |
| 172 | // should be written to the delta file as a variable-length integer |
| 173 | // or as a byte (unsigned char). |
| 174 | // |
| 175 | unsigned char EncodeAddress(VCDAddress address, |
| 176 | VCDAddress here_address, |
| 177 | VCDAddress* encoded_addr); |
| 178 | |
| 179 | // Interprets the next value in the address_stream using the provided mode, |
| 180 | // which may need to access the SAME or NEAR address cache. Returns the |
| 181 | // decoded address, or one of the following values: |
| 182 | // RESULT_ERROR: An invalid address value was found in address_stream. |
| 183 | // RESULT_END_OF_DATA: The limit address_stream_end was reached before |
| 184 | // the address could be decoded. If more streamed data is expected, |
| 185 | // this means that the consumer should block and wait for more data |
| 186 | // before continuing to decode. If no more data is expected, this |
| 187 | // return value signals an error condition. |
| 188 | // |
| 189 | // If successful, *address_stream will be incremented past the decoded address |
| 190 | // position. If RESULT_ERROR or RESULT_END_OF_DATA is returned, |
| 191 | // then the value of *address_stream will not have changed. |
| 192 | // |
| 193 | VCDAddress DecodeAddress(VCDAddress here_address, |
| 194 | unsigned char mode, |
| 195 | const char** address_stream, |
| 196 | const char* address_stream_end); |
| 197 | |
| 198 | private: |
| 199 | // The number of addresses to be kept in the NEAR cache. |
| 200 | const int near_cache_size_; |
| 201 | // The number of 256-byte blocks to store in the SAME cache. |
| 202 | const int same_cache_size_; |
| 203 | // The next position in the NEAR cache to which an address will be written. |
| 204 | int next_slot_; |
| 205 | // NEAR cache contents |
| 206 | std::vector<VCDAddress> near_addresses_; |
| 207 | // SAME cache contents |
| 208 | std::vector<VCDAddress> same_addresses_; |
| 209 | |
| 210 | // Making these private avoids implicit copy constructor & assignment operator |
| 211 | VCDiffAddressCache(const VCDiffAddressCache&); // NOLINT |
| 212 | void operator=(const VCDiffAddressCache&); |
| 213 | }; |
| 214 | |
| 215 | } // namespace open_vcdiff |
| 216 | |
| 217 | #endif // OPEN_VCDIFF_ADDRCACHE_H_ |