openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2008 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | |
| 16 | #include <config.h> |
| 17 | #include "google/vcencoder.h" |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 18 | #include <stdlib.h> // free, posix_memalign |
| 19 | #include <string.h> // memcpy |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 20 | #include <algorithm> |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 21 | #include <string> |
| 22 | #include <vector> |
| 23 | #include "blockhash.h" |
| 24 | #include "checksum.h" |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 25 | #include "testing.h" |
| 26 | #include "varint_bigendian.h" |
| 27 | #include "google/vcdecoder.h" |
| 28 | #include "vcdiff_defs.h" |
| 29 | |
| 30 | #ifdef HAVE_EXT_ROPE |
| 31 | #include <ext/rope> |
| 32 | #include "output_string_crope.h" |
| 33 | using __gnu_cxx::crope; |
| 34 | #endif // HAVE_EXT_ROPE |
| 35 | |
| 36 | #ifdef HAVE_MALLOC_H |
| 37 | #include <malloc.h> |
| 38 | #endif // HAVE_MALLOC_H |
| 39 | |
| 40 | #ifdef HAVE_SYS_MMAN_H |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 41 | #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600 |
| 42 | #undef _XOPEN_SOURCE |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 43 | #define _XOPEN_SOURCE 600 // posix_memalign |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 44 | #endif |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 45 | #include <sys/mman.h> // mprotect |
| 46 | #endif // HAVE_SYS_MMAN_H |
| 47 | |
| 48 | #ifdef HAVE_UNISTD_H |
| 49 | #include <unistd.h> // getpagesize |
| 50 | #endif // HAVE_UNISTD_H |
| 51 | |
| 52 | namespace open_vcdiff { |
| 53 | namespace { |
| 54 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 55 | static const size_t kFileHeaderSize = sizeof(DeltaFileHeader); |
| 56 | |
// Upper bound on the per-window overhead of an encoding, used to check the
// maximum possible encoding size if a single ADD instruction is used.
// It assumes that the dictionary size, the length of the ADD data, the size
// of the target window, and the length of the delta window
// are all two-byte Varints, that is, 128 <= length < 16384
// (a Varint byte carries 7 bits of payload, so two bytes hold 14 bits).
// This figure includes three extra bytes for a zero-sized
// ADD instruction with a two-byte Varint explicit size.
// Any additional COPY & ADD instructions must reduce
// the length of the encoding from this maximum.
static const size_t kWindowHeaderSize = 21;
| 67 | |
| 68 | class VerifyEncodedBytesTest : public testing::Test { |
| 69 | public: |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 70 | typedef std::string string; |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 71 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 72 | VerifyEncodedBytesTest() : delta_index_(0) { } |
| 73 | virtual ~VerifyEncodedBytesTest() { } |
| 74 | |
| 75 | void ExpectByte(unsigned char b) { |
| 76 | EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_])); |
| 77 | ++delta_index_; |
| 78 | } |
| 79 | |
| 80 | void ExpectString(const char* s) { |
| 81 | const size_t size = strlen(s); // don't include terminating NULL char |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 82 | EXPECT_EQ(s, string(delta_data() + delta_index_, size)); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 83 | delta_index_ += size; |
| 84 | } |
| 85 | |
| 86 | void ExpectNoMoreBytes() { |
| 87 | EXPECT_EQ(delta_index_, delta_size()); |
| 88 | } |
| 89 | |
| 90 | void ExpectSize(size_t size) { |
| 91 | const char* delta_size_pos = &delta_[delta_index_]; |
| 92 | EXPECT_EQ(size, |
| 93 | static_cast<size_t>( |
| 94 | VarintBE<int32_t>::Parse(delta_data() + delta_size(), |
| 95 | &delta_size_pos))); |
| 96 | delta_index_ = delta_size_pos - delta_data(); |
| 97 | } |
| 98 | |
| 99 | void ExpectChecksum(VCDChecksum checksum) { |
| 100 | const char* delta_checksum_pos = &delta_[delta_index_]; |
| 101 | EXPECT_EQ(checksum, |
| 102 | static_cast<VCDChecksum>( |
| 103 | VarintBE<int64_t>::Parse(delta_data() + delta_size(), |
| 104 | &delta_checksum_pos))); |
| 105 | delta_index_ = delta_checksum_pos - delta_data(); |
| 106 | } |
| 107 | |
| 108 | const string& delta_as_const() const { return delta_; } |
| 109 | string* delta() { return &delta_; } |
| 110 | |
| 111 | const char* delta_data() const { return delta_as_const().data(); } |
| 112 | size_t delta_size() const { return delta_as_const().size(); } |
| 113 | |
| 114 | private: |
| 115 | string delta_; |
| 116 | size_t delta_index_; |
| 117 | }; |
| 118 | |
| 119 | class VCDiffEncoderTest : public VerifyEncodedBytesTest { |
| 120 | protected: |
| 121 | static const char kDictionary[]; |
| 122 | static const char kTarget[]; |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 123 | static const char kJSONDiff[]; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 124 | |
| 125 | VCDiffEncoderTest(); |
| 126 | virtual ~VCDiffEncoderTest() { } |
| 127 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 128 | void TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, |
| 129 | VCDiffStreamingDecoder *decoder, |
| 130 | size_t chunk_size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 131 | void TestWithEncodedChunkVector(size_t chunk_size); |
| 132 | |
| 133 | HashedDictionary hashed_dictionary_; |
| 134 | VCDiffStreamingEncoder encoder_; |
| 135 | VCDiffStreamingDecoder decoder_; |
| 136 | VCDiffEncoder simple_encoder_; |
| 137 | VCDiffDecoder simple_decoder_; |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 138 | VCDiffStreamingEncoder json_encoder_; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 139 | |
| 140 | string result_target_; |
| 141 | }; |
| 142 | |
| 143 | const char VCDiffEncoderTest::kDictionary[] = |
| 144 | "\"Just the place for a Snark!\" the Bellman cried,\n" |
| 145 | "As he landed his crew with care;\n" |
| 146 | "Supporting each man on the top of the tide\n" |
| 147 | "By a finger entwined in his hair.\n"; |
| 148 | |
| 149 | const char VCDiffEncoderTest::kTarget[] = |
| 150 | "\"Just the place for a Snark! I have said it twice:\n" |
| 151 | "That alone should encourage the crew.\n" |
| 152 | "Just the place for a Snark! I have said it thrice:\n" |
| 153 | "What I tell you three times is true.\"\n"; |
| 154 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 155 | const char VCDiffEncoderTest::kJSONDiff[] = |
| 156 | "[\"\\\"Just the place for a Snark! I have said it twice:\\n" |
| 157 | "That alone should encourage the crew.\\n\"," |
| 158 | "161,44," |
| 159 | "\"hrice:\\nWhat I tell you three times is true.\\\"\\n\",]"; |
| 160 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 161 | VCDiffEncoderTest::VCDiffEncoderTest() |
| 162 | : hashed_dictionary_(kDictionary, sizeof(kDictionary)), |
| 163 | encoder_(&hashed_dictionary_, |
| 164 | VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, |
| 165 | /* look_for_target_matches = */ true), |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 166 | simple_encoder_(kDictionary, sizeof(kDictionary)), |
| 167 | json_encoder_(&hashed_dictionary_, |
| 168 | VCD_FORMAT_JSON, |
| 169 | /* look_for_target_matches = */ true) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 170 | EXPECT_TRUE(hashed_dictionary_.Init()); |
| 171 | } |
| 172 | |
| 173 | TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) { |
| 174 | EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 175 | } |
| 176 | |
| 177 | TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) { |
| 178 | EXPECT_FALSE(encoder_.FinishEncoding(delta())); |
| 179 | } |
| 180 | |
| 181 | TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) { |
| 182 | HashedDictionary nothing_dictionary("", 0); |
| 183 | EXPECT_TRUE(nothing_dictionary.Init()); |
| 184 | VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, |
| 185 | VCD_STANDARD_FORMAT, |
| 186 | false); |
| 187 | EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); |
| 188 | EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); |
| 189 | decoder_.StartDecoding("", 0); |
| 190 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 191 | delta_size(), |
| 192 | &result_target_)); |
| 193 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 194 | EXPECT_TRUE(result_target_.empty()); |
| 195 | } |
| 196 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 197 | TEST_F(VCDiffEncoderTest, EncodeNothingJSON) { |
| 198 | HashedDictionary nothing_dictionary("", 0); |
| 199 | EXPECT_TRUE(nothing_dictionary.Init()); |
| 200 | VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, |
| 201 | VCD_FORMAT_JSON, |
| 202 | false); |
| 203 | EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); |
| 204 | EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); |
| 205 | EXPECT_EQ("", delta_as_const()); |
| 206 | } |
| 207 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 208 | // A NULL dictionary pointer is legal as long as the dictionary size is 0. |
| 209 | TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) { |
| 210 | HashedDictionary null_dictionary(NULL, 0); |
| 211 | EXPECT_TRUE(null_dictionary.Init()); |
| 212 | VCDiffStreamingEncoder null_encoder(&null_dictionary, |
| 213 | VCD_STANDARD_FORMAT, |
| 214 | false); |
| 215 | EXPECT_TRUE(null_encoder.StartEncoding(delta())); |
| 216 | EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 217 | EXPECT_TRUE(null_encoder.FinishEncoding(delta())); |
| 218 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 219 | delta_size()); |
| 220 | decoder_.StartDecoding(NULL, 0); |
| 221 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 222 | delta_size(), |
| 223 | &result_target_)); |
| 224 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 225 | EXPECT_EQ(kTarget, result_target_); |
| 226 | } |
| 227 | |
| 228 | TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) { |
| 229 | EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); |
| 230 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 231 | delta_size()); |
| 232 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 233 | sizeof(kDictionary), |
| 234 | delta_as_const(), |
| 235 | &result_target_)); |
| 236 | EXPECT_EQ(kTarget, result_target_); |
| 237 | } |
| 238 | |
| 239 | TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) { |
| 240 | simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED); |
| 241 | EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); |
| 242 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 243 | delta_size()); |
| 244 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 245 | sizeof(kDictionary), |
| 246 | delta_as_const(), |
| 247 | &result_target_)); |
| 248 | EXPECT_EQ(kTarget, result_target_); |
| 249 | } |
| 250 | |
| 251 | TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) { |
| 252 | simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM); |
| 253 | EXPECT_TRUE(simple_encoder_.Encode(kTarget, |
| 254 | strlen(kTarget), |
| 255 | delta())); |
| 256 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 257 | delta_size()); |
| 258 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 259 | sizeof(kDictionary), |
| 260 | delta_as_const(), |
| 261 | &result_target_)); |
| 262 | EXPECT_EQ(kTarget, result_target_); |
| 263 | } |
| 264 | |
| 265 | TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) { |
| 266 | EXPECT_TRUE(encoder_.StartEncoding(delta())); |
| 267 | EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 268 | EXPECT_TRUE(encoder_.FinishEncoding(delta())); |
| 269 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 270 | delta_size()); |
| 271 | decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); |
| 272 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 273 | delta_size(), |
| 274 | &result_target_)); |
| 275 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 276 | EXPECT_EQ(kTarget, result_target_); |
| 277 | } |
| 278 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 279 | TEST_F(VCDiffEncoderTest, EncodeSimpleJSON) { |
| 280 | EXPECT_TRUE(json_encoder_.StartEncoding(delta())); |
| 281 | EXPECT_TRUE(json_encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 282 | EXPECT_TRUE(json_encoder_.FinishEncoding(delta())); |
| 283 | EXPECT_EQ(kJSONDiff, delta_as_const()); |
| 284 | } |
| 285 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 286 | TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) { |
| 287 | string delta_start, delta_encode, delta_finish; |
| 288 | EXPECT_TRUE(encoder_.StartEncoding(&delta_start)); |
| 289 | EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode)); |
| 290 | EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish)); |
| 291 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 292 | delta_start.size() + delta_encode.size() + delta_finish.size()); |
| 293 | decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); |
| 294 | EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(), |
| 295 | delta_start.size(), |
| 296 | &result_target_)); |
| 297 | EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(), |
| 298 | delta_encode.size(), |
| 299 | &result_target_)); |
| 300 | EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(), |
| 301 | delta_finish.size(), |
| 302 | &result_target_)); |
| 303 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 304 | EXPECT_EQ(kTarget, result_target_); |
| 305 | } |
| 306 | |
#ifdef HAVE_EXT_ROPE
// Test that the crope class can be used in place of a string for encoding
// and decoding.
TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) {
  const size_t target_length = strlen(kTarget);
  crope delta_crope;
  crope result_crope;

  EXPECT_TRUE(encoder_.StartEncoding(&delta_crope));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, target_length, &delta_crope));
  EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope));
  EXPECT_GE(target_length + kFileHeaderSize + kWindowHeaderSize,
            delta_crope.size());

  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  // crope can't guarantee that its characters are contiguous, so the decoding
  // has to be done byte-by-byte.
  const crope::const_iterator delta_end = delta_crope.end();
  for (crope::const_iterator pos = delta_crope.begin();
       pos != delta_end; ++pos) {
    const char one_byte = *pos;
    EXPECT_TRUE(decoder_.DecodeChunk(&one_byte, 1, &result_crope));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());

  const crope expected_target(kTarget);
  EXPECT_EQ(expected_target, result_crope);
}
#endif  // HAVE_EXT_ROPE
| 330 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 331 | // Test the encoding and decoding with a fixed chunk size. |
| 332 | // If decoder is null, only test the encoding. |
| 333 | void VCDiffEncoderTest::TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, |
| 334 | VCDiffStreamingDecoder *decoder, |
| 335 | size_t chunk_size) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 336 | delta()->clear(); |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 337 | EXPECT_TRUE(encoder->StartEncoding(delta())); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 338 | for (size_t chunk_start_index = 0; |
| 339 | chunk_start_index < strlen(kTarget); |
| 340 | chunk_start_index += chunk_size) { |
| 341 | size_t this_chunk_size = chunk_size; |
| 342 | const size_t bytes_available = strlen(kTarget) - chunk_start_index; |
| 343 | if (this_chunk_size > bytes_available) { |
| 344 | this_chunk_size = bytes_available; |
| 345 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 346 | EXPECT_TRUE(encoder->EncodeChunk(&kTarget[chunk_start_index], |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 347 | this_chunk_size, |
| 348 | delta())); |
| 349 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 350 | EXPECT_TRUE(encoder->FinishEncoding(delta())); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 351 | const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; |
| 352 | const size_t size_of_windows = |
| 353 | strlen(kTarget) + (kWindowHeaderSize * num_windows); |
| 354 | EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size()); |
| 355 | result_target_.clear(); |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 356 | |
| 357 | if (!decoder) return; |
| 358 | |
| 359 | decoder->StartDecoding(kDictionary, sizeof(kDictionary)); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 360 | for (size_t chunk_start_index = 0; |
| 361 | chunk_start_index < delta_size(); |
| 362 | chunk_start_index += chunk_size) { |
| 363 | size_t this_chunk_size = chunk_size; |
| 364 | const size_t bytes_available = delta_size() - chunk_start_index; |
| 365 | if (this_chunk_size > bytes_available) { |
| 366 | this_chunk_size = bytes_available; |
| 367 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 368 | EXPECT_TRUE(decoder->DecodeChunk(delta_data() + chunk_start_index, |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 369 | this_chunk_size, |
| 370 | &result_target_)); |
| 371 | } |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 372 | EXPECT_TRUE(decoder->FinishDecoding()); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 373 | EXPECT_EQ(kTarget, result_target_); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 374 | } |
| 375 | |
| 376 | TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) { |
| 377 | // These specific chunk sizes have failed in the past |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 378 | TestWithFixedChunkSize(&encoder_, &decoder_, 6); |
| 379 | TestWithFixedChunkSize(&encoder_, &decoder_, 45); |
| 380 | TestWithFixedChunkSize(&encoder_, &decoder_, 60); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 381 | |
| 382 | // Now loop through all possible chunk sizes |
| 383 | for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 384 | TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 385 | } |
| 386 | } |
| 387 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 388 | TEST_F(VCDiffEncoderTest, EncodeFixedChunkSizesJSON) { |
| 389 | // There is no JSON decoder; these diffs are created by hand. |
| 390 | TestWithFixedChunkSize(&json_encoder_, NULL, 6); |
| 391 | EXPECT_EQ("[\"\\\"Just \",\"the pl\",\"ace fo\",\"r a Sn\",\"ark! I\"," |
| 392 | "\" have \",\"said i\",\"t twic\",\"e:\\nTha\",\"t alon\"," |
| 393 | "\"e shou\",\"ld enc\",\"ourage\",\" the c\",\"rew.\\nJ\"," |
| 394 | "\"ust th\",\"e plac\",\"e for \",\"a Snar\",\"k! I h\"," |
| 395 | "\"ave sa\",\"id it \",\"thrice\",\":\\nWhat\",\" I tel\"," |
| 396 | "\"l you \",\"three \",\"times \",\"is tru\",\"e.\\\"\\n\",]", |
| 397 | delta_as_const()); |
| 398 | TestWithFixedChunkSize(&json_encoder_, NULL, 45); |
| 399 | EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it t\"," |
| 400 | "\"wice:\\nThat alone should encourage the crew.\\nJ\"," |
| 401 | "\"ust the place for a Snark! I have said it thr\",\"ice:\\n" |
| 402 | "What I tell you three times is true.\\\"\\n\",]", |
| 403 | delta_as_const()); |
| 404 | TestWithFixedChunkSize(&json_encoder_, NULL, 60); |
| 405 | EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it twice:\\n" |
| 406 | "That alon\",\"e should encourage the crew.\\n" |
| 407 | "Just the place for a Snark! I h\",\"ave said it thrice:\\n" |
| 408 | "What I tell you three times is true.\\\"\\n\",]", |
| 409 | delta_as_const()); |
| 410 | } |
| 411 | |
| 412 | |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 413 | // If --allow_vcd_target=false is specified, the decoder will throw away some of |
| 414 | // the internally-stored decoded target beyond the current window. Try |
| 415 | // different numbers of encoded window sizes to make sure that this behavior |
| 416 | // does not affect the results. |
| 417 | TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) { |
| 418 | decoder_.SetAllowVcdTarget(false); |
| 419 | // Loop through all possible chunk sizes |
| 420 | for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 421 | TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); |
openvcdiff | baf44ea | 2009-04-09 19:20:49 +0000 | [diff] [blame] | 422 | } |
| 423 | } |
| 424 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 425 | // Splits the text to be encoded into fixed-size chunks. Encodes each |
| 426 | // chunk and puts it into a vector of strings. Then decodes each string |
| 427 | // in the vector and appends the result into result_target_. |
| 428 | void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) { |
| 429 | std::vector<string> encoded_chunks; |
| 430 | string this_encoded_chunk; |
| 431 | size_t total_chunk_size = 0; |
| 432 | EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk)); |
| 433 | encoded_chunks.push_back(this_encoded_chunk); |
| 434 | total_chunk_size += this_encoded_chunk.size(); |
| 435 | for (size_t chunk_start_index = 0; |
| 436 | chunk_start_index < strlen(kTarget); |
| 437 | chunk_start_index += chunk_size) { |
| 438 | size_t this_chunk_size = chunk_size; |
| 439 | const size_t bytes_available = strlen(kTarget) - chunk_start_index; |
| 440 | if (this_chunk_size > bytes_available) { |
| 441 | this_chunk_size = bytes_available; |
| 442 | } |
| 443 | this_encoded_chunk.clear(); |
| 444 | EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index], |
| 445 | this_chunk_size, |
| 446 | &this_encoded_chunk)); |
| 447 | encoded_chunks.push_back(this_encoded_chunk); |
| 448 | total_chunk_size += this_encoded_chunk.size(); |
| 449 | } |
| 450 | this_encoded_chunk.clear(); |
| 451 | EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk)); |
| 452 | encoded_chunks.push_back(this_encoded_chunk); |
| 453 | total_chunk_size += this_encoded_chunk.size(); |
| 454 | const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; |
| 455 | const size_t size_of_windows = |
| 456 | strlen(kTarget) + (kWindowHeaderSize * num_windows); |
| 457 | EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size); |
| 458 | result_target_.clear(); |
| 459 | decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); |
| 460 | for (std::vector<string>::iterator it = encoded_chunks.begin(); |
| 461 | it != encoded_chunks.end(); ++it) { |
| 462 | EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_)); |
| 463 | } |
| 464 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 465 | EXPECT_EQ(kTarget, result_target_); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 466 | } |
| 467 | |
| 468 | TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) { |
| 469 | // Loop through all possible chunk sizes |
| 470 | for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { |
| 471 | TestWithEncodedChunkVector(chunk_size); |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | // Verify that HashedDictionary stores a copy of the dictionary text, |
| 476 | // rather than just storing a pointer to it. If the dictionary buffer |
| 477 | // is overwritten after creating a HashedDictionary from it, it shouldn't |
| 478 | // affect an encoder that uses that HashedDictionary. |
| 479 | TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) { |
| 480 | string dictionary_copy(kDictionary, sizeof(kDictionary)); |
| 481 | HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size()); |
| 482 | EXPECT_TRUE(hd_copy.Init()); |
| 483 | VCDiffStreamingEncoder copy_encoder(&hd_copy, |
| 484 | VCD_FORMAT_INTERLEAVED |
| 485 | | VCD_FORMAT_CHECKSUM, |
| 486 | /* look_for_target_matches = */ true); |
| 487 | // Produce a reference version of the encoded text. |
| 488 | string delta_before; |
| 489 | EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before)); |
| 490 | EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, |
| 491 | strlen(kTarget), |
| 492 | &delta_before)); |
| 493 | EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before)); |
| 494 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 495 | delta_before.size()); |
| 496 | |
| 497 | // Overwrite the dictionary text with all 'Q' characters. |
| 498 | dictionary_copy.replace(0, |
| 499 | dictionary_copy.size(), |
| 500 | dictionary_copy.size(), |
| 501 | 'Q'); |
| 502 | // When the encoder is used on the same target text after overwriting |
| 503 | // the dictionary, it should produce the same encoded output. |
| 504 | string delta_after; |
| 505 | EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after)); |
| 506 | EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after)); |
| 507 | EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after)); |
| 508 | EXPECT_EQ(delta_before, delta_after); |
| 509 | } |
| 510 | |
| 511 | // Binary data test part 1: The dictionary and target data should not |
| 512 | // be treated as NULL-terminated. An embedded NULL should be handled like |
| 513 | // any other byte of data. |
| 514 | TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) { |
| 515 | const char embedded_null_dictionary_text[] = |
| 516 | { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 }; |
| 517 | const char embedded_null_target[] = |
| 518 | { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 }; |
| 519 | CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); |
| 520 | CHECK_EQ(8, sizeof(embedded_null_target)); |
| 521 | HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, |
| 522 | sizeof(embedded_null_dictionary_text)); |
| 523 | EXPECT_TRUE(embedded_null_dictionary.Init()); |
| 524 | VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, |
| 525 | VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, |
| 526 | /* look_for_target_matches = */ true); |
| 527 | EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); |
| 528 | EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, |
| 529 | sizeof(embedded_null_target), |
| 530 | delta())); |
| 531 | EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); |
| 532 | decoder_.StartDecoding(embedded_null_dictionary_text, |
| 533 | sizeof(embedded_null_dictionary_text)); |
| 534 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 535 | delta_size(), |
| 536 | &result_target_)); |
| 537 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 538 | EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); |
| 539 | EXPECT_EQ(string(embedded_null_target, |
| 540 | sizeof(embedded_null_target)), |
| 541 | result_target_); |
| 542 | } |
| 543 | |
| 544 | // Binary data test part 2: An embedded CR or LF should be handled like |
| 545 | // any other byte of data. No text-processing of the data should occur. |
| 546 | TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) { |
| 547 | const char embedded_null_dictionary_text[] = |
| 548 | { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A }; |
| 549 | const char embedded_null_target[] = |
| 550 | { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 }; |
| 551 | CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); |
| 552 | CHECK_EQ(8, sizeof(embedded_null_target)); |
| 553 | HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, |
| 554 | sizeof(embedded_null_dictionary_text)); |
| 555 | EXPECT_TRUE(embedded_null_dictionary.Init()); |
| 556 | VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, |
| 557 | VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, |
| 558 | /* look_for_target_matches = */ true); |
| 559 | EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); |
| 560 | EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, |
| 561 | sizeof(embedded_null_target), |
| 562 | delta())); |
| 563 | EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); |
| 564 | decoder_.StartDecoding(embedded_null_dictionary_text, |
| 565 | sizeof(embedded_null_dictionary_text)); |
| 566 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 567 | delta_size(), |
| 568 | &result_target_)); |
| 569 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 570 | EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); |
| 571 | EXPECT_EQ(string(embedded_null_target, |
| 572 | sizeof(embedded_null_target)), |
| 573 | result_target_); |
| 574 | } |
| 575 | |
| 576 | TEST_F(VCDiffEncoderTest, UsingWideCharacters) { |
| 577 | const wchar_t wchar_dictionary_text[] = |
| 578 | L"\"Just the place for a Snark!\" the Bellman cried,\n" |
| 579 | L"As he landed his crew with care;\n" |
| 580 | L"Supporting each man on the top of the tide\n" |
| 581 | L"By a finger entwined in his hair.\n"; |
| 582 | |
| 583 | const wchar_t wchar_target[] = |
| 584 | L"\"Just the place for a Snark! I have said it twice:\n" |
| 585 | L"That alone should encourage the crew.\n" |
| 586 | L"Just the place for a Snark! I have said it thrice:\n" |
| 587 | L"What I tell you three times is true.\"\n"; |
| 588 | |
| 589 | HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text, |
| 590 | sizeof(wchar_dictionary_text)); |
| 591 | EXPECT_TRUE(wchar_dictionary.Init()); |
| 592 | VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary, |
| 593 | VCD_FORMAT_INTERLEAVED |
| 594 | | VCD_FORMAT_CHECKSUM, |
| 595 | /* look_for_target_matches = */ false); |
| 596 | EXPECT_TRUE(wchar_encoder.StartEncoding(delta())); |
| 597 | EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target, |
| 598 | sizeof(wchar_target), |
| 599 | delta())); |
| 600 | EXPECT_TRUE(wchar_encoder.FinishEncoding(delta())); |
| 601 | decoder_.StartDecoding((const char*) wchar_dictionary_text, |
| 602 | sizeof(wchar_dictionary_text)); |
| 603 | EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), |
| 604 | delta_size(), |
| 605 | &result_target_)); |
| 606 | EXPECT_TRUE(decoder_.FinishDecoding()); |
| 607 | const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data(); |
| 608 | EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar)); |
| 609 | EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar)); |
| 610 | } |
| 611 | |
| 612 | #if defined(HAVE_MPROTECT) && \ |
| 613 | (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)) |
| 614 | // Bug 1220602: Make sure the encoder doesn't read past the end of the input |
| 615 | // buffer. |
| 616 | TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) { |
| 617 | const size_t target_size = strlen(kTarget); |
| 618 | |
| 619 | // Allocate two memory pages. |
| 620 | const int page_size = getpagesize(); |
| 621 | void* two_pages = NULL; |
| 622 | #ifdef HAVE_POSIX_MEMALIGN |
| 623 | posix_memalign(&two_pages, page_size, 2 * page_size); |
| 624 | #else // !HAVE_POSIX_MEMALIGN |
| 625 | two_pages = memalign(page_size, 2 * page_size); |
| 626 | #endif // HAVE_POSIX_MEMALIGN |
| 627 | char* const first_page = reinterpret_cast<char*>(two_pages); |
| 628 | char* const second_page = first_page + page_size; |
| 629 | |
| 630 | // Place the target string at the end of the first page. |
| 631 | char* const target_with_guard = second_page - target_size; |
| 632 | memcpy(target_with_guard, kTarget, target_size); |
| 633 | |
| 634 | // Make the second page unreadable. |
| 635 | mprotect(second_page, page_size, PROT_NONE); |
| 636 | |
| 637 | // Now perform the encode operation, which will cause a segmentation fault |
| 638 | // if it reads past the end of the buffer. |
| 639 | EXPECT_TRUE(encoder_.StartEncoding(delta())); |
| 640 | EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); |
| 641 | EXPECT_TRUE(encoder_.FinishEncoding(delta())); |
| 642 | |
| 643 | // Undo the mprotect. |
| 644 | mprotect(second_page, page_size, PROT_READ|PROT_WRITE); |
| 645 | free(two_pages); |
| 646 | } |
| 647 | |
| 648 | TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) { |
| 649 | const size_t target_size = strlen(kTarget); |
| 650 | |
| 651 | // Allocate two memory pages. |
| 652 | const int page_size = getpagesize(); |
| 653 | void* two_pages = NULL; |
| 654 | #ifdef HAVE_POSIX_MEMALIGN |
| 655 | posix_memalign(&two_pages, page_size, 2 * page_size); |
| 656 | #else // !HAVE_POSIX_MEMALIGN |
| 657 | two_pages = memalign(page_size, 2 * page_size); |
| 658 | #endif // HAVE_POSIX_MEMALIGN |
| 659 | char* const first_page = reinterpret_cast<char*>(two_pages); |
| 660 | char* const second_page = first_page + page_size; |
| 661 | |
| 662 | // Make the first page unreadable. |
| 663 | mprotect(first_page, page_size, PROT_NONE); |
| 664 | |
| 665 | // Place the target string at the beginning of the second page. |
| 666 | char* const target_with_guard = second_page; |
| 667 | memcpy(target_with_guard, kTarget, target_size); |
| 668 | |
| 669 | // Now perform the encode operation, which will cause a segmentation fault |
| 670 | // if it reads past the beginning of the buffer. |
| 671 | EXPECT_TRUE(encoder_.StartEncoding(delta())); |
| 672 | EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); |
| 673 | EXPECT_TRUE(encoder_.FinishEncoding(delta())); |
| 674 | |
| 675 | // Undo the mprotect. |
| 676 | mprotect(first_page, page_size, PROT_READ|PROT_WRITE); |
| 677 | free(two_pages); |
| 678 | } |
| 679 | #endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN) |
| 680 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 681 | class VCDiffHTML1Test : public VerifyEncodedBytesTest { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 682 | protected: |
| 683 | static const char kDictionary[]; |
| 684 | static const char kTarget[]; |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 685 | static const char kRedundantTarget[]; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 686 | |
| 687 | VCDiffHTML1Test(); |
| 688 | virtual ~VCDiffHTML1Test() { } |
| 689 | |
| 690 | void SimpleEncode(); |
| 691 | void StreamingEncode(); |
| 692 | |
| 693 | HashedDictionary hashed_dictionary_; |
| 694 | VCDiffStreamingEncoder encoder_; |
| 695 | VCDiffStreamingDecoder decoder_; |
| 696 | VCDiffEncoder simple_encoder_; |
| 697 | VCDiffDecoder simple_decoder_; |
| 698 | |
| 699 | string result_target_; |
| 700 | }; |
| 701 | |
| 702 | const char VCDiffHTML1Test::kDictionary[] = |
| 703 | "<html><font color=red>This part from the dict</font><br>"; |
| 704 | |
| 705 | const char VCDiffHTML1Test::kTarget[] = |
| 706 | "<html><font color=red>This part from the dict</font><br>\n" |
| 707 | "And this part is not...</html>"; |
| 708 | |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 709 | const char VCDiffHTML1Test::kRedundantTarget[] = |
| 710 | "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| 711 | "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| 712 | "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" |
| 713 | "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; // 256 |
| 714 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 715 | VCDiffHTML1Test::VCDiffHTML1Test() |
| 716 | : hashed_dictionary_(kDictionary, sizeof(kDictionary)), |
| 717 | encoder_(&hashed_dictionary_, |
| 718 | VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, |
| 719 | /* look_for_target_matches = */ true), |
| 720 | simple_encoder_(kDictionary, sizeof(kDictionary)) { |
| 721 | EXPECT_TRUE(hashed_dictionary_.Init()); |
| 722 | } |
| 723 | |
| 724 | void VCDiffHTML1Test::SimpleEncode() { |
| 725 | EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); |
| 726 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 727 | delta_size()); |
| 728 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 729 | sizeof(kDictionary), |
| 730 | delta_as_const(), |
| 731 | &result_target_)); |
| 732 | EXPECT_EQ(kTarget, result_target_); |
| 733 | } |
| 734 | |
| 735 | void VCDiffHTML1Test::StreamingEncode() { |
| 736 | EXPECT_TRUE(encoder_.StartEncoding(delta())); |
| 737 | EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 738 | EXPECT_TRUE(encoder_.FinishEncoding(delta())); |
| 739 | } |
| 740 | |
| 741 | TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) { |
| 742 | SimpleEncode(); |
| 743 | // These values do not depend on the block size used for encoding |
| 744 | ExpectByte(0xD6); // 'V' | 0x80 |
| 745 | ExpectByte(0xC3); // 'C' | 0x80 |
| 746 | ExpectByte(0xC4); // 'D' | 0x80 |
| 747 | ExpectByte(0x00); // Simple encoder never uses interleaved format |
| 748 | ExpectByte(0x00); // Hdr_Indicator |
| 749 | ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) |
| 750 | ExpectByte(sizeof(kDictionary)); // Dictionary length |
| 751 | ExpectByte(0x00); // Source segment position: start of dictionary |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 752 | if (BlockHash::kBlockSize < 16) { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 753 | // A medium block size will catch the "his part " match. |
| 754 | ExpectByte(0x22); // Length of the delta encoding |
| 755 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 756 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 757 | ExpectByte(0x16); // Length of the data section |
| 758 | ExpectByte(0x05); // Length of the instructions section |
| 759 | ExpectByte(0x02); // Length of the address section |
| 760 | // Data section |
| 761 | ExpectString("\nAnd t"); // Data for 1st ADD |
| 762 | ExpectString("is not...</html>"); // Data for 2nd ADD |
| 763 | // Instructions section |
| 764 | ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) |
| 765 | ExpectByte(0x38); // COPY size (56) |
| 766 | ExpectByte(0x07); // ADD size 6 |
| 767 | ExpectByte(0x19); // COPY size 9 mode VCD_SELF |
| 768 | ExpectByte(0x11); // ADD size 16 |
| 769 | // Address section |
| 770 | ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) |
| 771 | ExpectByte(0x17); // COPY address (23) mode VCD_SELF |
| 772 | } else if (BlockHash::kBlockSize <= 56) { |
| 773 | // Any block size up to 56 will catch the matching prefix string. |
| 774 | ExpectByte(0x29); // Length of the delta encoding |
| 775 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 776 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 777 | ExpectByte(0x1F); // Length of the data section |
| 778 | ExpectByte(0x04); // Length of the instructions section |
| 779 | ExpectByte(0x01); // Length of the address section |
| 780 | ExpectString("\nAnd this part is not...</html>"); // Data for ADD |
| 781 | // Instructions section |
| 782 | ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) |
| 783 | ExpectByte(0x38); // COPY size (56) |
| 784 | ExpectByte(0x01); // ADD size 0 |
| 785 | ExpectByte(0x1F); // Size of ADD (31) |
| 786 | // Address section |
| 787 | ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) |
| 788 | } else { |
| 789 | // The matching string is 56 characters long, and the block size is |
| 790 | // 64 or greater, so no match should be found. |
| 791 | ExpectSize(strlen(kTarget) + 7); // Delta encoding len |
| 792 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 793 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 794 | ExpectSize(strlen(kTarget)); // Length of the data section |
| 795 | ExpectByte(0x02); // Length of the instructions section |
| 796 | ExpectByte(0x00); // Length of the address section |
| 797 | // Data section |
| 798 | ExpectString(kTarget); |
| 799 | ExpectByte(0x01); // ADD size 0 |
| 800 | ExpectSize(strlen(kTarget)); |
| 801 | } |
| 802 | ExpectNoMoreBytes(); |
| 803 | } |
| 804 | |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 805 | TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) { |
| 806 | EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, |
| 807 | strlen(kRedundantTarget), |
| 808 | delta())); |
| 809 | EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 810 | delta_size()); |
| 811 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 812 | sizeof(kDictionary), |
| 813 | delta_as_const(), |
| 814 | &result_target_)); |
| 815 | EXPECT_EQ(kRedundantTarget, result_target_); |
| 816 | // These values do not depend on the block size used for encoding |
| 817 | ExpectByte(0xD6); // 'V' | 0x80 |
| 818 | ExpectByte(0xC3); // 'C' | 0x80 |
| 819 | ExpectByte(0xC4); // 'D' | 0x80 |
| 820 | ExpectByte(0x00); // Simple encoder never uses interleaved format |
| 821 | ExpectByte(0x00); // Hdr_Indicator |
| 822 | ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) |
| 823 | ExpectByte(sizeof(kDictionary)); // Dictionary length |
| 824 | ExpectByte(0x00); // Source segment position: start of dictionary |
| 825 | ExpectByte(0x0C); // Length of the delta encoding |
| 826 | ExpectSize(strlen(kRedundantTarget)); // Size of the target window |
| 827 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 828 | ExpectByte(0x01); // Length of the data section |
| 829 | ExpectByte(0x04); // Length of the instructions section |
| 830 | ExpectByte(0x01); // Length of the address section |
| 831 | // Data section |
| 832 | ExpectString("A"); // Data for ADD |
| 833 | // Instructions section |
| 834 | ExpectByte(0x02); // ADD size 1 |
| 835 | ExpectByte(0x23); // COPY size 0 mode VCD_HERE |
| 836 | ExpectSize(strlen(kRedundantTarget) - 1); // COPY size 255 |
| 837 | // Address section |
| 838 | ExpectByte(0x01); // COPY address (1) mode VCD_HERE |
| 839 | ExpectNoMoreBytes(); |
| 840 | } |
| 841 | |
| 842 | TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) { |
| 843 | simple_encoder_.SetTargetMatching(false); |
| 844 | EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, |
| 845 | strlen(kRedundantTarget), |
| 846 | delta())); |
| 847 | EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 848 | delta_size()); |
| 849 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 850 | sizeof(kDictionary), |
| 851 | delta_as_const(), |
| 852 | &result_target_)); |
| 853 | EXPECT_EQ(kRedundantTarget, result_target_); |
| 854 | // These values do not depend on the block size used for encoding |
| 855 | ExpectByte(0xD6); // 'V' | 0x80 |
| 856 | ExpectByte(0xC3); // 'C' | 0x80 |
| 857 | ExpectByte(0xC4); // 'D' | 0x80 |
| 858 | ExpectByte(0x00); // Simple encoder never uses interleaved format |
| 859 | ExpectByte(0x00); // Hdr_Indicator |
| 860 | ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) |
| 861 | ExpectByte(sizeof(kDictionary)); // Dictionary length |
| 862 | ExpectByte(0x00); // Source segment position: start of dictionary |
| 863 | ExpectSize(strlen(kRedundantTarget) + 0x0A); // Length of the delta encoding |
| 864 | ExpectSize(strlen(kRedundantTarget)); // Size of the target window |
| 865 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 866 | ExpectSize(strlen(kRedundantTarget)); // Length of the data section |
| 867 | ExpectByte(0x03); // Length of the instructions section |
| 868 | ExpectByte(0x00); // Length of the address section |
| 869 | // Data section |
| 870 | ExpectString(kRedundantTarget); // Data for ADD |
| 871 | // Instructions section |
| 872 | ExpectByte(0x01); // ADD size 0 |
| 873 | ExpectSize(strlen(kRedundantTarget)); // ADD size |
| 874 | // Address section empty |
| 875 | ExpectNoMoreBytes(); |
| 876 | } |
| 877 | |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 878 | class VCDiffHTML2Test : public VerifyEncodedBytesTest { |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 879 | protected: |
| 880 | static const char kDictionary[]; |
| 881 | static const char kTarget[]; |
| 882 | |
| 883 | VCDiffHTML2Test(); |
| 884 | virtual ~VCDiffHTML2Test() { } |
| 885 | |
| 886 | void SimpleEncode(); |
| 887 | void StreamingEncode(); |
| 888 | |
| 889 | HashedDictionary hashed_dictionary_; |
| 890 | VCDiffStreamingEncoder encoder_; |
| 891 | VCDiffStreamingDecoder decoder_; |
| 892 | VCDiffEncoder simple_encoder_; |
| 893 | VCDiffDecoder simple_decoder_; |
| 894 | |
| 895 | string result_target_; |
| 896 | }; |
| 897 | |
| 898 | const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test"; |
| 899 | |
| 900 | const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n"; |
| 901 | |
| 902 | VCDiffHTML2Test::VCDiffHTML2Test() |
| 903 | : hashed_dictionary_(kDictionary, sizeof(kDictionary)), |
| 904 | encoder_(&hashed_dictionary_, |
| 905 | VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, |
| 906 | /* look_for_target_matches = */ true), |
| 907 | simple_encoder_(kDictionary, sizeof(kDictionary)) { |
| 908 | EXPECT_TRUE(hashed_dictionary_.Init()); |
| 909 | } |
| 910 | |
| 911 | void VCDiffHTML2Test::SimpleEncode() { |
| 912 | EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); |
| 913 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 914 | delta_size()); |
| 915 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 916 | sizeof(kDictionary), |
| 917 | delta_as_const(), |
| 918 | &result_target_)); |
| 919 | EXPECT_EQ(kTarget, result_target_); |
| 920 | } |
| 921 | |
| 922 | void VCDiffHTML2Test::StreamingEncode() { |
| 923 | EXPECT_TRUE(encoder_.StartEncoding(delta())); |
| 924 | EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); |
| 925 | EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, |
| 926 | delta_size()); |
| 927 | EXPECT_TRUE(simple_decoder_.Decode(kDictionary, |
| 928 | sizeof(kDictionary), |
| 929 | delta_as_const(), |
| 930 | &result_target_)); |
| 931 | EXPECT_EQ(kTarget, result_target_); |
| 932 | } |
| 933 | |
| 934 | TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) { |
| 935 | SimpleEncode(); |
| 936 | // These values do not depend on the block size used for encoding |
| 937 | ExpectByte(0xD6); // 'V' | 0x80 |
| 938 | ExpectByte(0xC3); // 'C' | 0x80 |
| 939 | ExpectByte(0xC4); // 'D' | 0x80 |
| 940 | ExpectByte(0x00); // Simple encoder never uses interleaved format |
| 941 | ExpectByte(0x00); // Hdr_Indicator |
| 942 | ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) |
| 943 | ExpectByte(sizeof(kDictionary)); // Dictionary length |
| 944 | ExpectByte(0x00); // Source segment position: start of dictionary |
| 945 | if (BlockHash::kBlockSize <= 8) { |
| 946 | ExpectByte(12); // Length of the delta encoding |
| 947 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 948 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 949 | ExpectByte(0x04); // Length of the data section |
| 950 | ExpectByte(0x02); // Length of the instructions section |
| 951 | ExpectByte(0x01); // Length of the address section |
| 952 | ExpectByte('!'); |
| 953 | ExpectByte('!'); |
| 954 | ExpectByte('!'); |
| 955 | ExpectByte('\n'); |
| 956 | ExpectByte(0x1E); // COPY size 14 mode VCD_SELF |
| 957 | ExpectByte(0x05); // ADD size 4 |
| 958 | ExpectByte(0x03); // COPY address (3) mode VCD_SELF |
| 959 | } else { |
| 960 | // Larger block sizes will not catch any matches. |
| 961 | ExpectSize(strlen(kTarget) + 7); // Delta encoding len |
| 962 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 963 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 964 | ExpectSize(strlen(kTarget)); // Length of the data section |
| 965 | ExpectByte(0x02); // Length of the instructions section |
| 966 | ExpectByte(0x00); // Length of the address section |
| 967 | // Data section |
| 968 | ExpectString(kTarget); |
| 969 | ExpectByte(0x01); // ADD size 0 |
| 970 | ExpectSize(strlen(kTarget)); |
| 971 | } |
| 972 | ExpectNoMoreBytes(); |
| 973 | } |
| 974 | |
| 975 | TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) { |
| 976 | StreamingEncode(); |
| 977 | const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget)); |
| 978 | CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum)); |
| 979 | // These values do not depend on the block size used for encoding |
| 980 | ExpectByte(0xD6); // 'V' | 0x80 |
| 981 | ExpectByte(0xC3); // 'C' | 0x80 |
| 982 | ExpectByte(0xC4); // 'D' | 0x80 |
| 983 | ExpectByte('S'); // Format extensions |
| 984 | ExpectByte(0x00); // Hdr_Indicator |
| 985 | ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator |
| 986 | ExpectByte(sizeof(kDictionary)); // Dictionary length |
| 987 | ExpectByte(0x00); // Source segment position: start of dictionary |
| 988 | if (BlockHash::kBlockSize <= 8) { |
| 989 | ExpectByte(17); // Length of the delta encoding |
| 990 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 991 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 992 | ExpectByte(0x00); // Length of the data section |
| 993 | ExpectByte(0x07); // Length of the instructions section |
| 994 | ExpectByte(0x00); // Length of the address section |
| 995 | ExpectChecksum(html2_checksum); |
| 996 | ExpectByte(0x1E); // COPY size 14 mode VCD_SELF |
| 997 | ExpectByte(0x03); // COPY address (3) mode VCD_SELF |
| 998 | ExpectByte(0x05); // ADD size 4 |
| 999 | ExpectByte('!'); |
| 1000 | ExpectByte('!'); |
| 1001 | ExpectByte('!'); |
| 1002 | ExpectByte('\n'); |
| 1003 | } else { |
| 1004 | // Larger block sizes will not catch any matches. |
| 1005 | ExpectSize(strlen(kTarget) + 12); // Delta encoding len |
| 1006 | ExpectSize(strlen(kTarget)); // Size of the target window |
| 1007 | ExpectByte(0x00); // Delta_indicator (no compression) |
| 1008 | ExpectByte(0x00); // Length of the data section |
| 1009 | ExpectSize(0x02 + strlen(kTarget)); // Interleaved |
| 1010 | ExpectByte(0x00); // Length of the address section |
| 1011 | ExpectChecksum(html2_checksum); |
| 1012 | // Data section |
| 1013 | ExpectByte(0x01); // ADD size 0 |
| 1014 | ExpectSize(strlen(kTarget)); |
| 1015 | ExpectString(kTarget); |
| 1016 | } |
| 1017 | ExpectNoMoreBytes(); |
| 1018 | } |
| 1019 | |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1020 | } // anonymous namespace |
| 1021 | } // namespace open_vcdiff |