openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 1 | // Copyright 2008 Google Inc. |
| 2 | // Author: Lincoln Smith |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | |
| 16 | #include <config.h> |
| 17 | #include "blockhash.h" |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 18 | #include <limits.h> // INT_MIN |
| 19 | #include <string.h> // memcpy, memcmp, strlen |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 20 | #include <iostream> |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 21 | #include <memory> // auto_ptr |
| 22 | #include "encodetable.h" |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 23 | #include "rolling_hash.h" |
| 24 | #include "testing.h" |
| 25 | |
| 26 | namespace open_vcdiff { |
| 27 | |
| 28 | const int kBlockSize = BlockHash::kBlockSize; |
| 29 | |
| 30 | class BlockHashTest : public testing::Test { |
| 31 | protected: |
| 32 | static const int kTimingTestSize = 1 << 21; // 2M |
| 33 | static const int kTimingTestIterations = 32; |
| 34 | |
| 35 | BlockHashTest() { |
| 36 | dh_.reset(BlockHash::CreateDictionaryHash(sample_text, |
| 37 | strlen(sample_text))); |
| 38 | th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); |
| 39 | EXPECT_TRUE(dh_.get() != NULL); |
| 40 | EXPECT_TRUE(th_.get() != NULL); |
| 41 | } |
| 42 | |
| 43 | // BlockHashTest is a friend to BlockHash. Expose the protected functions |
| 44 | // that will be tested by the children of BlockHashTest. |
| 45 | static bool BlockContentsMatch(const char* block1, const char* block2) { |
| 46 | return BlockHash::BlockContentsMatch(block1, block2); |
| 47 | } |
| 48 | |
| 49 | int FirstMatchingBlock(const BlockHash& block_hash, |
| 50 | uint32_t hash_value, |
| 51 | const char* block_ptr) const { |
| 52 | return block_hash.FirstMatchingBlock(hash_value, block_ptr); |
| 53 | } |
| 54 | |
| 55 | int NextMatchingBlock(const BlockHash& block_hash, |
| 56 | int block_number, |
| 57 | const char* block_ptr) const { |
| 58 | return block_hash.NextMatchingBlock(block_number, block_ptr); |
| 59 | } |
| 60 | |
| 61 | static int MatchingBytesToLeft(const char* source_match_start, |
| 62 | const char* target_match_start, |
| 63 | int max_bytes) { |
| 64 | return BlockHash::MatchingBytesToLeft(source_match_start, |
| 65 | target_match_start, |
| 66 | max_bytes); |
| 67 | } |
| 68 | |
| 69 | static int MatchingBytesToRight(const char* source_match_end, |
| 70 | const char* target_match_end, |
| 71 | int max_bytes) { |
| 72 | return BlockHash::MatchingBytesToRight(source_match_end, |
| 73 | target_match_end, |
| 74 | max_bytes); |
| 75 | } |
| 76 | |
| 77 | static int StringLengthAsInt(const char* s) { |
| 78 | return static_cast<int>(strlen(s)); |
| 79 | } |
| 80 | |
| 81 | void InitBlocksToDifferAtNthByte(int n) { |
| 82 | CHECK(n < kBlockSize); |
| 83 | memset(compare_buffer_1_, 0xBE, kTimingTestSize); |
| 84 | memset(compare_buffer_2_, 0xBE, kTimingTestSize); |
| 85 | for (int index = n; index < kTimingTestSize; index += kBlockSize) { |
| 86 | compare_buffer_1_[index] = 0x00; |
| 87 | compare_buffer_2_[index] = 0x01; |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | void TestAndPrintTimesForCompareFunctions(bool should_be_identical); |
| 92 | |
| 93 | void TimingTestForBlocksThatDifferAtByte(int n) { |
| 94 | InitBlocksToDifferAtNthByte(n); |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 95 | std::cout << "Comparing blocks that differ at byte " << n << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 96 | TestAndPrintTimesForCompareFunctions(false); |
| 97 | } |
| 98 | |
| 99 | // Copy sample_text_without_spaces and search_string_without_spaces |
| 100 | // into newly allocated sample_text and search_string buffers, |
| 101 | // but pad them with space characters so that every character |
| 102 | // in sample_text_without_spaces matches (kBlockSize - 1) |
| 103 | // space characters in sample_text, followed by that character. |
| 104 | // For example: |
| 105 | // Since sample_text_without_spaces begins "The only thing"..., |
| 106 | // if kBlockSize is 4, then 3 space characters will be inserted |
| 107 | // between each letter of sample_text, as follows: |
| 108 | // " T h e o n l y t h i n g"... |
| 109 | // This makes testing simpler, because finding a kBlockSize-byte match |
| 110 | // between the sample text and search string only depends on the |
| 111 | // trailing letter in each block. |
| 112 | static void MakeEachLetterABlock(const char* string_without_spaces, |
| 113 | const char** result) { |
| 114 | const size_t length_without_spaces = strlen(string_without_spaces); |
| 115 | char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; |
| 116 | memset(padded_text, ' ', kBlockSize * length_without_spaces); |
| 117 | char* padded_text_ptr = padded_text + (kBlockSize - 1); |
| 118 | for (size_t i = 0; i < length_without_spaces; ++i) { |
| 119 | *padded_text_ptr = string_without_spaces[i]; |
| 120 | padded_text_ptr += kBlockSize; |
| 121 | } |
| 122 | padded_text[kBlockSize * length_without_spaces] = '\0'; |
| 123 | *result = padded_text; |
| 124 | } |
| 125 | |
| 126 | static void SetUpTestCase() { |
| 127 | MakeEachLetterABlock(sample_text_without_spaces, &sample_text); |
| 128 | MakeEachLetterABlock(search_string_without_spaces, &search_string); |
| 129 | MakeEachLetterABlock(search_string_altered_without_spaces, |
| 130 | &search_string_altered); |
| 131 | MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); |
| 132 | MakeEachLetterABlock(search_to_beginning_without_spaces, |
| 133 | &search_to_beginning_string); |
| 134 | MakeEachLetterABlock(sample_text_many_matches_without_spaces, |
| 135 | &sample_text_many_matches); |
| 136 | MakeEachLetterABlock(search_string_many_matches_without_spaces, |
| 137 | &search_string_many_matches); |
| 138 | MakeEachLetterABlock("y", &test_string_y); |
| 139 | MakeEachLetterABlock("e", &test_string_e); |
| 140 | char* new_test_string_unaligned_e = new char[kBlockSize]; |
| 141 | memset(new_test_string_unaligned_e, ' ', kBlockSize); |
| 142 | new_test_string_unaligned_e[kBlockSize - 2] = 'e'; |
| 143 | test_string_unaligned_e = new_test_string_unaligned_e; |
| 144 | char* new_test_string_all_Qs = new char[kBlockSize]; |
| 145 | memset(new_test_string_all_Qs, 'Q', kBlockSize); |
| 146 | test_string_all_Qs = new_test_string_all_Qs; |
| 147 | hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); |
| 148 | hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); |
| 149 | hashed_f = |
| 150 | RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); |
| 151 | hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); |
| 152 | hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); |
| 153 | } |
| 154 | |
| 155 | static void TearDownTestCase() { |
| 156 | delete[] sample_text; |
| 157 | delete[] search_string; |
| 158 | delete[] search_string_altered; |
| 159 | delete[] search_to_end_string; |
| 160 | delete[] search_to_beginning_string; |
| 161 | delete[] sample_text_many_matches; |
| 162 | delete[] search_string_many_matches; |
| 163 | delete[] test_string_y; |
| 164 | delete[] test_string_e; |
| 165 | delete[] test_string_unaligned_e; |
| 166 | delete[] test_string_all_Qs; |
| 167 | } |
| 168 | |
| 169 | // Each block in the sample text and search string is kBlockSize bytes long, |
| 170 | // and consists of (kBlockSize - 1) space characters |
| 171 | // followed by a single letter of text. |
| 172 | |
| 173 | // Block numbers of certain characters within the sample text: |
| 174 | // All six occurrences of "e", in order. |
| 175 | static const int block_of_first_e = 2; |
| 176 | static const int block_of_second_e = 16; |
| 177 | static const int block_of_third_e = 21; |
| 178 | static const int block_of_fourth_e = 27; |
| 179 | static const int block_of_fifth_e = 35; |
| 180 | static const int block_of_sixth_e = 42; |
| 181 | |
| 182 | static const int block_of_y_in_only = 7; |
| 183 | // The block number is multiplied by kBlockSize to arrive at the |
| 184 | // index, which points to the (kBlockSize - 1) space characters before |
| 185 | // the letter specified. |
| 186 | // Indices of certain characters within the sample text. |
| 187 | static const int index_of_first_e = block_of_first_e * kBlockSize; |
| 188 | static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; |
| 189 | static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; |
| 190 | static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; |
| 191 | static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; |
| 192 | static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; |
| 193 | static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; |
| 194 | static const int index_of_space_before_fear_itself = 33 * kBlockSize; |
| 195 | static const int index_of_space_before_itself = 38 * kBlockSize; |
| 196 | static const int index_of_ababc = 4 * kBlockSize; |
| 197 | |
| 198 | // Indices of certain characters within the search strings. |
| 199 | static const int index_of_second_w_in_what_we = 5 * kBlockSize; |
| 200 | static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; |
| 201 | static const int index_of_f_in_fearsome = 16 * kBlockSize; |
| 202 | static const int index_of_space_in_eat_itself = 12 * kBlockSize; |
| 203 | static const int index_of_i_in_itself = 13 * kBlockSize; |
| 204 | static const int index_of_t_in_use_the = 4 * kBlockSize; |
| 205 | static const int index_of_o_in_online = 8 * kBlockSize; |
| 206 | |
| 207 | static const char sample_text_without_spaces[]; |
| 208 | static const char search_string_without_spaces[]; |
| 209 | static const char search_string_altered_without_spaces[]; |
| 210 | static const char search_to_end_without_spaces[]; |
| 211 | static const char search_to_beginning_without_spaces[]; |
| 212 | static const char sample_text_many_matches_without_spaces[]; |
| 213 | static const char search_string_many_matches_without_spaces[]; |
| 214 | |
| 215 | static const char* sample_text; |
| 216 | static const char* search_string; |
| 217 | static const char* search_string_altered; |
| 218 | static const char* search_to_end_string; |
| 219 | static const char* search_to_beginning_string; |
| 220 | static const char* sample_text_many_matches; |
| 221 | static const char* search_string_many_matches; |
| 222 | |
| 223 | static const char* test_string_y; |
| 224 | static const char* test_string_e; |
| 225 | static const char* test_string_all_Qs; |
| 226 | static const char* test_string_unaligned_e; |
| 227 | |
| 228 | static uint32_t hashed_y; |
| 229 | static uint32_t hashed_e; |
| 230 | static uint32_t hashed_f; |
| 231 | static uint32_t hashed_unaligned_e; |
| 232 | static uint32_t hashed_all_Qs; |
| 233 | |
| 234 | // Boost scoped_ptr, if available, could be used instead of std::auto_ptr. |
| 235 | std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup |
| 236 | std::auto_ptr<BlockHash> th_; // hash table not populated; |
| 237 | // used to test incremental adds |
| 238 | |
| 239 | BlockHash::Match best_match_; |
| 240 | char* compare_buffer_1_; |
| 241 | char* compare_buffer_2_; |
| 242 | int prime_result_; |
| 243 | }; |
| 244 | |
| 245 | #ifdef GTEST_HAS_DEATH_TEST |
| 246 | typedef BlockHashTest BlockHashDeathTest; |
| 247 | #endif // GTEST_HAS_DEATH_TEST |
| 248 | |
| 249 | // The C++ standard requires a separate definition of these static const values, |
| 250 | // even though their initializers are given within the class definition. |
| 251 | const int BlockHashTest::block_of_first_e; |
| 252 | const int BlockHashTest::block_of_second_e; |
| 253 | const int BlockHashTest::block_of_third_e; |
| 254 | const int BlockHashTest::block_of_fourth_e; |
| 255 | const int BlockHashTest::block_of_fifth_e; |
| 256 | const int BlockHashTest::block_of_sixth_e; |
| 257 | const int BlockHashTest::block_of_y_in_only; |
| 258 | const int BlockHashTest::index_of_first_e; |
| 259 | const int BlockHashTest::index_of_fourth_e; |
| 260 | const int BlockHashTest::index_of_sixth_e; |
| 261 | const int BlockHashTest::index_of_y_in_only; |
| 262 | const int BlockHashTest::index_of_space_before_fear_is_fear; |
| 263 | const int BlockHashTest::index_of_longest_match_ear_is_fear; |
| 264 | const int BlockHashTest::index_of_i_in_fear_is_fear; |
| 265 | const int BlockHashTest::index_of_space_before_fear_itself; |
| 266 | const int BlockHashTest::index_of_space_before_itself; |
| 267 | const int BlockHashTest::index_of_ababc; |
| 268 | const int BlockHashTest::index_of_second_w_in_what_we; |
| 269 | const int BlockHashTest::index_of_second_e_in_what_we_hear; |
| 270 | const int BlockHashTest::index_of_f_in_fearsome; |
| 271 | const int BlockHashTest::index_of_space_in_eat_itself; |
| 272 | const int BlockHashTest::index_of_i_in_itself; |
| 273 | const int BlockHashTest::index_of_t_in_use_the; |
| 274 | const int BlockHashTest::index_of_o_in_online; |
| 275 | |
| 276 | const char BlockHashTest::sample_text_without_spaces[] = |
| 277 | "The only thing we have to fear is fear itself"; |
| 278 | |
| 279 | const char BlockHashTest::search_string_without_spaces[] = |
| 280 | "What we hear is fearsome"; |
| 281 | |
| 282 | const char BlockHashTest::search_string_altered_without_spaces[] = |
| 283 | "Vhat ve hear is fearsomm"; |
| 284 | |
| 285 | const char BlockHashTest::search_to_end_without_spaces[] = |
| 286 | "Pop will eat itself, eventually"; |
| 287 | |
| 288 | const char BlockHashTest::search_to_beginning_without_spaces[] = |
| 289 | "Use The online dictionary"; |
| 290 | |
| 291 | const char BlockHashTest::sample_text_many_matches_without_spaces[] = |
| 292 | "ababababcab"; |
| 293 | |
| 294 | const char BlockHashTest::search_string_many_matches_without_spaces[] = |
| 295 | "ababc"; |
| 296 | |
| 297 | const char* BlockHashTest::sample_text = NULL; |
| 298 | const char* BlockHashTest::search_string = NULL; |
| 299 | const char* BlockHashTest::search_string_altered = NULL; |
| 300 | const char* BlockHashTest::search_to_end_string = NULL; |
| 301 | const char* BlockHashTest::search_to_beginning_string = NULL; |
| 302 | const char* BlockHashTest::sample_text_many_matches = NULL; |
| 303 | const char* BlockHashTest::search_string_many_matches = NULL; |
| 304 | |
| 305 | const char* BlockHashTest::test_string_y = NULL; |
| 306 | const char* BlockHashTest::test_string_e = NULL; |
| 307 | const char* BlockHashTest::test_string_unaligned_e = NULL; |
| 308 | const char* BlockHashTest::test_string_all_Qs = NULL; |
| 309 | |
| 310 | uint32_t BlockHashTest::hashed_y = 0; |
| 311 | uint32_t BlockHashTest::hashed_e = 0; |
| 312 | uint32_t BlockHashTest::hashed_f = 0; |
| 313 | uint32_t BlockHashTest::hashed_unaligned_e = 0; |
| 314 | uint32_t BlockHashTest::hashed_all_Qs = 0; |
| 315 | |
| 316 | void BlockHashTest::TestAndPrintTimesForCompareFunctions( |
| 317 | bool should_be_identical) { |
| 318 | CHECK(compare_buffer_1_ != NULL); |
| 319 | CHECK(compare_buffer_2_ != NULL); |
| 320 | // Prime the memory cache. |
| 321 | prime_result_ = |
| 322 | memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); |
| 323 | const char* const block1_limit = |
| 324 | &compare_buffer_1_[kTimingTestSize - kBlockSize]; |
| 325 | int block_compare_words_result = 0; |
| 326 | CycleTimer block_compare_words_timer; |
| 327 | block_compare_words_timer.Start(); |
| 328 | for (int i = 0; i < kTimingTestIterations; ++i) { |
| 329 | const char* block1 = compare_buffer_1_; |
| 330 | const char* block2 = compare_buffer_2_; |
| 331 | while (block1 < block1_limit) { |
| 332 | if (!BlockHash::BlockCompareWords(block1, block2)) { |
| 333 | ++block_compare_words_result; |
| 334 | } |
| 335 | block1 += kBlockSize; |
| 336 | block2 += kBlockSize; |
| 337 | } |
| 338 | } |
| 339 | block_compare_words_timer.Stop(); |
| 340 | double time_for_block_compare_words = |
| 341 | static_cast<double>(block_compare_words_timer.GetInUsec()) |
| 342 | / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); |
| 343 | int block_contents_match_result = 0; |
| 344 | CycleTimer block_contents_match_timer; |
| 345 | block_contents_match_timer.Start(); |
| 346 | for (int i = 0; i < kTimingTestIterations; ++i) { |
| 347 | const char* block1 = compare_buffer_1_; |
| 348 | const char* block2 = compare_buffer_2_; |
| 349 | while (block1 < block1_limit) { |
| 350 | if (!BlockHash::BlockContentsMatch(block1, block2)) { |
| 351 | ++block_contents_match_result; |
| 352 | } |
| 353 | block1 += kBlockSize; |
| 354 | block2 += kBlockSize; |
| 355 | } |
| 356 | } |
| 357 | block_contents_match_timer.Stop(); |
| 358 | double time_for_block_contents_match = |
| 359 | static_cast<double>(block_contents_match_timer.GetInUsec()) |
| 360 | / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); |
| 361 | EXPECT_EQ(block_contents_match_result, block_compare_words_result); |
| 362 | if (should_be_identical) { |
| 363 | CHECK_EQ(0, block_compare_words_result); |
| 364 | } else { |
| 365 | CHECK_GT(block_compare_words_result, 0); |
| 366 | } |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 367 | std::cout << "BlockHash::BlockCompareWords: " |
| 368 | << time_for_block_compare_words << " us per operation" << std::endl; |
| 369 | std::cout << "BlockHash::BlockContentsMatch: " |
| 370 | << time_for_block_contents_match << " us per operation" |
| 371 | << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 372 | if (time_for_block_compare_words > 0) { |
| 373 | double percent_change = |
| 374 | (((time_for_block_contents_match - time_for_block_compare_words) |
| 375 | / time_for_block_compare_words) * 100.0); |
| 376 | if (percent_change >= 0.0) { |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 377 | std::cout << "BlockContentsMatch is " << percent_change << "%" |
| 378 | << " SLOWER than BlockCompareWords" << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 379 | } else { |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 380 | std::cout << "BlockContentsMatch is " << (-percent_change) << "%" |
| 381 | << " FASTER than BlockCompareWords" << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 382 | } |
| 383 | } |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 384 | #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS) |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 385 | // Only check timings for optimized build. There's plenty of margin: this |
| 386 | // check will fail only if BlockContentsMatch is at least twice as slow as |
| 387 | // BlockCompareWords. |
| 388 | EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); |
openvcdiff | 28db807 | 2008-10-10 23:29:11 +0000 | [diff] [blame] | 389 | #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 390 | } |
| 391 | |
| 392 | // The two strings passed to BlockHash::MatchingBytesToLeft do have matching |
| 393 | // characters -- in fact, they're the same string -- but since max_bytes is zero |
| 394 | // or negative, BlockHash::MatchingBytesToLeft should not read from the strings |
| 395 | // and should return 0. |
| 396 | TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { |
| 397 | EXPECT_EQ(0, MatchingBytesToLeft( |
| 398 | &search_string[index_of_f_in_fearsome], |
| 399 | &search_string[index_of_f_in_fearsome], |
| 400 | 0)); |
| 401 | EXPECT_EQ(0, MatchingBytesToRight( |
| 402 | &search_string[index_of_f_in_fearsome], |
| 403 | &search_string[index_of_f_in_fearsome], |
| 404 | 0)); |
| 405 | } |
| 406 | |
| 407 | TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { |
| 408 | EXPECT_EQ(0, MatchingBytesToLeft( |
| 409 | &search_string[index_of_f_in_fearsome], |
| 410 | &search_string[index_of_f_in_fearsome], |
| 411 | -1)); |
| 412 | EXPECT_EQ(0, MatchingBytesToLeft( |
| 413 | &search_string[index_of_f_in_fearsome], |
| 414 | &search_string[index_of_f_in_fearsome], |
| 415 | INT_MIN)); |
| 416 | EXPECT_EQ(0, MatchingBytesToRight( |
| 417 | &search_string[index_of_f_in_fearsome], |
| 418 | &search_string[index_of_f_in_fearsome], |
| 419 | -1)); |
| 420 | EXPECT_EQ(0, MatchingBytesToRight( |
| 421 | &search_string[index_of_f_in_fearsome], |
| 422 | &search_string[index_of_f_in_fearsome], |
| 423 | INT_MIN)); |
| 424 | } |
| 425 | |
| 426 | TEST_F(BlockHashTest, MaxBytesOneMatch) { |
| 427 | EXPECT_EQ(1, MatchingBytesToLeft( |
| 428 | &search_string[index_of_f_in_fearsome], |
| 429 | &search_string[index_of_f_in_fearsome], |
| 430 | 1)); |
| 431 | EXPECT_EQ(1, MatchingBytesToRight( |
| 432 | &search_string[index_of_f_in_fearsome], |
| 433 | &search_string[index_of_f_in_fearsome], |
| 434 | 1)); |
| 435 | } |
| 436 | |
| 437 | TEST_F(BlockHashTest, MaxBytesOneNoMatch) { |
| 438 | EXPECT_EQ(0, MatchingBytesToLeft( |
| 439 | &search_string[index_of_f_in_fearsome], |
| 440 | &search_string[index_of_second_e_in_what_we_hear], |
| 441 | 1)); |
| 442 | EXPECT_EQ(0, MatchingBytesToRight( |
| 443 | &search_string[index_of_f_in_fearsome], |
| 444 | &search_string[index_of_second_e_in_what_we_hear - 1], |
| 445 | 1)); |
| 446 | } |
| 447 | |
| 448 | TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { |
| 449 | // The number of bytes that match between the original "we hear is fearsome" |
| 450 | // and the altered "ve hear is fearsome". |
| 451 | const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); |
| 452 | const int max_bytes = expected_length - 1; |
| 453 | EXPECT_EQ(max_bytes, MatchingBytesToLeft( |
| 454 | &search_string[index_of_f_in_fearsome], |
| 455 | &search_string_altered[index_of_f_in_fearsome], |
| 456 | max_bytes)); |
| 457 | } |
| 458 | |
| 459 | TEST_F(BlockHashTest, LeftNotLimited) { |
| 460 | // The number of bytes that match between the original "we hear is fearsome" |
| 461 | // and the altered "ve hear is fearsome". |
| 462 | const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); |
| 463 | const int max_bytes = expected_length + 1; |
| 464 | EXPECT_EQ(expected_length, MatchingBytesToLeft( |
| 465 | &search_string[index_of_f_in_fearsome], |
| 466 | &search_string_altered[index_of_f_in_fearsome], |
| 467 | max_bytes)); |
| 468 | EXPECT_EQ(expected_length, MatchingBytesToLeft( |
| 469 | &search_string[index_of_f_in_fearsome], |
| 470 | &search_string_altered[index_of_f_in_fearsome], |
| 471 | INT_MAX)); |
| 472 | } |
| 473 | |
| 474 | TEST_F(BlockHashTest, RightLimitedByMaxBytes) { |
| 475 | // The number of bytes that match between the original "fearsome" |
| 476 | // and the altered "fearsomm". |
| 477 | const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) |
| 478 | + (kBlockSize - 1); // spacing between letters |
| 479 | const int max_bytes = expected_length - 1; |
| 480 | EXPECT_EQ(max_bytes, MatchingBytesToRight( |
| 481 | &search_string[index_of_f_in_fearsome], |
| 482 | &search_string_altered[index_of_f_in_fearsome], |
| 483 | max_bytes)); |
| 484 | } |
| 485 | |
| 486 | TEST_F(BlockHashTest, RightNotLimited) { |
| 487 | // The number of bytes that match between the original "we hear is fearsome" |
| 488 | // and the altered "ve hear is fearsome". |
| 489 | const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) |
| 490 | + (kBlockSize - 1); // spacing between letters |
| 491 | const int max_bytes = expected_length + 1; |
| 492 | EXPECT_EQ(expected_length, MatchingBytesToRight( |
| 493 | &search_string[index_of_f_in_fearsome], |
| 494 | &search_string_altered[index_of_f_in_fearsome], |
| 495 | max_bytes)); |
| 496 | EXPECT_EQ(expected_length, MatchingBytesToRight( |
| 497 | &search_string[index_of_f_in_fearsome], |
| 498 | &search_string_altered[index_of_f_in_fearsome], |
| 499 | INT_MAX)); |
| 500 | } |
| 501 | |
| 502 | // If this test fails in a non-x86 or non-gcc environment, consider adding |
| 503 | // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and |
| 504 | // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc |
| 505 | // to use a special implementation (BlockCompareWords) to compare blocks |
| 506 | // rather than using standard memcmp. |
| 507 | TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { |
| 508 | compare_buffer_1_ = new char[kTimingTestSize]; |
| 509 | compare_buffer_2_ = new char[kTimingTestSize]; |
| 510 | |
| 511 | // The value 0xBE is arbitrarily chosen. First test with identical contents |
| 512 | // in the buffers, so that the comparison functions cannot short-circuit |
| 513 | // and will return true. |
| 514 | memset(compare_buffer_1_, 0xBE, kTimingTestSize); |
| 515 | memset(compare_buffer_2_, 0xBE, kTimingTestSize); |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 516 | std::cout << "Comparing " |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 517 | << (kTimingTestSize / kBlockSize) << " identical values:" |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 518 | << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 519 | TestAndPrintTimesForCompareFunctions(true); |
| 520 | |
| 521 | // Now change one value in the middle of one buffer, so that the contents |
| 522 | // are no longer the same. |
| 523 | compare_buffer_1_[kTimingTestSize / 2] = 0x00; |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 524 | std::cout << "Comparing " |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 525 | << ((kTimingTestSize / kBlockSize) - 1) << " identical values" |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 526 | << " and one mismatch:" << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 527 | TestAndPrintTimesForCompareFunctions(false); |
| 528 | |
| 529 | // Set one of the bytes of each block to differ so that |
| 530 | // none of the compare operations will return true, and run timing tests. |
| 531 | // In practice, BlockHash::BlockContentsMatch will only be called |
| 532 | // for two blocks whose hash values match, and the two most important |
| 533 | // cases are: (1) the blocks are identical, or (2) none of their bytes match. |
| 534 | TimingTestForBlocksThatDifferAtByte(0); |
| 535 | TimingTestForBlocksThatDifferAtByte(1); |
| 536 | TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); |
| 537 | TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); |
| 538 | |
| 539 | delete[] compare_buffer_1_; |
| 540 | delete[] compare_buffer_2_; |
| 541 | } |
| 542 | |
| 543 | TEST_F(BlockHashTest, FindFailsBeforeHashing) { |
| 544 | EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); |
| 545 | } |
| 546 | |
| 547 | TEST_F(BlockHashTest, HashOneFindOne) { |
| 548 | for (int i = 0; i <= index_of_y_in_only; ++i) { |
| 549 | th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); |
| 550 | } |
| 551 | EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, |
| 552 | test_string_y)); |
| 553 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); |
| 554 | } |
| 555 | |
| 556 | TEST_F(BlockHashTest, HashAllFindOne) { |
| 557 | EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, |
| 558 | test_string_y)); |
| 559 | EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); |
| 560 | } |
| 561 | |
| 562 | TEST_F(BlockHashTest, NonMatchingTextNotFound) { |
| 563 | EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs)); |
| 564 | } |
| 565 | |
| 566 | // Search for unaligned text. The test string is contained in the |
| 567 | // sample text (unlike the non-matching string in NonMatchingTextNotFound, |
| 568 | // above), but it is not aligned on a block boundary. FindMatchingBlock |
| 569 | // will only work if the test string is aligned on a block boundary. |
| 570 | // |
| 571 | // " T h e o n l y" |
| 572 | // ^^^^ Here is the test string |
| 573 | // |
| 574 | TEST_F(BlockHashTest, UnalignedTextNotFound) { |
| 575 | EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, |
| 576 | test_string_unaligned_e)); |
| 577 | } |
| 578 | |
| 579 | TEST_F(BlockHashTest, FindSixMatches) { |
| 580 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, |
| 581 | test_string_e)); |
| 582 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, |
| 583 | test_string_e)); |
| 584 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, |
| 585 | test_string_e)); |
| 586 | EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, |
| 587 | test_string_e)); |
| 588 | EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, |
| 589 | test_string_e)); |
| 590 | EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, |
| 591 | test_string_e)); |
| 592 | EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); |
| 593 | |
| 594 | // Starting over gives same result |
| 595 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, |
| 596 | test_string_e)); |
| 597 | } |
| 598 | |
| 599 | TEST_F(BlockHashTest, AddRangeFindThreeMatches) { |
| 600 | // Add hash values only for those characters before the fourth instance |
| 601 | // of "e" in the sample text. Tests that the ending index |
| 602 | // of AddAllBlocksThroughIndex() is not inclusive: only three matches |
| 603 | // for "e" should be found. |
| 604 | th_->AddAllBlocksThroughIndex(index_of_fourth_e); |
| 605 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 606 | test_string_e)); |
| 607 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 608 | test_string_e)); |
| 609 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 610 | test_string_e)); |
| 611 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); |
| 612 | |
| 613 | // Starting over gives same result |
| 614 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 615 | test_string_e)); |
| 616 | } |
| 617 | |
| 618 | // Try indices that are not even multiples of the block size. |
| 619 | // Add three ranges and verify the results after each |
| 620 | // call to AddAllBlocksThroughIndex(). |
| 621 | TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { |
| 622 | th_->AddAllBlocksThroughIndex(index_of_first_e + 1); |
| 623 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 624 | test_string_e)); |
| 625 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); |
| 626 | |
| 627 | // Starting over gives same result |
| 628 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 629 | test_string_e)); |
| 630 | |
| 631 | // Add the second range to expand the result set |
| 632 | th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); |
| 633 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 634 | test_string_e)); |
| 635 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 636 | test_string_e)); |
| 637 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 638 | test_string_e)); |
| 639 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); |
| 640 | |
| 641 | // Starting over gives same result |
| 642 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 643 | test_string_e)); |
| 644 | |
| 645 | // Add the third range to expand the result set |
| 646 | th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); |
| 647 | |
| 648 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 649 | test_string_e)); |
| 650 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 651 | test_string_e)); |
| 652 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 653 | test_string_e)); |
| 654 | EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| 655 | test_string_e)); |
| 656 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); |
| 657 | |
| 658 | // Starting over gives same result |
| 659 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 660 | test_string_e)); |
| 661 | } |
| 662 | |
| 663 | #ifdef GTEST_HAS_DEATH_TEST |
| 664 | TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) { |
| 665 | th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); |
| 666 | |
| 667 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 668 | test_string_e)); |
| 669 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 670 | test_string_e)); |
| 671 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 672 | test_string_e)); |
| 673 | EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| 674 | test_string_e)); |
| 675 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); |
| 676 | |
| 677 | // Starting over gives same result |
| 678 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 679 | test_string_e)); |
| 680 | |
| 681 | // These calls will produce DFATAL error messages, and will do nothing, |
| 682 | // since the ranges have already been added. |
| 683 | EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3), |
| 684 | "<"); |
| 685 | EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1), |
| 686 | "<"); |
| 687 | |
| 688 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 689 | test_string_e)); |
| 690 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 691 | test_string_e)); |
| 692 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 693 | test_string_e)); |
| 694 | EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| 695 | test_string_e)); |
| 696 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); |
| 697 | |
| 698 | // Starting over gives same result |
| 699 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 700 | test_string_e)); |
| 701 | } |
| 702 | #endif // GTEST_HAS_DEATH_TEST |
| 703 | |
| 704 | TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { |
| 705 | th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); |
| 706 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 707 | test_string_e)); |
| 708 | EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, |
| 709 | test_string_e)); |
| 710 | EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, |
| 711 | test_string_e)); |
| 712 | EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, |
| 713 | test_string_e)); |
| 714 | EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, |
| 715 | test_string_e)); |
| 716 | EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, |
| 717 | test_string_e)); |
| 718 | EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); |
| 719 | |
| 720 | // Starting over gives same result |
| 721 | EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, |
| 722 | test_string_e)); |
| 723 | } |
| 724 | |
| 725 | TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { |
| 726 | BlockHash zero_sized_hash(sample_text, 0, 0); |
| 727 | EXPECT_EQ(true, zero_sized_hash.Init(true)); |
| 728 | EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); |
| 729 | } |
| 730 | |
| 731 | #ifdef GTEST_HAS_DEATH_TEST |
| 732 | TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { |
| 733 | EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")), |
| 734 | "invalid"); |
| 735 | } |
| 736 | |
| 737 | TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { |
| 738 | BlockHash bh(sample_text, strlen(sample_text), 0); |
| 739 | EXPECT_TRUE(bh.Init(false)); |
| 740 | EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); |
| 741 | } |
| 742 | |
| 743 | TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { |
| 744 | BlockHash bh(sample_text, strlen(sample_text), 0); |
| 745 | EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), |
| 746 | "called before"); |
| 747 | } |
| 748 | |
| 749 | TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { |
openvcdiff@gmail.com | 732fff2 | 2010-08-04 18:00:00 +0000 | [diff] [blame] | 750 | EXPECT_DEBUG_DEATH( |
| 751 | th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)), |
| 752 | "higher than end"); |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 753 | } |
| 754 | #endif // GTEST_HAS_DEATH_TEST |
| 755 | |
| 756 | TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { |
| 757 | EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); |
| 758 | } |
| 759 | |
| 760 | TEST_F(BlockHashTest, FindBestMatch) { |
| 761 | dh_->FindBestMatch(hashed_f, |
| 762 | &search_string[index_of_f_in_fearsome], |
| 763 | search_string, |
| 764 | strlen(search_string), |
| 765 | &best_match_); |
| 766 | EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); |
| 767 | EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); |
| 768 | // The match includes the spaces after the final character, |
| 769 | // which is why (kBlockSize - 1) is added to the expected best size. |
| 770 | EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), |
| 771 | best_match_.size()); |
| 772 | } |
| 773 | |
| 774 | TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { |
| 775 | BlockHash th2(sample_text, strlen(sample_text), 0x10000); |
| 776 | th2.Init(true); // hash all blocks |
| 777 | th2.FindBestMatch(hashed_f, |
| 778 | &search_string[index_of_f_in_fearsome], |
| 779 | search_string, |
| 780 | strlen(search_string), |
| 781 | &best_match_); |
| 782 | // Offset should begin with dictionary_size |
| 783 | EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), |
| 784 | best_match_.source_offset()); |
| 785 | EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); |
| 786 | // The match includes the spaces after the final character, |
| 787 | // which is why (kBlockSize - 1) is added to the expected best size. |
| 788 | EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), |
| 789 | best_match_.size()); |
| 790 | } |
| 791 | |
| 792 | TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { |
| 793 | // Hash the "i" in "fear itself" |
| 794 | uint32_t hash_value = RollingHash<kBlockSize>::Hash( |
| 795 | &search_to_end_string[index_of_i_in_itself]); |
| 796 | dh_->FindBestMatch(hash_value, |
| 797 | &search_to_end_string[index_of_i_in_itself], |
| 798 | search_to_end_string, |
| 799 | strlen(search_to_end_string), |
| 800 | &best_match_); |
| 801 | EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); |
| 802 | EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); |
| 803 | EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); |
| 804 | } |
| 805 | |
| 806 | TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { |
| 807 | // Hash the "i" in "fear itself" |
| 808 | uint32_t hash_value = RollingHash<kBlockSize>::Hash( |
| 809 | &search_to_beginning_string[index_of_o_in_online]); |
| 810 | dh_->FindBestMatch(hash_value, |
| 811 | &search_to_beginning_string[index_of_o_in_online], |
| 812 | search_to_beginning_string, |
| 813 | strlen(search_to_beginning_string), |
| 814 | &best_match_); |
| 815 | EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary |
| 816 | EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); |
| 817 | // The match includes the spaces after the final character, |
| 818 | // which is why (kBlockSize - 1) is added to the expected best size. |
| 819 | EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), |
| 820 | best_match_.size()); |
| 821 | } |
| 822 | |
| 823 | TEST_F(BlockHashTest, BestMatchWithManyMatches) { |
| 824 | BlockHash many_matches_hash(sample_text_many_matches, |
| 825 | strlen(sample_text_many_matches), |
| 826 | 0); |
| 827 | EXPECT_TRUE(many_matches_hash.Init(true)); |
| 828 | // Hash the " a" at the beginning of the search string "ababc" |
| 829 | uint32_t hash_value = |
| 830 | RollingHash<kBlockSize>::Hash(search_string_many_matches); |
| 831 | many_matches_hash.FindBestMatch(hash_value, |
| 832 | search_string_many_matches, |
| 833 | search_string_many_matches, |
| 834 | strlen(search_string_many_matches), |
| 835 | &best_match_); |
| 836 | EXPECT_EQ(index_of_ababc, best_match_.source_offset()); |
| 837 | EXPECT_EQ(0, best_match_.target_offset()); |
| 838 | EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); |
| 839 | } |
| 840 | |
| 841 | TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { |
| 842 | char* collision_search_string = new char[strlen(search_string) + 1]; |
| 843 | memcpy(collision_search_string, search_string, strlen(search_string) + 1); |
| 844 | char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; |
| 845 | |
| 846 | // Tweak the collision string so that it has the same hash value |
| 847 | // but different text. The last four characters of the search string |
| 848 | // should be " f", and the bytes given below have the same hash value |
| 849 | // as those characters. |
| 850 | CHECK_GE(kBlockSize, 4); |
| 851 | fearsome_location[kBlockSize - 4] = 0x84; |
| 852 | fearsome_location[kBlockSize - 3] = 0xF1; |
| 853 | fearsome_location[kBlockSize - 2] = 0x51; |
| 854 | fearsome_location[kBlockSize - 1] = 0x00; |
| 855 | EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); |
| 856 | EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], |
| 857 | fearsome_location, |
| 858 | kBlockSize)); |
| 859 | // No match should be found this time. |
| 860 | dh_->FindBestMatch(hashed_f, |
| 861 | fearsome_location, |
| 862 | collision_search_string, |
| 863 | strlen(search_string), // since collision_search_string has embedded \0 |
| 864 | &best_match_); |
| 865 | EXPECT_EQ(-1, best_match_.source_offset()); |
| 866 | EXPECT_EQ(-1, best_match_.target_offset()); |
| 867 | EXPECT_EQ(0U, best_match_.size()); |
| 868 | delete[] collision_search_string; |
| 869 | } |
| 870 | |
| 871 | // If the footprint passed to FindBestMatch does not actually match |
| 872 | // the search string, it should not find any matches. |
| 873 | TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { |
| 874 | dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! |
| 875 | &search_string[index_of_f_in_fearsome], |
| 876 | search_string, |
| 877 | strlen(search_string), |
| 878 | &best_match_); |
| 879 | EXPECT_EQ(-1, best_match_.source_offset()); |
| 880 | EXPECT_EQ(-1, best_match_.target_offset()); |
| 881 | EXPECT_EQ(0U, best_match_.size()); |
| 882 | } |
| 883 | |
| 884 | // Use a dictionary containing 1M copies of the letter 'Q', |
| 885 | // and target data that also contains 1M Qs. If FindBestMatch |
| 886 | // is not throttled to find a maximum number of matches, this |
| 887 | // will take a very long time -- several seconds at least. |
| 888 | // If this test appears to hang, it is because the throttling code |
| 889 | // (see BlockHash::kMaxMatchesToCheck for details) is not working. |
| 890 | TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { |
| 891 | const int kTestSize = 1 << 20; // 1M |
| 892 | char* huge_dictionary = new char[kTestSize]; |
| 893 | memset(huge_dictionary, 'Q', kTestSize); |
| 894 | BlockHash huge_bh(huge_dictionary, kTestSize, 0); |
| 895 | EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); |
| 896 | char* huge_target = new char[kTestSize]; |
| 897 | memset(huge_target, 'Q', kTestSize); |
| 898 | CycleTimer timer; |
| 899 | timer.Start(); |
| 900 | huge_bh.FindBestMatch(hashed_all_Qs, |
| 901 | huge_target + (kTestSize / 2), // middle of target |
| 902 | huge_target, |
| 903 | kTestSize, |
| 904 | &best_match_); |
| 905 | timer.Stop(); |
| 906 | double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 907 | std::cout << "Time to search for best match with 1M matches: " |
| 908 | << elapsed_time_in_us << " us" << std::endl; |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 909 | // All blocks match the candidate block. FindBestMatch should have checked |
| 910 | // a certain number of matches before giving up. The best match |
| 911 | // should include at least half the source and target, since the candidate |
| 912 | // block was in the middle of the target data. |
| 913 | EXPECT_GT((kTestSize / 2), best_match_.source_offset()); |
| 914 | EXPECT_GT((kTestSize / 2), best_match_.target_offset()); |
| 915 | EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 916 | EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds |
| 917 | #ifdef NDEBUG |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 918 | EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second |
openvcdiff | d184578 | 2009-03-20 21:56:15 +0000 | [diff] [blame] | 919 | #endif // NDEBUG |
openvcdiff | 311c714 | 2008-08-26 19:29:25 +0000 | [diff] [blame] | 920 | delete[] huge_target; |
| 921 | delete[] huge_dictionary; |
| 922 | } |
| 923 | |
| 924 | #ifdef GTEST_HAS_DEATH_TEST |
| 925 | TEST_F(BlockHashDeathTest, AddTooManyBlocks) { |
| 926 | for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { |
| 927 | th_->AddOneIndexHash(i * kBlockSize, hashed_e); |
| 928 | } |
| 929 | // Didn't expect another block to be added |
| 930 | EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), |
| 931 | hashed_e), |
| 932 | "AddBlock"); |
| 933 | } |
| 934 | #endif // GTEST_HAS_DEATH_TEST |
| 935 | |
| 936 | } // namespace open_vcdiff |