blob: bca6df51b11ca969c1d8601f3d951801d08ead15 [file] [log] [blame]
openvcdiff311c7142008-08-26 19:29:25 +00001// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include <config.h>
17#include "blockhash.h"
openvcdiff28db8072008-10-10 23:29:11 +000018#include <limits.h> // INT_MIN
19#include <string.h> // memcpy, memcmp, strlen
openvcdiffd1845782009-03-20 21:56:15 +000020#include <iostream>
openvcdiff311c7142008-08-26 19:29:25 +000021#include <memory> // auto_ptr
22#include "encodetable.h"
openvcdiff311c7142008-08-26 19:29:25 +000023#include "rolling_hash.h"
24#include "testing.h"
25
26namespace open_vcdiff {
27
28const int kBlockSize = BlockHash::kBlockSize;
29
30class BlockHashTest : public testing::Test {
31 protected:
32 static const int kTimingTestSize = 1 << 21; // 2M
33 static const int kTimingTestIterations = 32;
34
35 BlockHashTest() {
36 dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
37 strlen(sample_text)));
38 th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
39 EXPECT_TRUE(dh_.get() != NULL);
40 EXPECT_TRUE(th_.get() != NULL);
41 }
42
43 // BlockHashTest is a friend to BlockHash. Expose the protected functions
44 // that will be tested by the children of BlockHashTest.
45 static bool BlockContentsMatch(const char* block1, const char* block2) {
46 return BlockHash::BlockContentsMatch(block1, block2);
47 }
48
49 int FirstMatchingBlock(const BlockHash& block_hash,
50 uint32_t hash_value,
51 const char* block_ptr) const {
52 return block_hash.FirstMatchingBlock(hash_value, block_ptr);
53 }
54
55 int NextMatchingBlock(const BlockHash& block_hash,
56 int block_number,
57 const char* block_ptr) const {
58 return block_hash.NextMatchingBlock(block_number, block_ptr);
59 }
60
61 static int MatchingBytesToLeft(const char* source_match_start,
62 const char* target_match_start,
63 int max_bytes) {
64 return BlockHash::MatchingBytesToLeft(source_match_start,
65 target_match_start,
66 max_bytes);
67 }
68
69 static int MatchingBytesToRight(const char* source_match_end,
70 const char* target_match_end,
71 int max_bytes) {
72 return BlockHash::MatchingBytesToRight(source_match_end,
73 target_match_end,
74 max_bytes);
75 }
76
77 static int StringLengthAsInt(const char* s) {
78 return static_cast<int>(strlen(s));
79 }
80
81 void InitBlocksToDifferAtNthByte(int n) {
82 CHECK(n < kBlockSize);
83 memset(compare_buffer_1_, 0xBE, kTimingTestSize);
84 memset(compare_buffer_2_, 0xBE, kTimingTestSize);
85 for (int index = n; index < kTimingTestSize; index += kBlockSize) {
86 compare_buffer_1_[index] = 0x00;
87 compare_buffer_2_[index] = 0x01;
88 }
89 }
90
91 void TestAndPrintTimesForCompareFunctions(bool should_be_identical);
92
93 void TimingTestForBlocksThatDifferAtByte(int n) {
94 InitBlocksToDifferAtNthByte(n);
openvcdiffd1845782009-03-20 21:56:15 +000095 std::cout << "Comparing blocks that differ at byte " << n << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +000096 TestAndPrintTimesForCompareFunctions(false);
97 }
98
99 // Copy sample_text_without_spaces and search_string_without_spaces
100 // into newly allocated sample_text and search_string buffers,
101 // but pad them with space characters so that every character
102 // in sample_text_without_spaces matches (kBlockSize - 1)
103 // space characters in sample_text, followed by that character.
104 // For example:
105 // Since sample_text_without_spaces begins "The only thing"...,
106 // if kBlockSize is 4, then 3 space characters will be inserted
107 // between each letter of sample_text, as follows:
108 // " T h e o n l y t h i n g"...
109 // This makes testing simpler, because finding a kBlockSize-byte match
110 // between the sample text and search string only depends on the
111 // trailing letter in each block.
112 static void MakeEachLetterABlock(const char* string_without_spaces,
113 const char** result) {
114 const size_t length_without_spaces = strlen(string_without_spaces);
115 char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
116 memset(padded_text, ' ', kBlockSize * length_without_spaces);
117 char* padded_text_ptr = padded_text + (kBlockSize - 1);
118 for (size_t i = 0; i < length_without_spaces; ++i) {
119 *padded_text_ptr = string_without_spaces[i];
120 padded_text_ptr += kBlockSize;
121 }
122 padded_text[kBlockSize * length_without_spaces] = '\0';
123 *result = padded_text;
124 }
125
126 static void SetUpTestCase() {
127 MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
128 MakeEachLetterABlock(search_string_without_spaces, &search_string);
129 MakeEachLetterABlock(search_string_altered_without_spaces,
130 &search_string_altered);
131 MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
132 MakeEachLetterABlock(search_to_beginning_without_spaces,
133 &search_to_beginning_string);
134 MakeEachLetterABlock(sample_text_many_matches_without_spaces,
135 &sample_text_many_matches);
136 MakeEachLetterABlock(search_string_many_matches_without_spaces,
137 &search_string_many_matches);
138 MakeEachLetterABlock("y", &test_string_y);
139 MakeEachLetterABlock("e", &test_string_e);
140 char* new_test_string_unaligned_e = new char[kBlockSize];
141 memset(new_test_string_unaligned_e, ' ', kBlockSize);
142 new_test_string_unaligned_e[kBlockSize - 2] = 'e';
143 test_string_unaligned_e = new_test_string_unaligned_e;
144 char* new_test_string_all_Qs = new char[kBlockSize];
145 memset(new_test_string_all_Qs, 'Q', kBlockSize);
146 test_string_all_Qs = new_test_string_all_Qs;
147 hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
148 hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
149 hashed_f =
150 RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
151 hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
152 hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
153 }
154
155 static void TearDownTestCase() {
156 delete[] sample_text;
157 delete[] search_string;
158 delete[] search_string_altered;
159 delete[] search_to_end_string;
160 delete[] search_to_beginning_string;
161 delete[] sample_text_many_matches;
162 delete[] search_string_many_matches;
163 delete[] test_string_y;
164 delete[] test_string_e;
165 delete[] test_string_unaligned_e;
166 delete[] test_string_all_Qs;
167 }
168
169 // Each block in the sample text and search string is kBlockSize bytes long,
170 // and consists of (kBlockSize - 1) space characters
171 // followed by a single letter of text.
172
173 // Block numbers of certain characters within the sample text:
174 // All six occurrences of "e", in order.
175 static const int block_of_first_e = 2;
176 static const int block_of_second_e = 16;
177 static const int block_of_third_e = 21;
178 static const int block_of_fourth_e = 27;
179 static const int block_of_fifth_e = 35;
180 static const int block_of_sixth_e = 42;
181
182 static const int block_of_y_in_only = 7;
183 // The block number is multiplied by kBlockSize to arrive at the
184 // index, which points to the (kBlockSize - 1) space characters before
185 // the letter specified.
186 // Indices of certain characters within the sample text.
187 static const int index_of_first_e = block_of_first_e * kBlockSize;
188 static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
189 static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
190 static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
191 static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
192 static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
193 static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
194 static const int index_of_space_before_fear_itself = 33 * kBlockSize;
195 static const int index_of_space_before_itself = 38 * kBlockSize;
196 static const int index_of_ababc = 4 * kBlockSize;
197
198 // Indices of certain characters within the search strings.
199 static const int index_of_second_w_in_what_we = 5 * kBlockSize;
200 static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
201 static const int index_of_f_in_fearsome = 16 * kBlockSize;
202 static const int index_of_space_in_eat_itself = 12 * kBlockSize;
203 static const int index_of_i_in_itself = 13 * kBlockSize;
204 static const int index_of_t_in_use_the = 4 * kBlockSize;
205 static const int index_of_o_in_online = 8 * kBlockSize;
206
207 static const char sample_text_without_spaces[];
208 static const char search_string_without_spaces[];
209 static const char search_string_altered_without_spaces[];
210 static const char search_to_end_without_spaces[];
211 static const char search_to_beginning_without_spaces[];
212 static const char sample_text_many_matches_without_spaces[];
213 static const char search_string_many_matches_without_spaces[];
214
215 static const char* sample_text;
216 static const char* search_string;
217 static const char* search_string_altered;
218 static const char* search_to_end_string;
219 static const char* search_to_beginning_string;
220 static const char* sample_text_many_matches;
221 static const char* search_string_many_matches;
222
223 static const char* test_string_y;
224 static const char* test_string_e;
225 static const char* test_string_all_Qs;
226 static const char* test_string_unaligned_e;
227
228 static uint32_t hashed_y;
229 static uint32_t hashed_e;
230 static uint32_t hashed_f;
231 static uint32_t hashed_unaligned_e;
232 static uint32_t hashed_all_Qs;
233
234 // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
235 std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup
236 std::auto_ptr<BlockHash> th_; // hash table not populated;
237 // used to test incremental adds
238
239 BlockHash::Match best_match_;
240 char* compare_buffer_1_;
241 char* compare_buffer_2_;
242 int prime_result_;
243};
244
245#ifdef GTEST_HAS_DEATH_TEST
246typedef BlockHashTest BlockHashDeathTest;
247#endif // GTEST_HAS_DEATH_TEST
248
249// The C++ standard requires a separate definition of these static const values,
250// even though their initializers are given within the class definition.
251const int BlockHashTest::block_of_first_e;
252const int BlockHashTest::block_of_second_e;
253const int BlockHashTest::block_of_third_e;
254const int BlockHashTest::block_of_fourth_e;
255const int BlockHashTest::block_of_fifth_e;
256const int BlockHashTest::block_of_sixth_e;
257const int BlockHashTest::block_of_y_in_only;
258const int BlockHashTest::index_of_first_e;
259const int BlockHashTest::index_of_fourth_e;
260const int BlockHashTest::index_of_sixth_e;
261const int BlockHashTest::index_of_y_in_only;
262const int BlockHashTest::index_of_space_before_fear_is_fear;
263const int BlockHashTest::index_of_longest_match_ear_is_fear;
264const int BlockHashTest::index_of_i_in_fear_is_fear;
265const int BlockHashTest::index_of_space_before_fear_itself;
266const int BlockHashTest::index_of_space_before_itself;
267const int BlockHashTest::index_of_ababc;
268const int BlockHashTest::index_of_second_w_in_what_we;
269const int BlockHashTest::index_of_second_e_in_what_we_hear;
270const int BlockHashTest::index_of_f_in_fearsome;
271const int BlockHashTest::index_of_space_in_eat_itself;
272const int BlockHashTest::index_of_i_in_itself;
273const int BlockHashTest::index_of_t_in_use_the;
274const int BlockHashTest::index_of_o_in_online;
275
276const char BlockHashTest::sample_text_without_spaces[] =
277 "The only thing we have to fear is fear itself";
278
279const char BlockHashTest::search_string_without_spaces[] =
280 "What we hear is fearsome";
281
282const char BlockHashTest::search_string_altered_without_spaces[] =
283 "Vhat ve hear is fearsomm";
284
285const char BlockHashTest::search_to_end_without_spaces[] =
286 "Pop will eat itself, eventually";
287
288const char BlockHashTest::search_to_beginning_without_spaces[] =
289 "Use The online dictionary";
290
291const char BlockHashTest::sample_text_many_matches_without_spaces[] =
292 "ababababcab";
293
294const char BlockHashTest::search_string_many_matches_without_spaces[] =
295 "ababc";
296
297const char* BlockHashTest::sample_text = NULL;
298const char* BlockHashTest::search_string = NULL;
299const char* BlockHashTest::search_string_altered = NULL;
300const char* BlockHashTest::search_to_end_string = NULL;
301const char* BlockHashTest::search_to_beginning_string = NULL;
302const char* BlockHashTest::sample_text_many_matches = NULL;
303const char* BlockHashTest::search_string_many_matches = NULL;
304
305const char* BlockHashTest::test_string_y = NULL;
306const char* BlockHashTest::test_string_e = NULL;
307const char* BlockHashTest::test_string_unaligned_e = NULL;
308const char* BlockHashTest::test_string_all_Qs = NULL;
309
310uint32_t BlockHashTest::hashed_y = 0;
311uint32_t BlockHashTest::hashed_e = 0;
312uint32_t BlockHashTest::hashed_f = 0;
313uint32_t BlockHashTest::hashed_unaligned_e = 0;
314uint32_t BlockHashTest::hashed_all_Qs = 0;
315
316void BlockHashTest::TestAndPrintTimesForCompareFunctions(
317 bool should_be_identical) {
318 CHECK(compare_buffer_1_ != NULL);
319 CHECK(compare_buffer_2_ != NULL);
320 // Prime the memory cache.
321 prime_result_ =
322 memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
323 const char* const block1_limit =
324 &compare_buffer_1_[kTimingTestSize - kBlockSize];
325 int block_compare_words_result = 0;
326 CycleTimer block_compare_words_timer;
327 block_compare_words_timer.Start();
328 for (int i = 0; i < kTimingTestIterations; ++i) {
329 const char* block1 = compare_buffer_1_;
330 const char* block2 = compare_buffer_2_;
331 while (block1 < block1_limit) {
332 if (!BlockHash::BlockCompareWords(block1, block2)) {
333 ++block_compare_words_result;
334 }
335 block1 += kBlockSize;
336 block2 += kBlockSize;
337 }
338 }
339 block_compare_words_timer.Stop();
340 double time_for_block_compare_words =
341 static_cast<double>(block_compare_words_timer.GetInUsec())
342 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
343 int block_contents_match_result = 0;
344 CycleTimer block_contents_match_timer;
345 block_contents_match_timer.Start();
346 for (int i = 0; i < kTimingTestIterations; ++i) {
347 const char* block1 = compare_buffer_1_;
348 const char* block2 = compare_buffer_2_;
349 while (block1 < block1_limit) {
350 if (!BlockHash::BlockContentsMatch(block1, block2)) {
351 ++block_contents_match_result;
352 }
353 block1 += kBlockSize;
354 block2 += kBlockSize;
355 }
356 }
357 block_contents_match_timer.Stop();
358 double time_for_block_contents_match =
359 static_cast<double>(block_contents_match_timer.GetInUsec())
360 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
361 EXPECT_EQ(block_contents_match_result, block_compare_words_result);
362 if (should_be_identical) {
363 CHECK_EQ(0, block_compare_words_result);
364 } else {
365 CHECK_GT(block_compare_words_result, 0);
366 }
openvcdiffd1845782009-03-20 21:56:15 +0000367 std::cout << "BlockHash::BlockCompareWords: "
368 << time_for_block_compare_words << " us per operation" << std::endl;
369 std::cout << "BlockHash::BlockContentsMatch: "
370 << time_for_block_contents_match << " us per operation"
371 << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000372 if (time_for_block_compare_words > 0) {
373 double percent_change =
374 (((time_for_block_contents_match - time_for_block_compare_words)
375 / time_for_block_compare_words) * 100.0);
376 if (percent_change >= 0.0) {
openvcdiffd1845782009-03-20 21:56:15 +0000377 std::cout << "BlockContentsMatch is " << percent_change << "%"
378 << " SLOWER than BlockCompareWords" << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000379 } else {
openvcdiffd1845782009-03-20 21:56:15 +0000380 std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
381 << " FASTER than BlockCompareWords" << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000382 }
383 }
openvcdiff28db8072008-10-10 23:29:11 +0000384#if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
openvcdiff311c7142008-08-26 19:29:25 +0000385 // Only check timings for optimized build. There's plenty of margin: this
386 // check will fail only if BlockContentsMatch is at least twice as slow as
387 // BlockCompareWords.
388 EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
openvcdiff28db8072008-10-10 23:29:11 +0000389#endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
openvcdiff311c7142008-08-26 19:29:25 +0000390}
391
392// The two strings passed to BlockHash::MatchingBytesToLeft do have matching
393// characters -- in fact, they're the same string -- but since max_bytes is zero
394// or negative, BlockHash::MatchingBytesToLeft should not read from the strings
395// and should return 0.
396TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
397 EXPECT_EQ(0, MatchingBytesToLeft(
398 &search_string[index_of_f_in_fearsome],
399 &search_string[index_of_f_in_fearsome],
400 0));
401 EXPECT_EQ(0, MatchingBytesToRight(
402 &search_string[index_of_f_in_fearsome],
403 &search_string[index_of_f_in_fearsome],
404 0));
405}
406
407TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
408 EXPECT_EQ(0, MatchingBytesToLeft(
409 &search_string[index_of_f_in_fearsome],
410 &search_string[index_of_f_in_fearsome],
411 -1));
412 EXPECT_EQ(0, MatchingBytesToLeft(
413 &search_string[index_of_f_in_fearsome],
414 &search_string[index_of_f_in_fearsome],
415 INT_MIN));
416 EXPECT_EQ(0, MatchingBytesToRight(
417 &search_string[index_of_f_in_fearsome],
418 &search_string[index_of_f_in_fearsome],
419 -1));
420 EXPECT_EQ(0, MatchingBytesToRight(
421 &search_string[index_of_f_in_fearsome],
422 &search_string[index_of_f_in_fearsome],
423 INT_MIN));
424}
425
426TEST_F(BlockHashTest, MaxBytesOneMatch) {
427 EXPECT_EQ(1, MatchingBytesToLeft(
428 &search_string[index_of_f_in_fearsome],
429 &search_string[index_of_f_in_fearsome],
430 1));
431 EXPECT_EQ(1, MatchingBytesToRight(
432 &search_string[index_of_f_in_fearsome],
433 &search_string[index_of_f_in_fearsome],
434 1));
435}
436
437TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
438 EXPECT_EQ(0, MatchingBytesToLeft(
439 &search_string[index_of_f_in_fearsome],
440 &search_string[index_of_second_e_in_what_we_hear],
441 1));
442 EXPECT_EQ(0, MatchingBytesToRight(
443 &search_string[index_of_f_in_fearsome],
444 &search_string[index_of_second_e_in_what_we_hear - 1],
445 1));
446}
447
448TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
449 // The number of bytes that match between the original "we hear is fearsome"
450 // and the altered "ve hear is fearsome".
451 const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
452 const int max_bytes = expected_length - 1;
453 EXPECT_EQ(max_bytes, MatchingBytesToLeft(
454 &search_string[index_of_f_in_fearsome],
455 &search_string_altered[index_of_f_in_fearsome],
456 max_bytes));
457}
458
459TEST_F(BlockHashTest, LeftNotLimited) {
460 // The number of bytes that match between the original "we hear is fearsome"
461 // and the altered "ve hear is fearsome".
462 const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
463 const int max_bytes = expected_length + 1;
464 EXPECT_EQ(expected_length, MatchingBytesToLeft(
465 &search_string[index_of_f_in_fearsome],
466 &search_string_altered[index_of_f_in_fearsome],
467 max_bytes));
468 EXPECT_EQ(expected_length, MatchingBytesToLeft(
469 &search_string[index_of_f_in_fearsome],
470 &search_string_altered[index_of_f_in_fearsome],
471 INT_MAX));
472}
473
474TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
475 // The number of bytes that match between the original "fearsome"
476 // and the altered "fearsomm".
477 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
478 + (kBlockSize - 1); // spacing between letters
479 const int max_bytes = expected_length - 1;
480 EXPECT_EQ(max_bytes, MatchingBytesToRight(
481 &search_string[index_of_f_in_fearsome],
482 &search_string_altered[index_of_f_in_fearsome],
483 max_bytes));
484}
485
486TEST_F(BlockHashTest, RightNotLimited) {
487 // The number of bytes that match between the original "we hear is fearsome"
488 // and the altered "ve hear is fearsome".
489 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
490 + (kBlockSize - 1); // spacing between letters
491 const int max_bytes = expected_length + 1;
492 EXPECT_EQ(expected_length, MatchingBytesToRight(
493 &search_string[index_of_f_in_fearsome],
494 &search_string_altered[index_of_f_in_fearsome],
495 max_bytes));
496 EXPECT_EQ(expected_length, MatchingBytesToRight(
497 &search_string[index_of_f_in_fearsome],
498 &search_string_altered[index_of_f_in_fearsome],
499 INT_MAX));
500}
501
502// If this test fails in a non-x86 or non-gcc environment, consider adding
503// -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
504// Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc
505// to use a special implementation (BlockCompareWords) to compare blocks
506// rather than using standard memcmp.
507TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
508 compare_buffer_1_ = new char[kTimingTestSize];
509 compare_buffer_2_ = new char[kTimingTestSize];
510
511 // The value 0xBE is arbitrarily chosen. First test with identical contents
512 // in the buffers, so that the comparison functions cannot short-circuit
513 // and will return true.
514 memset(compare_buffer_1_, 0xBE, kTimingTestSize);
515 memset(compare_buffer_2_, 0xBE, kTimingTestSize);
openvcdiffd1845782009-03-20 21:56:15 +0000516 std::cout << "Comparing "
openvcdiff311c7142008-08-26 19:29:25 +0000517 << (kTimingTestSize / kBlockSize) << " identical values:"
openvcdiffd1845782009-03-20 21:56:15 +0000518 << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000519 TestAndPrintTimesForCompareFunctions(true);
520
521 // Now change one value in the middle of one buffer, so that the contents
522 // are no longer the same.
523 compare_buffer_1_[kTimingTestSize / 2] = 0x00;
openvcdiffd1845782009-03-20 21:56:15 +0000524 std::cout << "Comparing "
openvcdiff311c7142008-08-26 19:29:25 +0000525 << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
openvcdiffd1845782009-03-20 21:56:15 +0000526 << " and one mismatch:" << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000527 TestAndPrintTimesForCompareFunctions(false);
528
529 // Set one of the bytes of each block to differ so that
530 // none of the compare operations will return true, and run timing tests.
531 // In practice, BlockHash::BlockContentsMatch will only be called
532 // for two blocks whose hash values match, and the two most important
533 // cases are: (1) the blocks are identical, or (2) none of their bytes match.
534 TimingTestForBlocksThatDifferAtByte(0);
535 TimingTestForBlocksThatDifferAtByte(1);
536 TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
537 TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
538
539 delete[] compare_buffer_1_;
540 delete[] compare_buffer_2_;
541}
542
543TEST_F(BlockHashTest, FindFailsBeforeHashing) {
544 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
545}
546
547TEST_F(BlockHashTest, HashOneFindOne) {
548 for (int i = 0; i <= index_of_y_in_only; ++i) {
549 th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
550 }
551 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
552 test_string_y));
553 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
554}
555
556TEST_F(BlockHashTest, HashAllFindOne) {
557 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
558 test_string_y));
559 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
560}
561
562TEST_F(BlockHashTest, NonMatchingTextNotFound) {
563 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
564}
565
566// Search for unaligned text. The test string is contained in the
567// sample text (unlike the non-matching string in NonMatchingTextNotFound,
568// above), but it is not aligned on a block boundary. FindMatchingBlock
569// will only work if the test string is aligned on a block boundary.
570//
571// " T h e o n l y"
572// ^^^^ Here is the test string
573//
574TEST_F(BlockHashTest, UnalignedTextNotFound) {
575 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
576 test_string_unaligned_e));
577}
578
579TEST_F(BlockHashTest, FindSixMatches) {
580 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
581 test_string_e));
582 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
583 test_string_e));
584 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
585 test_string_e));
586 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
587 test_string_e));
588 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
589 test_string_e));
590 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
591 test_string_e));
592 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
593
594 // Starting over gives same result
595 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
596 test_string_e));
597}
598
599TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
600 // Add hash values only for those characters before the fourth instance
601 // of "e" in the sample text. Tests that the ending index
602 // of AddAllBlocksThroughIndex() is not inclusive: only three matches
603 // for "e" should be found.
604 th_->AddAllBlocksThroughIndex(index_of_fourth_e);
605 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
606 test_string_e));
607 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
608 test_string_e));
609 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
610 test_string_e));
611 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
612
613 // Starting over gives same result
614 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
615 test_string_e));
616}
617
618// Try indices that are not even multiples of the block size.
619// Add three ranges and verify the results after each
620// call to AddAllBlocksThroughIndex().
621TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
622 th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
623 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
624 test_string_e));
625 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
626
627 // Starting over gives same result
628 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
629 test_string_e));
630
631 // Add the second range to expand the result set
632 th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
633 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
634 test_string_e));
635 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
636 test_string_e));
637 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
638 test_string_e));
639 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
640
641 // Starting over gives same result
642 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
643 test_string_e));
644
645 // Add the third range to expand the result set
646 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
647
648 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
649 test_string_e));
650 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
651 test_string_e));
652 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
653 test_string_e));
654 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
655 test_string_e));
656 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
657
658 // Starting over gives same result
659 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
660 test_string_e));
661}
662
663#ifdef GTEST_HAS_DEATH_TEST
664TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
665 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
666
667 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
668 test_string_e));
669 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
670 test_string_e));
671 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
672 test_string_e));
673 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
674 test_string_e));
675 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
676
677 // Starting over gives same result
678 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
679 test_string_e));
680
681 // These calls will produce DFATAL error messages, and will do nothing,
682 // since the ranges have already been added.
683 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
684 "<");
685 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
686 "<");
687
688 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
689 test_string_e));
690 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
691 test_string_e));
692 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
693 test_string_e));
694 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
695 test_string_e));
696 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
697
698 // Starting over gives same result
699 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
700 test_string_e));
701}
702#endif // GTEST_HAS_DEATH_TEST
703
704TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
705 th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
706 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
707 test_string_e));
708 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
709 test_string_e));
710 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
711 test_string_e));
712 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
713 test_string_e));
714 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
715 test_string_e));
716 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
717 test_string_e));
718 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
719
720 // Starting over gives same result
721 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
722 test_string_e));
723}
724
725TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
726 BlockHash zero_sized_hash(sample_text, 0, 0);
727 EXPECT_EQ(true, zero_sized_hash.Init(true));
728 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
729}
730
731#ifdef GTEST_HAS_DEATH_TEST
732TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
733 EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")),
734 "invalid");
735}
736
737TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
738 BlockHash bh(sample_text, strlen(sample_text), 0);
739 EXPECT_TRUE(bh.Init(false));
740 EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
741}
742
743TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
744 BlockHash bh(sample_text, strlen(sample_text), 0);
745 EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
746 "called before");
747}
748
749TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
openvcdiff@gmail.com732fff22010-08-04 18:00:00 +0000750 EXPECT_DEBUG_DEATH(
751 th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)),
752 "higher than end");
openvcdiff311c7142008-08-26 19:29:25 +0000753}
754#endif // GTEST_HAS_DEATH_TEST
755
756TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
757 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
758}
759
760TEST_F(BlockHashTest, FindBestMatch) {
761 dh_->FindBestMatch(hashed_f,
762 &search_string[index_of_f_in_fearsome],
763 search_string,
764 strlen(search_string),
765 &best_match_);
766 EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
767 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
768 // The match includes the spaces after the final character,
769 // which is why (kBlockSize - 1) is added to the expected best size.
770 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
771 best_match_.size());
772}
773
774TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
775 BlockHash th2(sample_text, strlen(sample_text), 0x10000);
776 th2.Init(true); // hash all blocks
777 th2.FindBestMatch(hashed_f,
778 &search_string[index_of_f_in_fearsome],
779 search_string,
780 strlen(search_string),
781 &best_match_);
782 // Offset should begin with dictionary_size
783 EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
784 best_match_.source_offset());
785 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
786 // The match includes the spaces after the final character,
787 // which is why (kBlockSize - 1) is added to the expected best size.
788 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
789 best_match_.size());
790}
791
792TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
793 // Hash the "i" in "fear itself"
794 uint32_t hash_value = RollingHash<kBlockSize>::Hash(
795 &search_to_end_string[index_of_i_in_itself]);
796 dh_->FindBestMatch(hash_value,
797 &search_to_end_string[index_of_i_in_itself],
798 search_to_end_string,
799 strlen(search_to_end_string),
800 &best_match_);
801 EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
802 EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
803 EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
804}
805
806TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
807 // Hash the "i" in "fear itself"
808 uint32_t hash_value = RollingHash<kBlockSize>::Hash(
809 &search_to_beginning_string[index_of_o_in_online]);
810 dh_->FindBestMatch(hash_value,
811 &search_to_beginning_string[index_of_o_in_online],
812 search_to_beginning_string,
813 strlen(search_to_beginning_string),
814 &best_match_);
815 EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary
816 EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
817 // The match includes the spaces after the final character,
818 // which is why (kBlockSize - 1) is added to the expected best size.
819 EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
820 best_match_.size());
821}
822
823TEST_F(BlockHashTest, BestMatchWithManyMatches) {
824 BlockHash many_matches_hash(sample_text_many_matches,
825 strlen(sample_text_many_matches),
826 0);
827 EXPECT_TRUE(many_matches_hash.Init(true));
828 // Hash the " a" at the beginning of the search string "ababc"
829 uint32_t hash_value =
830 RollingHash<kBlockSize>::Hash(search_string_many_matches);
831 many_matches_hash.FindBestMatch(hash_value,
832 search_string_many_matches,
833 search_string_many_matches,
834 strlen(search_string_many_matches),
835 &best_match_);
836 EXPECT_EQ(index_of_ababc, best_match_.source_offset());
837 EXPECT_EQ(0, best_match_.target_offset());
838 EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
839}
840
841TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
842 char* collision_search_string = new char[strlen(search_string) + 1];
843 memcpy(collision_search_string, search_string, strlen(search_string) + 1);
844 char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
845
846 // Tweak the collision string so that it has the same hash value
847 // but different text. The last four characters of the search string
848 // should be " f", and the bytes given below have the same hash value
849 // as those characters.
850 CHECK_GE(kBlockSize, 4);
851 fearsome_location[kBlockSize - 4] = 0x84;
852 fearsome_location[kBlockSize - 3] = 0xF1;
853 fearsome_location[kBlockSize - 2] = 0x51;
854 fearsome_location[kBlockSize - 1] = 0x00;
855 EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
856 EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
857 fearsome_location,
858 kBlockSize));
859 // No match should be found this time.
860 dh_->FindBestMatch(hashed_f,
861 fearsome_location,
862 collision_search_string,
863 strlen(search_string), // since collision_search_string has embedded \0
864 &best_match_);
865 EXPECT_EQ(-1, best_match_.source_offset());
866 EXPECT_EQ(-1, best_match_.target_offset());
867 EXPECT_EQ(0U, best_match_.size());
868 delete[] collision_search_string;
869}
870
871// If the footprint passed to FindBestMatch does not actually match
872// the search string, it should not find any matches.
873TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
874 dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"!
875 &search_string[index_of_f_in_fearsome],
876 search_string,
877 strlen(search_string),
878 &best_match_);
879 EXPECT_EQ(-1, best_match_.source_offset());
880 EXPECT_EQ(-1, best_match_.target_offset());
881 EXPECT_EQ(0U, best_match_.size());
882}
883
884// Use a dictionary containing 1M copies of the letter 'Q',
885// and target data that also contains 1M Qs. If FindBestMatch
886// is not throttled to find a maximum number of matches, this
887// will take a very long time -- several seconds at least.
888// If this test appears to hang, it is because the throttling code
889// (see BlockHash::kMaxMatchesToCheck for details) is not working.
890TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
891 const int kTestSize = 1 << 20; // 1M
892 char* huge_dictionary = new char[kTestSize];
893 memset(huge_dictionary, 'Q', kTestSize);
894 BlockHash huge_bh(huge_dictionary, kTestSize, 0);
895 EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
896 char* huge_target = new char[kTestSize];
897 memset(huge_target, 'Q', kTestSize);
898 CycleTimer timer;
899 timer.Start();
900 huge_bh.FindBestMatch(hashed_all_Qs,
901 huge_target + (kTestSize / 2), // middle of target
902 huge_target,
903 kTestSize,
904 &best_match_);
905 timer.Stop();
906 double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
openvcdiffd1845782009-03-20 21:56:15 +0000907 std::cout << "Time to search for best match with 1M matches: "
908 << elapsed_time_in_us << " us" << std::endl;
openvcdiff311c7142008-08-26 19:29:25 +0000909 // All blocks match the candidate block. FindBestMatch should have checked
910 // a certain number of matches before giving up. The best match
911 // should include at least half the source and target, since the candidate
912 // block was in the middle of the target data.
913 EXPECT_GT((kTestSize / 2), best_match_.source_offset());
914 EXPECT_GT((kTestSize / 2), best_match_.target_offset());
915 EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
openvcdiffd1845782009-03-20 21:56:15 +0000916 EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds
917#ifdef NDEBUG
openvcdiff311c7142008-08-26 19:29:25 +0000918 EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second
openvcdiffd1845782009-03-20 21:56:15 +0000919#endif // NDEBUG
openvcdiff311c7142008-08-26 19:29:25 +0000920 delete[] huge_target;
921 delete[] huge_dictionary;
922}
923
924#ifdef GTEST_HAS_DEATH_TEST
925TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
926 for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
927 th_->AddOneIndexHash(i * kBlockSize, hashed_e);
928 }
929 // Didn't expect another block to be added
930 EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
931 hashed_e),
932 "AddBlock");
933}
934#endif // GTEST_HAS_DEATH_TEST
935
936} // namespace open_vcdiff