blob: 6b826d10f50271192461bd3c4d0f0202f3e7f939 [file] [log] [blame]
Samuel Huang06f1ae92018-03-13 18:19:34 +00001// Copyright 2017 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/zucchini/equivalence_map.h"
6
7#include <cstring>
Samuel Huang8fdb8ba2018-06-26 14:47:02 +00008#include <deque>
9#include <map>
10#include <string>
Samuel Huang06f1ae92018-03-13 18:19:34 +000011#include <utility>
12#include <vector>
13
14#include "components/zucchini/encoded_view.h"
15#include "components/zucchini/image_index.h"
16#include "components/zucchini/suffix_array.h"
17#include "components/zucchini/targets_affinity.h"
18#include "components/zucchini/test_disassembler.h"
19#include "testing/gtest/include/gtest/gtest.h"
20
21namespace zucchini {
22
23namespace {
24
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +000025using OffsetDeque = std::deque<offset_t>;
Samuel Huang06f1ae92018-03-13 18:19:34 +000026
27// Make all references 2 bytes long.
28constexpr offset_t kReferenceSize = 2;
29
30// Creates and initialize an ImageIndex from |a| and with 2 types of references.
31// The result is populated with |refs0| and |refs1|. |a| is expected to be a
32// string literal valid for the lifetime of the object.
33ImageIndex MakeImageIndexForTesting(const char* a,
34 std::vector<Reference>&& refs0,
35 std::vector<Reference>&& refs1) {
36 TestDisassembler disasm(
37 {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0),
38 {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1),
39 {kReferenceSize, TypeTag(2), PoolTag(1)}, {});
40
41 ImageIndex image_index(
42 ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a)));
43
44 EXPECT_TRUE(image_index.Initialize(&disasm));
45 return image_index;
46}
47
48std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting(
49 const ImageIndex& old_image_index,
50 const ImageIndex& new_image_index,
51 const EquivalenceMap& equivalence_map) {
52 std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount());
53 for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) {
54 PoolTag pool_tag = old_pool_tag_and_targets.first;
55 target_affinities[pool_tag.value()].InferFromSimilarities(
56 equivalence_map, old_pool_tag_and_targets.second.targets(),
57 new_image_index.pool(pool_tag).targets());
58 }
59 return target_affinities;
60}
61
62} // namespace
63
64TEST(EquivalenceMapTest, GetTokenSimilarity) {
65 ImageIndex old_index = MakeImageIndexForTesting(
66 "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}});
67 // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted.
68 ImageIndex new_index = MakeImageIndexForTesting(
69 "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}});
70 std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting(
71 old_index, new_index,
72 EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}}));
73
74 // Raw match.
75 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0));
76 // Raw mismatch.
77 EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
78 EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0));
79
80 // Type mismatch.
81 EXPECT_EQ(kMismatchFatal,
82 GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
83 EXPECT_EQ(kMismatchFatal,
84 GetTokenSimilarity(old_index, new_index, affinities, 2, 0));
85 EXPECT_EQ(kMismatchFatal,
86 GetTokenSimilarity(old_index, new_index, affinities, 2, 10));
87 EXPECT_EQ(kMismatchFatal,
88 GetTokenSimilarity(old_index, new_index, affinities, 10, 1));
89
90 // Reference strong match.
91 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
92 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6));
93
94 // Reference weak match.
95 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4));
96 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8));
97 EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4));
98
99 // Weak match is not greater than strong match.
100 EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4),
101 GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
102
103 // Reference mismatch.
104 EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4));
105 EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6));
106}
107
108TEST(EquivalenceMapTest, GetEquivalenceSimilarity) {
109 ImageIndex image_index =
110 MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}});
111 std::vector<TargetsAffinity> affinities =
112 MakeTargetsAffinitiesForTesting(image_index, image_index, {});
113
114 // Sanity check. These are no-op with length-0 equivalences.
115 EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
116 {0, 0, 0}));
117 EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
118 {0, 3, 0}));
119 EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
120 {3, 0, 0}));
121
122 // Now examine larger equivalences.
123 EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
124 {0, 0, 3}));
125 EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
126 {0, 3, 3}));
127 EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
128 {3, 0, 3}));
129
130 EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
131 {6, 6, 4}));
132}
133
134TEST(EquivalenceMapTest, ExtendEquivalenceForward) {
135 auto test_extend_forward =
136 [](const ImageIndex old_index, const ImageIndex new_index,
137 const EquivalenceCandidate& equivalence, double base_similarity) {
138 return ExtendEquivalenceForward(
139 old_index, new_index,
140 MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
141 equivalence, base_similarity)
142 .eq;
143 };
144
145 EXPECT_EQ(Equivalence({0, 0, 0}),
146 test_extend_forward(MakeImageIndexForTesting("", {}, {}),
147 MakeImageIndexForTesting("", {}, {}),
148 {{0, 0, 0}, 0.0}, 8.0));
149
150 EXPECT_EQ(Equivalence({0, 0, 0}),
151 test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
152 MakeImageIndexForTesting("zzzz", {}, {}),
153 {{0, 0, 0}, 0.0}, 8.0));
154
155 EXPECT_EQ(Equivalence({0, 0, 6}),
156 test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
157 MakeImageIndexForTesting("banana", {}, {}),
158 {{0, 0, 0}, 0.0}, 8.0));
159
160 EXPECT_EQ(Equivalence({2, 2, 4}),
161 test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
162 MakeImageIndexForTesting("banana", {}, {}),
163 {{2, 2, 0}, 0.0}, 8.0));
164
165 EXPECT_EQ(Equivalence({0, 0, 6}),
166 test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}),
167 MakeImageIndexForTesting("bananayy", {}, {}),
168 {{0, 0, 0}, 0.0}, 8.0));
169
170 EXPECT_EQ(
171 Equivalence({0, 0, 8}),
172 test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
173 MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
174 {{0, 0, 0}, 0.0}, 8.0));
175
176 EXPECT_EQ(
177 Equivalence({0, 0, 6}),
178 test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
179 MakeImageIndexForTesting("banana22", {}, {{6, 0}}),
180 {{0, 0, 0}, 0.0}, 8.0));
181
182 EXPECT_EQ(
183 Equivalence({0, 0, 17}),
184 test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
185 MakeImageIndexForTesting("bananayypineapple", {}, {}),
186 {{0, 0, 0}, 0.0}, 8.0));
187
188 EXPECT_EQ(
189 Equivalence({3, 0, 19}),
190 test_extend_forward(
191 MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
192 MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
193 {{3, 0, 0}, 0.0}, 8.0));
194}
195
196TEST(EquivalenceMapTest, ExtendEquivalenceBackward) {
197 auto test_extend_backward =
198 [](const ImageIndex old_index, const ImageIndex new_index,
199 const EquivalenceCandidate& equivalence, double base_similarity) {
200 return ExtendEquivalenceBackward(
201 old_index, new_index,
202 MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
203 equivalence, base_similarity)
204 .eq;
205 };
206
207 EXPECT_EQ(Equivalence({0, 0, 0}),
208 test_extend_backward(MakeImageIndexForTesting("", {}, {}),
209 MakeImageIndexForTesting("", {}, {}),
210 {{0, 0, 0}, 0.0}, 8.0));
211
212 EXPECT_EQ(Equivalence({6, 4, 0}),
213 test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
214 MakeImageIndexForTesting("zzzz", {}, {}),
215 {{6, 4, 0}, 0.0}, 8.0));
216
217 EXPECT_EQ(Equivalence({0, 0, 6}),
218 test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
219 MakeImageIndexForTesting("banana", {}, {}),
220 {{6, 6, 0}, 0.0}, 8.0));
221
222 EXPECT_EQ(Equivalence({2, 2, 6}),
223 test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}),
224 MakeImageIndexForTesting("yybanana", {}, {}),
225 {{8, 8, 0}, 0.0}, 8.0));
226
227 EXPECT_EQ(
228 Equivalence({0, 0, 8}),
229 test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
230 MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
231 {{8, 8, 0}, 0.0}, 8.0));
232
233 EXPECT_EQ(
234 Equivalence({2, 2, 6}),
235 test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
236 MakeImageIndexForTesting("22banana", {}, {{0, 0}}),
237 {{8, 8, 0}, 0.0}, 8.0));
238
239 EXPECT_EQ(Equivalence({0, 0, 17}),
240 test_extend_backward(
241 MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
242 MakeImageIndexForTesting("bananayypineapple", {}, {}),
243 {{8, 8, 9}, 9.0}, 8.0));
244
245 EXPECT_EQ(
246 Equivalence({3, 0, 19}),
247 test_extend_backward(
248 MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
249 MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
250 {{22, 19, 0}, 0.0}, 8.0));
251}
252
253TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) {
254 auto PruneEquivalencesAndSortBySourceTest =
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000255 [](std::deque<Equivalence>&& equivalences) {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000256 OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences);
Takuto Ikuta637057c2018-04-25 11:14:06 +0000257 return std::move(equivalences);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000258 };
259
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000260 EXPECT_EQ(std::deque<Equivalence>(),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000261 PruneEquivalencesAndSortBySourceTest({}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000262 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000263 PruneEquivalencesAndSortBySourceTest({{0, 10, 1}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000264 EXPECT_EQ(std::deque<Equivalence>(),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000265 PruneEquivalencesAndSortBySourceTest({{0, 10, 0}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000266 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 11, 1}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000267 PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000268 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 13, 1}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000269 PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000270 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000271 PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000272 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 14, 1}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000273 PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000274 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 12, 3}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000275 PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}}));
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000276 EXPECT_EQ(std::deque<Equivalence>({{0, 10, 3}, {3, 16, 2}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000277 PruneEquivalencesAndSortBySourceTest(
278 {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}})); // Pruning is greedy
279
280 // Consider following pattern that may cause O(n^2) behavior if not handled
281 // properly.
282 // ***************
283 // **********
284 // ********
285 // ******
286 // ****
287 // **
288 // ***************
289 // This test case makes sure the function does not stall on a large instance
290 // of this pattern.
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000291 EXPECT_EQ(std::deque<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000292 PruneEquivalencesAndSortBySourceTest([] {
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000293 std::deque<Equivalence> equivalenses;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000294 equivalenses.push_back({0, 10, +300000});
295 for (offset_t i = 0; i < 100000; ++i)
296 equivalenses.push_back({200000 + i, 20, +200000 - 2 * i});
297 equivalenses.push_back({300000, 30, +300000});
298 return equivalenses;
299 }()));
300}
301
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000302TEST(EquivalenceMapTest, NaiveExtendedForwardProject) {
303 constexpr size_t kOldImageSize = 1000U;
304 constexpr size_t kNewImageSize = 1000U;
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000305 OffsetMapper offset_mapper(std::deque<Equivalence>(), kOldImageSize,
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000306 kNewImageSize);
307
308 // Convenience function to declutter.
309 auto project = [&offset_mapper](const Equivalence& eq, offset_t offset) {
310 return offset_mapper.NaiveExtendedForwardProject(eq, offset);
311 };
312
313 // Equivalence with delta = 0.
314 Equivalence eq_stay = {10, 10, +5}; // [10,15) -> [10,15).
315 for (offset_t offset = 0U; offset < 1000U; ++offset) {
316 EXPECT_EQ(offset, project(eq_stay, offset));
317 }
318 // Saturate since result would overflow "new".
319 EXPECT_EQ(999U, project(eq_stay, 1000U));
320 EXPECT_EQ(999U, project(eq_stay, 2000U));
321 EXPECT_EQ(999U, project(eq_stay, kOffsetBound - 1));
322
323 // Equivalence with delta = -10.
324 Equivalence eq_dec = {20, 10, +12}; // [20,32) --> [10,22).
325 // Offsets in "old" block.
326 EXPECT_EQ(10U, project(eq_dec, 20U));
327 EXPECT_EQ(11U, project(eq_dec, 21U));
328 EXPECT_EQ(21U, project(eq_dec, 31U));
329 // Offsets before "old" block, no underflow
330 EXPECT_EQ(9U, project(eq_dec, 19U));
331 EXPECT_EQ(1U, project(eq_dec, 11U));
332 EXPECT_EQ(0U, project(eq_dec, 10U));
333 // Offsets before "old" block, underflow (possible since delta < 0).
334 EXPECT_EQ(0U, project(eq_dec, 9U));
335 EXPECT_EQ(0U, project(eq_dec, 5U));
336 EXPECT_EQ(0U, project(eq_dec, 0U));
337 // Offsets after "old" block, no overflow.
338 EXPECT_EQ(20U, project(eq_dec, 30U));
339 EXPECT_EQ(64U, project(eq_dec, 74U));
340 EXPECT_EQ(90U, project(eq_dec, 100U));
341 EXPECT_EQ(490U, project(eq_dec, 500U));
342 EXPECT_EQ(999U, project(eq_dec, 1009U));
343 // Offsets after "old" block, overflow.
344 EXPECT_EQ(999U, project(eq_dec, 1010U));
345 EXPECT_EQ(999U, project(eq_dec, 2000U));
346 EXPECT_EQ(999U, project(eq_dec, kOffsetBound - 1));
347
348 // Equivalence with delta = +10.
349 Equivalence eq_inc = {7, 17, +80}; // [7,87) --> [17,97).
350 // Offsets in "old" block.
351 EXPECT_EQ(17U, project(eq_inc, 7U));
352 EXPECT_EQ(60U, project(eq_inc, 50U));
353 EXPECT_EQ(96U, project(eq_inc, 86U));
354 // Offsets before "old" block, underflow impossible since delta >= 0.
355 EXPECT_EQ(16U, project(eq_inc, 6U));
356 EXPECT_EQ(10U, project(eq_inc, 0U));
357 // Offsets after "old" block, no overflow.
358 EXPECT_EQ(97U, project(eq_inc, 87U));
359 EXPECT_EQ(510U, project(eq_inc, 500U));
360 EXPECT_EQ(999U, project(eq_inc, 989U));
361 // Offsets after "old" block, overflow.
362 EXPECT_EQ(999U, project(eq_inc, 990U));
363 EXPECT_EQ(999U, project(eq_inc, 2000U));
364 EXPECT_EQ(999U, project(eq_inc, kOffsetBound - 1));
365}
366
367TEST(EquivalenceMapTest, ExtendedForwardProject) {
368 // EquivalenceMaps provided must be sorted by "old" offset, and pruned.
369 // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
370 OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 20U,
371 25U);
372 EXPECT_EQ(10U, offset_mapper1.ExtendedForwardProject(0U));
373 EXPECT_EQ(11U, offset_mapper1.ExtendedForwardProject(1U));
374 EXPECT_EQ(13U, offset_mapper1.ExtendedForwardProject(2U));
375 EXPECT_EQ(14U, offset_mapper1.ExtendedForwardProject(3U)); // Previous equiv.
376 EXPECT_EQ(16U, offset_mapper1.ExtendedForwardProject(4U));
377 EXPECT_EQ(17U, offset_mapper1.ExtendedForwardProject(5U));
378 EXPECT_EQ(18U, offset_mapper1.ExtendedForwardProject(6U)); // Previous equiv.
379 // Fake offsets.
380 EXPECT_EQ(25U, offset_mapper1.ExtendedForwardProject(20U));
381 EXPECT_EQ(26U, offset_mapper1.ExtendedForwardProject(21U));
382 EXPECT_EQ(1005U, offset_mapper1.ExtendedForwardProject(1000U));
383 EXPECT_EQ(kOffsetBound - 1,
384 offset_mapper1.ExtendedForwardProject(kOffsetBound - 1));
385
386 // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
387 OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 25U,
388 20U);
389 EXPECT_EQ(10U, offset_mapper2.ExtendedForwardProject(0U));
390 EXPECT_EQ(11U, offset_mapper2.ExtendedForwardProject(1U));
391 EXPECT_EQ(2U, offset_mapper2.ExtendedForwardProject(13U));
392 EXPECT_EQ(3U, offset_mapper2.ExtendedForwardProject(14U)); // Previous equiv.
393 EXPECT_EQ(4U, offset_mapper2.ExtendedForwardProject(16U));
394 EXPECT_EQ(5U, offset_mapper2.ExtendedForwardProject(17U));
395 EXPECT_EQ(6U, offset_mapper2.ExtendedForwardProject(18U)); // Previous equiv.
396 // Fake offsets.
397 EXPECT_EQ(20U, offset_mapper2.ExtendedForwardProject(25U));
398 EXPECT_EQ(21U, offset_mapper2.ExtendedForwardProject(26U));
399 EXPECT_EQ(995U, offset_mapper2.ExtendedForwardProject(1000U));
400 EXPECT_EQ(kOffsetBound - 1 - 5,
401 offset_mapper2.ExtendedForwardProject(kOffsetBound - 1));
402}
403
404TEST(EquivalenceMapTest, ExtendedForwardProjectEncoding) {
405 // Tests OffsetMapper::ExtendedForwardProject(), which maps every "old" offset
406 // to a "new" offset, with possible overlap (even though blocks don't
407 // overlap). Not testing real offsets only (no fake offsets).
408 // |old_spec| is a string like "<<aaAAaabbBBbcCCc>>":
409 // - Upper case letters are covered "old" offsets.
410 // - Lower case letters are non-covered offsets that are properly mapped using
411 // nearest "old" block.
412 // - '<' denotes underflow (clamped to 0).
413 // - '>' denotes overflow (clampled to "new" size - 1).
414 // |new_spec| is a string like "aaAA(ab)(ab)BBb..cCCc":
415 // - Upper and lower case letters are mapped "new" targets, occurring in the
416 // order that they appear in |old_spec|.
417 // - '.' are "new" offsets that appear as output.
418 // - '(' and ')' surround a single "new" location that are repeated as output.
419 int case_no = 0;
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000420 auto run_test = [&case_no](std::deque<Equivalence>&& equivalences,
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000421 const std::string& old_spec,
422 const std::string& new_spec) {
423 const size_t old_size = old_spec.length();
424 // Build expected "new" offsets, queue up for each letter.
425 std::map<char, std::deque<offset_t>> expected;
426 offset_t cur_new_offset = 0;
427 char state = ')'; // ')' = increase offset, '(' = stay.
428 for (char ch : new_spec) {
429 if (ch == '(' || ch == ')')
430 state = ch;
431 else
432 expected[ch].push_back(cur_new_offset);
433 cur_new_offset += (state == ')') ? 1 : 0;
434 }
435 const size_t new_size = cur_new_offset;
436 // Forward-project for each "old" index, pull from queue from matching
437 // letter, and compare.
438 OffsetMapper offset_mapper(std::move(equivalences), old_size, new_size);
439 for (offset_t old_offset = 0; old_offset < old_size; ++old_offset) {
440 offset_t new_offset = offset_mapper.ExtendedForwardProject(old_offset);
441 char ch = old_spec[old_offset];
442 if (ch == '<') { // Special case: Underflow.
443 EXPECT_EQ(0U, new_offset) << "in case " << case_no;
444 } else if (ch == '>') { // Special case: Overflow.
445 EXPECT_EQ(static_cast<offset_t>(new_size - 1), new_offset)
446 << "in case " << case_no;
447 } else {
448 std::deque<offset_t>& q = expected[ch];
449 ASSERT_FALSE(q.empty());
450 EXPECT_EQ(q.front(), new_offset) << "in case " << case_no;
451 q.pop_front();
452 if (q.empty())
453 expected.erase(ch);
454 }
455 }
456 // Clear useless '.', and ensure everything is consumed.
457 expected.erase('.');
458 EXPECT_TRUE(expected.empty()) << "in case " << case_no;
459 ++case_no;
460 };
461
462 // Trivial: [5,9) --> [5,9).
463 run_test({{5, 5, +4}}, "aaaaaAAAAaaaaa", "aaaaaAAAAaaaaa");
464 // Swap: [0,4) --> [6,10), [4,10) --> [0,6).
465 run_test({{0, 6, +4}, {4, 0, +6}}, "AAAABBBBBB", "BBBBBBAAAA");
466 // Overlap: [0,4) --> [2,6), [4,10) --> [3,9).
467 run_test({{0, 2, +4}, {4, 3, +6}}, "AAAABBBBBB", "..A(AB)(AB)(AB)BBB.");
468 // Converge: [1,3) --> [2,4), [7,8) --> [6,7).
469 run_test({{1, 2, +2}, {7, 6, +1}}, "aAAaabbBbb", ".aAA(ab)(ab)Bbb.");
470 // Converge with tie-breaker: [1,3) --> [2,4), [8,9) --> [7,8).
471 run_test({{1, 2, +2}, {8, 7, +1}}, "aAAaaabbBb", ".aAAa(ab)(ab)Bb.");
472 // Shift left: [6,8) --> [2,4): Underflow occurs.
473 run_test({{6, 2, +2}}, "<<<<aaAAaa", "aaAAaa....");
474 // Shift right: [2,5) --> [6,9): Overflow occurs.
475 run_test({{2, 6, +3}}, "aaAAAa>>>>", "....aaAAAa");
476 // Diverge: [3,5) --> [1,3], [7,9) --> [9,11).
477 run_test({{3, 1, +2}, {7, 9, +2}}, "<<aAAabBBb>>", "aAAa....bBBb");
478 // Pile-up: [0,2) --> [7,9), [9,11) --> [9,11), [18,20) --> [11,13).
479 run_test({{0, 7, +2}, {9, 9, +2}, {18, 11, +2}}, "AAaaaabbbBBbbbbcccCC",
480 "......b(Ab)(Abc)(Bac)(Bac)(Cab)(Cab)bb.....");
481 // Inverse pile-up: [7,9) --> [0,2), [9,11) --> [9,11), [13,15) --> [18,20).
482 run_test({{7, 0, +2}, {9, 9, +2}, {11, 18, +2}}, "<<<<<<<AABBCC>>>>>>>",
483 "AA.......BB.......CC");
484 // Sparse rotate: [3,4) -> [10,11), [10,11) --> [17,18), [17,18) --> [3,4).
485 run_test({{3, 10, +1}, {10, 17, +1}, {17, 3, +1}}, "aaaAaaabbbBbbbcccCccc",
486 "cccCcccaaaAaaabbbBbbb");
487 // Messy swap: [2,4) --> [10,12), [12,16) --> [3,7).
488 run_test({{2, 10, +2}, {12, 3, +4}}, "aaAAaa>><bbbBBBBbb",
489 "bbbBBBBb(ab)aAAaa");
490 // Messy expand: [6,8) --> [3,5), [10,11) -> [11,12), [14,17) --> [16,19).
491 run_test({{6, 3, +2}, {10, 11, +1}, {14, 16, +3}}, "<<<aaaAAabBbbcCCCc>>>>>",
492 "aaaAAa....bBbb.cCCCc");
493 // Interleave: [1,2) --> [0,1), [5,6) --> [10,11), [6,8) --> [3,5),
494 // [11,13) --> [12,14), [14,16) --> [6,8), [17,18) --> [17,18).
495 run_test({{1, 0, +1},
496 {5, 10, +1},
497 {6, 3, +2},
498 {11, 12, +2},
499 {14, 6, +2},
500 {17, 17, +1}},
501 "<AaabBCCccdDDdEEeFf>", "AaaCCc(Ec)EebBdDDd..Ff");
502}
503
504TEST(EquivalenceMapTest, ForwardProjectAll) {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000505 auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper,
506 std::initializer_list<offset_t> offsets) {
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000507 std::deque<offset_t> offsets_vec(offsets);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000508 offset_mapper.ForwardProjectAll(&offsets_vec);
509 return offsets_vec;
510 };
511
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000512 // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
513 OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 100U,
514 100U);
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000515 EXPECT_EQ(OffsetDeque({10}), ForwardProjectAllTest(offset_mapper1, {0}));
516 EXPECT_EQ(OffsetDeque({13}), ForwardProjectAllTest(offset_mapper1, {2}));
517 EXPECT_EQ(OffsetDeque({}), ForwardProjectAllTest(offset_mapper1, {3}));
518 EXPECT_EQ(OffsetDeque({10, 13}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000519 ForwardProjectAllTest(offset_mapper1, {0, 2}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000520 EXPECT_EQ(OffsetDeque({11, 13, 17}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000521 ForwardProjectAllTest(offset_mapper1, {1, 2, 5}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000522 EXPECT_EQ(OffsetDeque({11, 17}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000523 ForwardProjectAllTest(offset_mapper1, {1, 3, 5}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000524 EXPECT_EQ(OffsetDeque({10, 11, 13, 16, 17}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000525 ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6}));
526
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000527 // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
528 OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 100U,
529 100U);
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000530 EXPECT_EQ(OffsetDeque({2}), ForwardProjectAllTest(offset_mapper2, {13}));
531 EXPECT_EQ(OffsetDeque({10, 2}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000532 ForwardProjectAllTest(offset_mapper2, {0, 13}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000533 EXPECT_EQ(OffsetDeque({11, 2, 5}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000534 ForwardProjectAllTest(offset_mapper2, {1, 13, 17}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000535 EXPECT_EQ(OffsetDeque({11, 5}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000536 ForwardProjectAllTest(offset_mapper2, {1, 14, 17}));
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000537 EXPECT_EQ(OffsetDeque({10, 11, 2, 4, 5}),
Samuel Huang06f1ae92018-03-13 18:19:34 +0000538 ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18}));
539}
540
Samuel Huang06f1ae92018-03-13 18:19:34 +0000541TEST(EquivalenceMapTest, Build) {
542 auto test_build_equivalence = [](const ImageIndex old_index,
543 const ImageIndex new_index,
544 double minimum_similarity) {
545 auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {});
546
547 EncodedView old_view(old_index);
548 EncodedView new_view(new_index);
549
550 for (const auto& old_pool_tag_and_targets : old_index.target_pools()) {
551 PoolTag pool_tag = old_pool_tag_and_targets.first;
552 std::vector<uint32_t> old_labels;
553 std::vector<uint32_t> new_labels;
554 size_t label_bound = affinities[pool_tag.value()].AssignLabels(
555 1.0, &old_labels, &new_labels);
556 old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
557 new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
558 }
559
560 std::vector<offset_t> old_sa =
561 MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
562
563 EquivalenceMap equivalence_map;
564 equivalence_map.Build(old_sa, old_view, new_view, affinities,
565 minimum_similarity);
566
567 offset_t current_dst_offset = 0;
568 offset_t coverage = 0;
569 for (const auto& candidate : equivalence_map) {
570 EXPECT_GE(candidate.eq.dst_offset, current_dst_offset);
571 EXPECT_GT(candidate.eq.length, offset_t(0));
572 EXPECT_LE(candidate.eq.src_offset + candidate.eq.length,
573 old_index.size());
574 EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length,
575 new_index.size());
576 EXPECT_GE(candidate.similarity, minimum_similarity);
577 current_dst_offset = candidate.eq.dst_offset;
578 coverage += candidate.eq.length;
579 }
580 return coverage;
581 };
582
583 EXPECT_EQ(0U,
584 test_build_equivalence(MakeImageIndexForTesting("", {}, {}),
585 MakeImageIndexForTesting("", {}, {}), 4.0));
586
587 EXPECT_EQ(0U, test_build_equivalence(
588 MakeImageIndexForTesting("", {}, {}),
589 MakeImageIndexForTesting("banana", {}, {}), 4.0));
590
591 EXPECT_EQ(0U,
592 test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}),
593 MakeImageIndexForTesting("", {}, {}), 4.0));
594
595 EXPECT_EQ(0U, test_build_equivalence(
596 MakeImageIndexForTesting("banana", {}, {}),
597 MakeImageIndexForTesting("zzzz", {}, {}), 4.0));
598
599 EXPECT_EQ(6U, test_build_equivalence(
600 MakeImageIndexForTesting("banana", {}, {}),
601 MakeImageIndexForTesting("banana", {}, {}), 4.0));
602
603 EXPECT_EQ(6U, test_build_equivalence(
604 MakeImageIndexForTesting("bananaxx", {}, {}),
605 MakeImageIndexForTesting("bananayy", {}, {}), 4.0));
606
607 EXPECT_EQ(8U, test_build_equivalence(
608 MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
609 MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0));
610
611 EXPECT_EQ(6U, test_build_equivalence(
612 MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
613 MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0));
614
615 EXPECT_EQ(
616 15U,
617 test_build_equivalence(
618 MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}),
619 MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0));
620
621 EXPECT_EQ(
622 15U,
623 test_build_equivalence(
624 MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}),
625 MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0));
626
627 EXPECT_EQ(
628 19U,
629 test_build_equivalence(
630 MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
631 MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
632 4.0));
633}
634
635} // namespace zucchini