Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "components/zucchini/equivalence_map.h" |
| 6 | |
| 7 | #include <cstring> |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 8 | #include <deque> |
| 9 | #include <map> |
| 10 | #include <string> |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 11 | #include <utility> |
| 12 | #include <vector> |
| 13 | |
| 14 | #include "components/zucchini/encoded_view.h" |
| 15 | #include "components/zucchini/image_index.h" |
| 16 | #include "components/zucchini/suffix_array.h" |
| 17 | #include "components/zucchini/targets_affinity.h" |
| 18 | #include "components/zucchini/test_disassembler.h" |
| 19 | #include "testing/gtest/include/gtest/gtest.h" |
| 20 | |
| 21 | namespace zucchini { |
| 22 | |
| 23 | namespace { |
| 24 | |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 25 | using OffsetDeque = std::deque<offset_t>; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 26 | |
| 27 | // Make all references 2 bytes long. |
| 28 | constexpr offset_t kReferenceSize = 2; |
| 29 | |
| 30 | // Creates and initialize an ImageIndex from |a| and with 2 types of references. |
| 31 | // The result is populated with |refs0| and |refs1|. |a| is expected to be a |
| 32 | // string literal valid for the lifetime of the object. |
| 33 | ImageIndex MakeImageIndexForTesting(const char* a, |
| 34 | std::vector<Reference>&& refs0, |
| 35 | std::vector<Reference>&& refs1) { |
| 36 | TestDisassembler disasm( |
| 37 | {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0), |
| 38 | {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1), |
| 39 | {kReferenceSize, TypeTag(2), PoolTag(1)}, {}); |
| 40 | |
| 41 | ImageIndex image_index( |
| 42 | ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a))); |
| 43 | |
| 44 | EXPECT_TRUE(image_index.Initialize(&disasm)); |
| 45 | return image_index; |
| 46 | } |
| 47 | |
| 48 | std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting( |
| 49 | const ImageIndex& old_image_index, |
| 50 | const ImageIndex& new_image_index, |
| 51 | const EquivalenceMap& equivalence_map) { |
| 52 | std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount()); |
| 53 | for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) { |
| 54 | PoolTag pool_tag = old_pool_tag_and_targets.first; |
| 55 | target_affinities[pool_tag.value()].InferFromSimilarities( |
| 56 | equivalence_map, old_pool_tag_and_targets.second.targets(), |
| 57 | new_image_index.pool(pool_tag).targets()); |
| 58 | } |
| 59 | return target_affinities; |
| 60 | } |
| 61 | |
| 62 | } // namespace |
| 63 | |
| 64 | TEST(EquivalenceMapTest, GetTokenSimilarity) { |
| 65 | ImageIndex old_index = MakeImageIndexForTesting( |
| 66 | "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}}); |
| 67 | // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted. |
| 68 | ImageIndex new_index = MakeImageIndexForTesting( |
| 69 | "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}}); |
| 70 | std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting( |
| 71 | old_index, new_index, |
| 72 | EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}})); |
| 73 | |
| 74 | // Raw match. |
| 75 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0)); |
| 76 | // Raw mismatch. |
| 77 | EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); |
| 78 | EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0)); |
| 79 | |
| 80 | // Type mismatch. |
| 81 | EXPECT_EQ(kMismatchFatal, |
| 82 | GetTokenSimilarity(old_index, new_index, affinities, 0, 1)); |
| 83 | EXPECT_EQ(kMismatchFatal, |
| 84 | GetTokenSimilarity(old_index, new_index, affinities, 2, 0)); |
| 85 | EXPECT_EQ(kMismatchFatal, |
| 86 | GetTokenSimilarity(old_index, new_index, affinities, 2, 10)); |
| 87 | EXPECT_EQ(kMismatchFatal, |
| 88 | GetTokenSimilarity(old_index, new_index, affinities, 10, 1)); |
| 89 | |
| 90 | // Reference strong match. |
| 91 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); |
| 92 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6)); |
| 93 | |
| 94 | // Reference weak match. |
| 95 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4)); |
| 96 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8)); |
| 97 | EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4)); |
| 98 | |
| 99 | // Weak match is not greater than strong match. |
| 100 | EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4), |
| 101 | GetTokenSimilarity(old_index, new_index, affinities, 2, 1)); |
| 102 | |
| 103 | // Reference mismatch. |
| 104 | EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4)); |
| 105 | EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6)); |
| 106 | } |
| 107 | |
| 108 | TEST(EquivalenceMapTest, GetEquivalenceSimilarity) { |
| 109 | ImageIndex image_index = |
| 110 | MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}}); |
| 111 | std::vector<TargetsAffinity> affinities = |
| 112 | MakeTargetsAffinitiesForTesting(image_index, image_index, {}); |
| 113 | |
| 114 | // Sanity check. These are no-op with length-0 equivalences. |
| 115 | EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 116 | {0, 0, 0})); |
| 117 | EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 118 | {0, 3, 0})); |
| 119 | EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 120 | {3, 0, 0})); |
| 121 | |
| 122 | // Now examine larger equivalences. |
| 123 | EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 124 | {0, 0, 3})); |
| 125 | EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 126 | {0, 3, 3})); |
| 127 | EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 128 | {3, 0, 3})); |
| 129 | |
| 130 | EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities, |
| 131 | {6, 6, 4})); |
| 132 | } |
| 133 | |
| 134 | TEST(EquivalenceMapTest, ExtendEquivalenceForward) { |
| 135 | auto test_extend_forward = |
| 136 | [](const ImageIndex old_index, const ImageIndex new_index, |
| 137 | const EquivalenceCandidate& equivalence, double base_similarity) { |
| 138 | return ExtendEquivalenceForward( |
| 139 | old_index, new_index, |
| 140 | MakeTargetsAffinitiesForTesting(old_index, new_index, {}), |
| 141 | equivalence, base_similarity) |
| 142 | .eq; |
| 143 | }; |
| 144 | |
| 145 | EXPECT_EQ(Equivalence({0, 0, 0}), |
| 146 | test_extend_forward(MakeImageIndexForTesting("", {}, {}), |
| 147 | MakeImageIndexForTesting("", {}, {}), |
| 148 | {{0, 0, 0}, 0.0}, 8.0)); |
| 149 | |
| 150 | EXPECT_EQ(Equivalence({0, 0, 0}), |
| 151 | test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), |
| 152 | MakeImageIndexForTesting("zzzz", {}, {}), |
| 153 | {{0, 0, 0}, 0.0}, 8.0)); |
| 154 | |
| 155 | EXPECT_EQ(Equivalence({0, 0, 6}), |
| 156 | test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), |
| 157 | MakeImageIndexForTesting("banana", {}, {}), |
| 158 | {{0, 0, 0}, 0.0}, 8.0)); |
| 159 | |
| 160 | EXPECT_EQ(Equivalence({2, 2, 4}), |
| 161 | test_extend_forward(MakeImageIndexForTesting("banana", {}, {}), |
| 162 | MakeImageIndexForTesting("banana", {}, {}), |
| 163 | {{2, 2, 0}, 0.0}, 8.0)); |
| 164 | |
| 165 | EXPECT_EQ(Equivalence({0, 0, 6}), |
| 166 | test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}), |
| 167 | MakeImageIndexForTesting("bananayy", {}, {}), |
| 168 | {{0, 0, 0}, 0.0}, 8.0)); |
| 169 | |
| 170 | EXPECT_EQ( |
| 171 | Equivalence({0, 0, 8}), |
| 172 | test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), |
| 173 | MakeImageIndexForTesting("banana11", {{6, 0}}, {}), |
| 174 | {{0, 0, 0}, 0.0}, 8.0)); |
| 175 | |
| 176 | EXPECT_EQ( |
| 177 | Equivalence({0, 0, 6}), |
| 178 | test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}), |
| 179 | MakeImageIndexForTesting("banana22", {}, {{6, 0}}), |
| 180 | {{0, 0, 0}, 0.0}, 8.0)); |
| 181 | |
| 182 | EXPECT_EQ( |
| 183 | Equivalence({0, 0, 17}), |
| 184 | test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}), |
| 185 | MakeImageIndexForTesting("bananayypineapple", {}, {}), |
| 186 | {{0, 0, 0}, 0.0}, 8.0)); |
| 187 | |
| 188 | EXPECT_EQ( |
| 189 | Equivalence({3, 0, 19}), |
| 190 | test_extend_forward( |
| 191 | MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), |
| 192 | MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), |
| 193 | {{3, 0, 0}, 0.0}, 8.0)); |
| 194 | } |
| 195 | |
| 196 | TEST(EquivalenceMapTest, ExtendEquivalenceBackward) { |
| 197 | auto test_extend_backward = |
| 198 | [](const ImageIndex old_index, const ImageIndex new_index, |
| 199 | const EquivalenceCandidate& equivalence, double base_similarity) { |
| 200 | return ExtendEquivalenceBackward( |
| 201 | old_index, new_index, |
| 202 | MakeTargetsAffinitiesForTesting(old_index, new_index, {}), |
| 203 | equivalence, base_similarity) |
| 204 | .eq; |
| 205 | }; |
| 206 | |
| 207 | EXPECT_EQ(Equivalence({0, 0, 0}), |
| 208 | test_extend_backward(MakeImageIndexForTesting("", {}, {}), |
| 209 | MakeImageIndexForTesting("", {}, {}), |
| 210 | {{0, 0, 0}, 0.0}, 8.0)); |
| 211 | |
| 212 | EXPECT_EQ(Equivalence({6, 4, 0}), |
| 213 | test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), |
| 214 | MakeImageIndexForTesting("zzzz", {}, {}), |
| 215 | {{6, 4, 0}, 0.0}, 8.0)); |
| 216 | |
| 217 | EXPECT_EQ(Equivalence({0, 0, 6}), |
| 218 | test_extend_backward(MakeImageIndexForTesting("banana", {}, {}), |
| 219 | MakeImageIndexForTesting("banana", {}, {}), |
| 220 | {{6, 6, 0}, 0.0}, 8.0)); |
| 221 | |
| 222 | EXPECT_EQ(Equivalence({2, 2, 6}), |
| 223 | test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}), |
| 224 | MakeImageIndexForTesting("yybanana", {}, {}), |
| 225 | {{8, 8, 0}, 0.0}, 8.0)); |
| 226 | |
| 227 | EXPECT_EQ( |
| 228 | Equivalence({0, 0, 8}), |
| 229 | test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), |
| 230 | MakeImageIndexForTesting("11banana", {{0, 0}}, {}), |
| 231 | {{8, 8, 0}, 0.0}, 8.0)); |
| 232 | |
| 233 | EXPECT_EQ( |
| 234 | Equivalence({2, 2, 6}), |
| 235 | test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}), |
| 236 | MakeImageIndexForTesting("22banana", {}, {{0, 0}}), |
| 237 | {{8, 8, 0}, 0.0}, 8.0)); |
| 238 | |
| 239 | EXPECT_EQ(Equivalence({0, 0, 17}), |
| 240 | test_extend_backward( |
| 241 | MakeImageIndexForTesting("bananaxxpineapple", {}, {}), |
| 242 | MakeImageIndexForTesting("bananayypineapple", {}, {}), |
| 243 | {{8, 8, 9}, 9.0}, 8.0)); |
| 244 | |
| 245 | EXPECT_EQ( |
| 246 | Equivalence({3, 0, 19}), |
| 247 | test_extend_backward( |
| 248 | MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), |
| 249 | MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), |
| 250 | {{22, 19, 0}, 0.0}, 8.0)); |
| 251 | } |
| 252 | |
| 253 | TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) { |
| 254 | auto PruneEquivalencesAndSortBySourceTest = |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 255 | [](std::deque<Equivalence>&& equivalences) { |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 256 | OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences); |
Takuto Ikuta | 637057c | 2018-04-25 11:14:06 +0000 | [diff] [blame] | 257 | return std::move(equivalences); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 258 | }; |
| 259 | |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 260 | EXPECT_EQ(std::deque<Equivalence>(), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 261 | PruneEquivalencesAndSortBySourceTest({})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 262 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 263 | PruneEquivalencesAndSortBySourceTest({{0, 10, 1}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 264 | EXPECT_EQ(std::deque<Equivalence>(), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 265 | PruneEquivalencesAndSortBySourceTest({{0, 10, 0}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 266 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 11, 1}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 267 | PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 268 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 13, 1}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 269 | PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 270 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 271 | PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 272 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 14, 1}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 273 | PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 274 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 12, 3}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 275 | PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}})); |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 276 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, 3}, {3, 16, 2}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 277 | PruneEquivalencesAndSortBySourceTest( |
| 278 | {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}})); // Pruning is greedy |
| 279 | |
| 280 | // Consider following pattern that may cause O(n^2) behavior if not handled |
| 281 | // properly. |
| 282 | // *************** |
| 283 | // ********** |
| 284 | // ******** |
| 285 | // ****** |
| 286 | // **** |
| 287 | // ** |
| 288 | // *************** |
| 289 | // This test case makes sure the function does not stall on a large instance |
| 290 | // of this pattern. |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 291 | EXPECT_EQ(std::deque<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 292 | PruneEquivalencesAndSortBySourceTest([] { |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 293 | std::deque<Equivalence> equivalenses; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 294 | equivalenses.push_back({0, 10, +300000}); |
| 295 | for (offset_t i = 0; i < 100000; ++i) |
| 296 | equivalenses.push_back({200000 + i, 20, +200000 - 2 * i}); |
| 297 | equivalenses.push_back({300000, 30, +300000}); |
| 298 | return equivalenses; |
| 299 | }())); |
| 300 | } |
| 301 | |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 302 | TEST(EquivalenceMapTest, NaiveExtendedForwardProject) { |
| 303 | constexpr size_t kOldImageSize = 1000U; |
| 304 | constexpr size_t kNewImageSize = 1000U; |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 305 | OffsetMapper offset_mapper(std::deque<Equivalence>(), kOldImageSize, |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 306 | kNewImageSize); |
| 307 | |
| 308 | // Convenience function to declutter. |
| 309 | auto project = [&offset_mapper](const Equivalence& eq, offset_t offset) { |
| 310 | return offset_mapper.NaiveExtendedForwardProject(eq, offset); |
| 311 | }; |
| 312 | |
| 313 | // Equivalence with delta = 0. |
| 314 | Equivalence eq_stay = {10, 10, +5}; // [10,15) -> [10,15). |
| 315 | for (offset_t offset = 0U; offset < 1000U; ++offset) { |
| 316 | EXPECT_EQ(offset, project(eq_stay, offset)); |
| 317 | } |
| 318 | // Saturate since result would overflow "new". |
| 319 | EXPECT_EQ(999U, project(eq_stay, 1000U)); |
| 320 | EXPECT_EQ(999U, project(eq_stay, 2000U)); |
| 321 | EXPECT_EQ(999U, project(eq_stay, kOffsetBound - 1)); |
| 322 | |
| 323 | // Equivalence with delta = -10. |
| 324 | Equivalence eq_dec = {20, 10, +12}; // [20,32) --> [10,22). |
| 325 | // Offsets in "old" block. |
| 326 | EXPECT_EQ(10U, project(eq_dec, 20U)); |
| 327 | EXPECT_EQ(11U, project(eq_dec, 21U)); |
| 328 | EXPECT_EQ(21U, project(eq_dec, 31U)); |
| 329 | // Offsets before "old" block, no underflow |
| 330 | EXPECT_EQ(9U, project(eq_dec, 19U)); |
| 331 | EXPECT_EQ(1U, project(eq_dec, 11U)); |
| 332 | EXPECT_EQ(0U, project(eq_dec, 10U)); |
| 333 | // Offsets before "old" block, underflow (possible since delta < 0). |
| 334 | EXPECT_EQ(0U, project(eq_dec, 9U)); |
| 335 | EXPECT_EQ(0U, project(eq_dec, 5U)); |
| 336 | EXPECT_EQ(0U, project(eq_dec, 0U)); |
| 337 | // Offsets after "old" block, no overflow. |
| 338 | EXPECT_EQ(20U, project(eq_dec, 30U)); |
| 339 | EXPECT_EQ(64U, project(eq_dec, 74U)); |
| 340 | EXPECT_EQ(90U, project(eq_dec, 100U)); |
| 341 | EXPECT_EQ(490U, project(eq_dec, 500U)); |
| 342 | EXPECT_EQ(999U, project(eq_dec, 1009U)); |
| 343 | // Offsets after "old" block, overflow. |
| 344 | EXPECT_EQ(999U, project(eq_dec, 1010U)); |
| 345 | EXPECT_EQ(999U, project(eq_dec, 2000U)); |
| 346 | EXPECT_EQ(999U, project(eq_dec, kOffsetBound - 1)); |
| 347 | |
| 348 | // Equivalence with delta = +10. |
| 349 | Equivalence eq_inc = {7, 17, +80}; // [7,87) --> [17,97). |
| 350 | // Offsets in "old" block. |
| 351 | EXPECT_EQ(17U, project(eq_inc, 7U)); |
| 352 | EXPECT_EQ(60U, project(eq_inc, 50U)); |
| 353 | EXPECT_EQ(96U, project(eq_inc, 86U)); |
| 354 | // Offsets before "old" block, underflow impossible since delta >= 0. |
| 355 | EXPECT_EQ(16U, project(eq_inc, 6U)); |
| 356 | EXPECT_EQ(10U, project(eq_inc, 0U)); |
| 357 | // Offsets after "old" block, no overflow. |
| 358 | EXPECT_EQ(97U, project(eq_inc, 87U)); |
| 359 | EXPECT_EQ(510U, project(eq_inc, 500U)); |
| 360 | EXPECT_EQ(999U, project(eq_inc, 989U)); |
| 361 | // Offsets after "old" block, overflow. |
| 362 | EXPECT_EQ(999U, project(eq_inc, 990U)); |
| 363 | EXPECT_EQ(999U, project(eq_inc, 2000U)); |
| 364 | EXPECT_EQ(999U, project(eq_inc, kOffsetBound - 1)); |
| 365 | } |
| 366 | |
| 367 | TEST(EquivalenceMapTest, ExtendedForwardProject) { |
| 368 | // EquivalenceMaps provided must be sorted by "old" offset, and pruned. |
| 369 | // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18). |
| 370 | OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 20U, |
| 371 | 25U); |
| 372 | EXPECT_EQ(10U, offset_mapper1.ExtendedForwardProject(0U)); |
| 373 | EXPECT_EQ(11U, offset_mapper1.ExtendedForwardProject(1U)); |
| 374 | EXPECT_EQ(13U, offset_mapper1.ExtendedForwardProject(2U)); |
| 375 | EXPECT_EQ(14U, offset_mapper1.ExtendedForwardProject(3U)); // Previous equiv. |
| 376 | EXPECT_EQ(16U, offset_mapper1.ExtendedForwardProject(4U)); |
| 377 | EXPECT_EQ(17U, offset_mapper1.ExtendedForwardProject(5U)); |
| 378 | EXPECT_EQ(18U, offset_mapper1.ExtendedForwardProject(6U)); // Previous equiv. |
| 379 | // Fake offsets. |
| 380 | EXPECT_EQ(25U, offset_mapper1.ExtendedForwardProject(20U)); |
| 381 | EXPECT_EQ(26U, offset_mapper1.ExtendedForwardProject(21U)); |
| 382 | EXPECT_EQ(1005U, offset_mapper1.ExtendedForwardProject(1000U)); |
| 383 | EXPECT_EQ(kOffsetBound - 1, |
| 384 | offset_mapper1.ExtendedForwardProject(kOffsetBound - 1)); |
| 385 | |
| 386 | // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6). |
| 387 | OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 25U, |
| 388 | 20U); |
| 389 | EXPECT_EQ(10U, offset_mapper2.ExtendedForwardProject(0U)); |
| 390 | EXPECT_EQ(11U, offset_mapper2.ExtendedForwardProject(1U)); |
| 391 | EXPECT_EQ(2U, offset_mapper2.ExtendedForwardProject(13U)); |
| 392 | EXPECT_EQ(3U, offset_mapper2.ExtendedForwardProject(14U)); // Previous equiv. |
| 393 | EXPECT_EQ(4U, offset_mapper2.ExtendedForwardProject(16U)); |
| 394 | EXPECT_EQ(5U, offset_mapper2.ExtendedForwardProject(17U)); |
| 395 | EXPECT_EQ(6U, offset_mapper2.ExtendedForwardProject(18U)); // Previous equiv. |
| 396 | // Fake offsets. |
| 397 | EXPECT_EQ(20U, offset_mapper2.ExtendedForwardProject(25U)); |
| 398 | EXPECT_EQ(21U, offset_mapper2.ExtendedForwardProject(26U)); |
| 399 | EXPECT_EQ(995U, offset_mapper2.ExtendedForwardProject(1000U)); |
| 400 | EXPECT_EQ(kOffsetBound - 1 - 5, |
| 401 | offset_mapper2.ExtendedForwardProject(kOffsetBound - 1)); |
| 402 | } |
| 403 | |
| 404 | TEST(EquivalenceMapTest, ExtendedForwardProjectEncoding) { |
| 405 | // Tests OffsetMapper::ExtendedForwardProject(), which maps every "old" offset |
| 406 | // to a "new" offset, with possible overlap (even though blocks don't |
| 407 | // overlap). Not testing real offsets only (no fake offsets). |
| 408 | // |old_spec| is a string like "<<aaAAaabbBBbcCCc>>": |
| 409 | // - Upper case letters are covered "old" offsets. |
| 410 | // - Lower case letters are non-covered offsets that are properly mapped using |
| 411 | // nearest "old" block. |
| 412 | // - '<' denotes underflow (clamped to 0). |
| 413 | // - '>' denotes overflow (clampled to "new" size - 1). |
| 414 | // |new_spec| is a string like "aaAA(ab)(ab)BBb..cCCc": |
| 415 | // - Upper and lower case letters are mapped "new" targets, occurring in the |
| 416 | // order that they appear in |old_spec|. |
| 417 | // - '.' are "new" offsets that appear as output. |
| 418 | // - '(' and ')' surround a single "new" location that are repeated as output. |
| 419 | int case_no = 0; |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 420 | auto run_test = [&case_no](std::deque<Equivalence>&& equivalences, |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 421 | const std::string& old_spec, |
| 422 | const std::string& new_spec) { |
| 423 | const size_t old_size = old_spec.length(); |
| 424 | // Build expected "new" offsets, queue up for each letter. |
| 425 | std::map<char, std::deque<offset_t>> expected; |
| 426 | offset_t cur_new_offset = 0; |
| 427 | char state = ')'; // ')' = increase offset, '(' = stay. |
| 428 | for (char ch : new_spec) { |
| 429 | if (ch == '(' || ch == ')') |
| 430 | state = ch; |
| 431 | else |
| 432 | expected[ch].push_back(cur_new_offset); |
| 433 | cur_new_offset += (state == ')') ? 1 : 0; |
| 434 | } |
| 435 | const size_t new_size = cur_new_offset; |
| 436 | // Forward-project for each "old" index, pull from queue from matching |
| 437 | // letter, and compare. |
| 438 | OffsetMapper offset_mapper(std::move(equivalences), old_size, new_size); |
| 439 | for (offset_t old_offset = 0; old_offset < old_size; ++old_offset) { |
| 440 | offset_t new_offset = offset_mapper.ExtendedForwardProject(old_offset); |
| 441 | char ch = old_spec[old_offset]; |
| 442 | if (ch == '<') { // Special case: Underflow. |
| 443 | EXPECT_EQ(0U, new_offset) << "in case " << case_no; |
| 444 | } else if (ch == '>') { // Special case: Overflow. |
| 445 | EXPECT_EQ(static_cast<offset_t>(new_size - 1), new_offset) |
| 446 | << "in case " << case_no; |
| 447 | } else { |
| 448 | std::deque<offset_t>& q = expected[ch]; |
| 449 | ASSERT_FALSE(q.empty()); |
| 450 | EXPECT_EQ(q.front(), new_offset) << "in case " << case_no; |
| 451 | q.pop_front(); |
| 452 | if (q.empty()) |
| 453 | expected.erase(ch); |
| 454 | } |
| 455 | } |
| 456 | // Clear useless '.', and ensure everything is consumed. |
| 457 | expected.erase('.'); |
| 458 | EXPECT_TRUE(expected.empty()) << "in case " << case_no; |
| 459 | ++case_no; |
| 460 | }; |
| 461 | |
| 462 | // Trivial: [5,9) --> [5,9). |
| 463 | run_test({{5, 5, +4}}, "aaaaaAAAAaaaaa", "aaaaaAAAAaaaaa"); |
| 464 | // Swap: [0,4) --> [6,10), [4,10) --> [0,6). |
| 465 | run_test({{0, 6, +4}, {4, 0, +6}}, "AAAABBBBBB", "BBBBBBAAAA"); |
| 466 | // Overlap: [0,4) --> [2,6), [4,10) --> [3,9). |
| 467 | run_test({{0, 2, +4}, {4, 3, +6}}, "AAAABBBBBB", "..A(AB)(AB)(AB)BBB."); |
| 468 | // Converge: [1,3) --> [2,4), [7,8) --> [6,7). |
| 469 | run_test({{1, 2, +2}, {7, 6, +1}}, "aAAaabbBbb", ".aAA(ab)(ab)Bbb."); |
| 470 | // Converge with tie-breaker: [1,3) --> [2,4), [8,9) --> [7,8). |
| 471 | run_test({{1, 2, +2}, {8, 7, +1}}, "aAAaaabbBb", ".aAAa(ab)(ab)Bb."); |
| 472 | // Shift left: [6,8) --> [2,4): Underflow occurs. |
| 473 | run_test({{6, 2, +2}}, "<<<<aaAAaa", "aaAAaa...."); |
| 474 | // Shift right: [2,5) --> [6,9): Overflow occurs. |
| 475 | run_test({{2, 6, +3}}, "aaAAAa>>>>", "....aaAAAa"); |
| 476 | // Diverge: [3,5) --> [1,3], [7,9) --> [9,11). |
| 477 | run_test({{3, 1, +2}, {7, 9, +2}}, "<<aAAabBBb>>", "aAAa....bBBb"); |
| 478 | // Pile-up: [0,2) --> [7,9), [9,11) --> [9,11), [18,20) --> [11,13). |
| 479 | run_test({{0, 7, +2}, {9, 9, +2}, {18, 11, +2}}, "AAaaaabbbBBbbbbcccCC", |
| 480 | "......b(Ab)(Abc)(Bac)(Bac)(Cab)(Cab)bb....."); |
| 481 | // Inverse pile-up: [7,9) --> [0,2), [9,11) --> [9,11), [13,15) --> [18,20). |
| 482 | run_test({{7, 0, +2}, {9, 9, +2}, {11, 18, +2}}, "<<<<<<<AABBCC>>>>>>>", |
| 483 | "AA.......BB.......CC"); |
| 484 | // Sparse rotate: [3,4) -> [10,11), [10,11) --> [17,18), [17,18) --> [3,4). |
| 485 | run_test({{3, 10, +1}, {10, 17, +1}, {17, 3, +1}}, "aaaAaaabbbBbbbcccCccc", |
| 486 | "cccCcccaaaAaaabbbBbbb"); |
| 487 | // Messy swap: [2,4) --> [10,12), [12,16) --> [3,7). |
| 488 | run_test({{2, 10, +2}, {12, 3, +4}}, "aaAAaa>><bbbBBBBbb", |
| 489 | "bbbBBBBb(ab)aAAaa"); |
| 490 | // Messy expand: [6,8) --> [3,5), [10,11) -> [11,12), [14,17) --> [16,19). |
| 491 | run_test({{6, 3, +2}, {10, 11, +1}, {14, 16, +3}}, "<<<aaaAAabBbbcCCCc>>>>>", |
| 492 | "aaaAAa....bBbb.cCCCc"); |
| 493 | // Interleave: [1,2) --> [0,1), [5,6) --> [10,11), [6,8) --> [3,5), |
| 494 | // [11,13) --> [12,14), [14,16) --> [6,8), [17,18) --> [17,18). |
| 495 | run_test({{1, 0, +1}, |
| 496 | {5, 10, +1}, |
| 497 | {6, 3, +2}, |
| 498 | {11, 12, +2}, |
| 499 | {14, 6, +2}, |
| 500 | {17, 17, +1}}, |
| 501 | "<AaabBCCccdDDdEEeFf>", "AaaCCc(Ec)EebBdDDd..Ff"); |
| 502 | } |
| 503 | |
| 504 | TEST(EquivalenceMapTest, ForwardProjectAll) { |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 505 | auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper, |
| 506 | std::initializer_list<offset_t> offsets) { |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 507 | std::deque<offset_t> offsets_vec(offsets); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 508 | offset_mapper.ForwardProjectAll(&offsets_vec); |
| 509 | return offsets_vec; |
| 510 | }; |
| 511 | |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 512 | // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18). |
| 513 | OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 100U, |
| 514 | 100U); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 515 | EXPECT_EQ(OffsetDeque({10}), ForwardProjectAllTest(offset_mapper1, {0})); |
| 516 | EXPECT_EQ(OffsetDeque({13}), ForwardProjectAllTest(offset_mapper1, {2})); |
| 517 | EXPECT_EQ(OffsetDeque({}), ForwardProjectAllTest(offset_mapper1, {3})); |
| 518 | EXPECT_EQ(OffsetDeque({10, 13}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 519 | ForwardProjectAllTest(offset_mapper1, {0, 2})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 520 | EXPECT_EQ(OffsetDeque({11, 13, 17}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 521 | ForwardProjectAllTest(offset_mapper1, {1, 2, 5})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 522 | EXPECT_EQ(OffsetDeque({11, 17}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 523 | ForwardProjectAllTest(offset_mapper1, {1, 3, 5})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 524 | EXPECT_EQ(OffsetDeque({10, 11, 13, 16, 17}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 525 | ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6})); |
| 526 | |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 527 | // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6). |
| 528 | OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 100U, |
| 529 | 100U); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 530 | EXPECT_EQ(OffsetDeque({2}), ForwardProjectAllTest(offset_mapper2, {13})); |
| 531 | EXPECT_EQ(OffsetDeque({10, 2}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 532 | ForwardProjectAllTest(offset_mapper2, {0, 13})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 533 | EXPECT_EQ(OffsetDeque({11, 2, 5}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 534 | ForwardProjectAllTest(offset_mapper2, {1, 13, 17})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 535 | EXPECT_EQ(OffsetDeque({11, 5}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 536 | ForwardProjectAllTest(offset_mapper2, {1, 14, 17})); |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 537 | EXPECT_EQ(OffsetDeque({10, 11, 2, 4, 5}), |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 538 | ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18})); |
| 539 | } |
| 540 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 541 | TEST(EquivalenceMapTest, Build) { |
| 542 | auto test_build_equivalence = [](const ImageIndex old_index, |
| 543 | const ImageIndex new_index, |
| 544 | double minimum_similarity) { |
| 545 | auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {}); |
| 546 | |
| 547 | EncodedView old_view(old_index); |
| 548 | EncodedView new_view(new_index); |
| 549 | |
| 550 | for (const auto& old_pool_tag_and_targets : old_index.target_pools()) { |
| 551 | PoolTag pool_tag = old_pool_tag_and_targets.first; |
| 552 | std::vector<uint32_t> old_labels; |
| 553 | std::vector<uint32_t> new_labels; |
| 554 | size_t label_bound = affinities[pool_tag.value()].AssignLabels( |
| 555 | 1.0, &old_labels, &new_labels); |
| 556 | old_view.SetLabels(pool_tag, std::move(old_labels), label_bound); |
| 557 | new_view.SetLabels(pool_tag, std::move(new_labels), label_bound); |
| 558 | } |
| 559 | |
| 560 | std::vector<offset_t> old_sa = |
| 561 | MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()); |
| 562 | |
| 563 | EquivalenceMap equivalence_map; |
| 564 | equivalence_map.Build(old_sa, old_view, new_view, affinities, |
| 565 | minimum_similarity); |
| 566 | |
| 567 | offset_t current_dst_offset = 0; |
| 568 | offset_t coverage = 0; |
| 569 | for (const auto& candidate : equivalence_map) { |
| 570 | EXPECT_GE(candidate.eq.dst_offset, current_dst_offset); |
| 571 | EXPECT_GT(candidate.eq.length, offset_t(0)); |
| 572 | EXPECT_LE(candidate.eq.src_offset + candidate.eq.length, |
| 573 | old_index.size()); |
| 574 | EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length, |
| 575 | new_index.size()); |
| 576 | EXPECT_GE(candidate.similarity, minimum_similarity); |
| 577 | current_dst_offset = candidate.eq.dst_offset; |
| 578 | coverage += candidate.eq.length; |
| 579 | } |
| 580 | return coverage; |
| 581 | }; |
| 582 | |
| 583 | EXPECT_EQ(0U, |
| 584 | test_build_equivalence(MakeImageIndexForTesting("", {}, {}), |
| 585 | MakeImageIndexForTesting("", {}, {}), 4.0)); |
| 586 | |
| 587 | EXPECT_EQ(0U, test_build_equivalence( |
| 588 | MakeImageIndexForTesting("", {}, {}), |
| 589 | MakeImageIndexForTesting("banana", {}, {}), 4.0)); |
| 590 | |
| 591 | EXPECT_EQ(0U, |
| 592 | test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}), |
| 593 | MakeImageIndexForTesting("", {}, {}), 4.0)); |
| 594 | |
| 595 | EXPECT_EQ(0U, test_build_equivalence( |
| 596 | MakeImageIndexForTesting("banana", {}, {}), |
| 597 | MakeImageIndexForTesting("zzzz", {}, {}), 4.0)); |
| 598 | |
| 599 | EXPECT_EQ(6U, test_build_equivalence( |
| 600 | MakeImageIndexForTesting("banana", {}, {}), |
| 601 | MakeImageIndexForTesting("banana", {}, {}), 4.0)); |
| 602 | |
| 603 | EXPECT_EQ(6U, test_build_equivalence( |
| 604 | MakeImageIndexForTesting("bananaxx", {}, {}), |
| 605 | MakeImageIndexForTesting("bananayy", {}, {}), 4.0)); |
| 606 | |
| 607 | EXPECT_EQ(8U, test_build_equivalence( |
| 608 | MakeImageIndexForTesting("banana11", {{6, 0}}, {}), |
| 609 | MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0)); |
| 610 | |
| 611 | EXPECT_EQ(6U, test_build_equivalence( |
| 612 | MakeImageIndexForTesting("banana11", {{6, 0}}, {}), |
| 613 | MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0)); |
| 614 | |
| 615 | EXPECT_EQ( |
| 616 | 15U, |
| 617 | test_build_equivalence( |
| 618 | MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}), |
| 619 | MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0)); |
| 620 | |
| 621 | EXPECT_EQ( |
| 622 | 15U, |
| 623 | test_build_equivalence( |
| 624 | MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}), |
| 625 | MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0)); |
| 626 | |
| 627 | EXPECT_EQ( |
| 628 | 19U, |
| 629 | test_build_equivalence( |
| 630 | MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}), |
| 631 | MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}), |
| 632 | 4.0)); |
| 633 | } |
| 634 | |
| 635 | } // namespace zucchini |