Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |
| 7 | |
| 8 | #include <stddef.h> |
| 9 | |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 10 | #include <deque> |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 11 | #include <limits> |
| 12 | #include <vector> |
| 13 | |
| 14 | #include "components/zucchini/image_index.h" |
| 15 | #include "components/zucchini/image_utils.h" |
| 16 | #include "components/zucchini/targets_affinity.h" |
| 17 | |
| 18 | namespace zucchini { |
| 19 | |
| 20 | constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity(); |
| 21 | |
| 22 | class EncodedView; |
| 23 | class EquivalenceSource; |
| 24 | |
| 25 | // Returns similarity score between a token (raw byte or first byte of a |
| 26 | // reference) in |old_image_index| at |src| and a token in |new_image_index| |
| 27 | // at |dst|. |targets_affinities| describes affinities for each target pool and |
| 28 | // is used to evaluate similarity between references, hence it's size must be |
| 29 | // equal to the number of pools in both |old_image_index| and |new_image_index|. |
| 30 | // Both |src| and |dst| must refer to tokens in |old_image_index| and |
| 31 | // |new_image_index|. |
| 32 | double GetTokenSimilarity( |
| 33 | const ImageIndex& old_image_index, |
| 34 | const ImageIndex& new_image_index, |
| 35 | const std::vector<TargetsAffinity>& targets_affinities, |
| 36 | offset_t src, |
| 37 | offset_t dst); |
| 38 | |
| 39 | // Returns a similarity score between content in |old_image_index| and |
| 40 | // |new_image_index| at regions described by |equivalence|, using |
| 41 | // |targets_affinities| to evaluate similarity between references. |
| 42 | double GetEquivalenceSimilarity( |
| 43 | const ImageIndex& old_image_index, |
| 44 | const ImageIndex& new_image_index, |
| 45 | const std::vector<TargetsAffinity>& targets_affinities, |
| 46 | const Equivalence& equivalence); |
| 47 | |
| 48 | // Extends |equivalence| forward and returns the result. This is related to |
| 49 | // VisitEquivalenceSeed(). |
| 50 | EquivalenceCandidate ExtendEquivalenceForward( |
| 51 | const ImageIndex& old_image_index, |
| 52 | const ImageIndex& new_image_index, |
| 53 | const std::vector<TargetsAffinity>& targets_affinities, |
| 54 | const EquivalenceCandidate& equivalence, |
| 55 | double min_similarity); |
| 56 | |
| 57 | // Extends |equivalence| backward and returns the result. This is related to |
| 58 | // VisitEquivalenceSeed(). |
| 59 | EquivalenceCandidate ExtendEquivalenceBackward( |
| 60 | const ImageIndex& old_image_index, |
| 61 | const ImageIndex& new_image_index, |
| 62 | const std::vector<TargetsAffinity>& targets_affinities, |
| 63 | const EquivalenceCandidate& equivalence, |
| 64 | double min_similarity); |
| 65 | |
| 66 | // Creates an equivalence, starting with |src| and |dst| as offset hint, and |
| 67 | // extends it both forward and backward, trying to maximise similarity between |
| 68 | // |old_image_index| and |new_image_index|, and returns the result. |
| 69 | // |targets_affinities| is used to evaluate similarity between references. |
| 70 | // |min_similarity| describes the minimum acceptable similarity score and is |
| 71 | // used as threshold to discard bad equivalences. |
| 72 | EquivalenceCandidate VisitEquivalenceSeed( |
| 73 | const ImageIndex& old_image_index, |
| 74 | const ImageIndex& new_image_index, |
| 75 | const std::vector<TargetsAffinity>& targets_affinities, |
| 76 | offset_t src, |
| 77 | offset_t dst, |
| 78 | double min_similarity); |
| 79 | |
| 80 | // Container of pruned equivalences used to map offsets from |old_image| to |
| 81 | // offsets in |new_image|. Equivalences are pruned by cropping smaller |
| 82 | // equivalences to avoid overlaps, to make the equivalence map (for covered |
| 83 | // bytes in |old_image| and |new_image|) one-to-one. |
| 84 | class OffsetMapper { |
| 85 | public: |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 86 | using const_iterator = std::deque<Equivalence>::const_iterator; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 87 | |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 88 | // Constructors for various data sources. "Old" and "new" image sizes are |
| 89 | // needed for bounds checks and to handle dangling targets. |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 90 | // - From a list of |equivalences|, already sorted (by |src_offset|) and |
| 91 | // pruned, useful for tests. |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 92 | OffsetMapper(std::deque<Equivalence>&& equivalences, |
Etienne Pierre-doray | 725a873 | 2018-09-10 16:19:33 +0000 | [diff] [blame] | 93 | offset_t old_image_size, |
| 94 | offset_t new_image_size); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 95 | // - From a generator, useful for Zucchini-apply. |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 96 | OffsetMapper(EquivalenceSource&& equivalence_source, |
Etienne Pierre-doray | 725a873 | 2018-09-10 16:19:33 +0000 | [diff] [blame] | 97 | offset_t old_image_size, |
| 98 | offset_t new_image_size); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 99 | // - From an EquivalenceMap that needs to be processed, useful for |
| 100 | // Zucchini-gen. |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 101 | OffsetMapper(const EquivalenceMap& equivalence_map, |
Etienne Pierre-doray | 725a873 | 2018-09-10 16:19:33 +0000 | [diff] [blame] | 102 | offset_t old_image_size, |
| 103 | offset_t new_image_size); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 104 | ~OffsetMapper(); |
| 105 | |
| 106 | size_t size() const { return equivalences_.size(); } |
| 107 | const_iterator begin() const { return equivalences_.begin(); } |
| 108 | const_iterator end() const { return equivalences_.end(); } |
| 109 | |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 110 | // Returns naive extended forward-projection of "old" |offset| that follows |
| 111 | // |eq|'s delta. |eq| needs not cover |offset|. |
| 112 | // - Averts underflow / overflow by clamping to |[0, new_image_size_)|. |
| 113 | // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the |
| 114 | // caller must to make the check (hence "naive"). |
| 115 | offset_t NaiveExtendedForwardProject(const Equivalence& unit, |
| 116 | offset_t offset) const; |
| 117 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 118 | // Returns an offset in |new_image| corresponding to |offset| in |old_image|. |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 119 | // Assumes |equivalences_| to be non-empty. Cases: |
| 120 | // - If |offset| is covered (i.e., in an "old" block), then use the delta of |
| 121 | // the (unique) equivalence unit that covers |offset|. |
| 122 | // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the |
| 123 | // nearest "old" block, use its delta, and avert underflow / overflow by |
| 124 | // clamping the result to |[0, new_image_size_)|. |
| 125 | // - If |offset| is >= |new_image_size_| (a "fake offset"), then use |
| 126 | // |new_image_size_ - old_image_size_| as the delta. |
| 127 | offset_t ExtendedForwardProject(offset_t offset) const; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 128 | |
| 129 | // Given sorted |offsets|, applies a projection in-place of all offsets that |
| 130 | // are part of a pruned equivalence from |old_image| to |new_image|. Other |
| 131 | // offsets are removed from |offsets|. |
Etienne Pierre-doray | aff4086 | 2021-09-14 17:31:51 +0000 | [diff] [blame] | 132 | void ForwardProjectAll(std::deque<offset_t>* offsets) const; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 133 | |
| 134 | // Accessor for testing. |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 135 | const std::deque<Equivalence> equivalences() const { return equivalences_; } |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 136 | |
| 137 | // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a |
| 138 | // source location that was covered by some Equivalence would become covered |
| 139 | // by exactly one Equivalence. Moreover, for the offset, the equivalence |
| 140 | // corresponds to the largest (pre-pruning) covering Equivalence, and in case |
| 141 | // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may |
| 142 | // change in size since empty Equivalences are removed. |
| 143 | static void PruneEquivalencesAndSortBySource( |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 144 | std::deque<Equivalence>* equivalences); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 145 | |
| 146 | private: |
Samuel Huang | 8fdb8ba | 2018-06-26 14:47:02 +0000 | [diff] [blame] | 147 | // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new" |
| 148 | // block overlaps). Also, it is sorted by "old" offsets. |
Etienne Pierre-doray | 8bb965d | 2021-10-29 14:12:23 +0000 | [diff] [blame] | 149 | std::deque<Equivalence> equivalences_; |
Etienne Pierre-doray | 725a873 | 2018-09-10 16:19:33 +0000 | [diff] [blame] | 150 | const offset_t old_image_size_; |
| 151 | const offset_t new_image_size_; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 152 | }; |
| 153 | |
| 154 | // Container of equivalences between |old_image_index| and |new_image_index|, |
| 155 | // sorted by |Equivalence::dst_offset|, only used during patch generation. |
| 156 | class EquivalenceMap { |
| 157 | public: |
| 158 | using const_iterator = std::vector<EquivalenceCandidate>::const_iterator; |
| 159 | |
| 160 | EquivalenceMap(); |
| 161 | // Initializes the object with |equivalences|. |
| 162 | explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates); |
| 163 | EquivalenceMap(EquivalenceMap&&); |
| 164 | EquivalenceMap(const EquivalenceMap&) = delete; |
| 165 | ~EquivalenceMap(); |
| 166 | |
| 167 | // Finds relevant equivalences between |old_view| and |new_view|, using |
| 168 | // suffix array |old_sa| computed from |old_view| and using |
| 169 | // |targets_affinities| to evaluate similarity between references. This |
| 170 | // function is not symmetric. Equivalences might overlap in |old_view|, but |
| 171 | // not in |new_view|. It tries to maximize accumulated similarity within each |
| 172 | // equivalence, while maximizing |new_view| coverage. The minimum similarity |
| 173 | // of an equivalence is given by |min_similarity|. |
| 174 | void Build(const std::vector<offset_t>& old_sa, |
| 175 | const EncodedView& old_view, |
| 176 | const EncodedView& new_view, |
| 177 | const std::vector<TargetsAffinity>& targets_affinities, |
| 178 | double min_similarity); |
| 179 | |
| 180 | size_t size() const { return candidates_.size(); } |
| 181 | const_iterator begin() const { return candidates_.begin(); } |
| 182 | const_iterator end() const { return candidates_.end(); } |
| 183 | |
| 184 | private: |
| 185 | // Discovers equivalence candidates between |old_view| and |new_view| and |
| 186 | // stores them in the object. Note that resulting candidates are not sorted |
| 187 | // and might be overlapping in new image. |
| 188 | void CreateCandidates(const std::vector<offset_t>& old_sa, |
| 189 | const EncodedView& old_view, |
| 190 | const EncodedView& new_view, |
| 191 | const std::vector<TargetsAffinity>& targets_affinities, |
| 192 | double min_similarity); |
| 193 | // Sorts candidates by their offset in new image. |
| 194 | void SortByDestination(); |
| 195 | // Visits |candidates_| (sorted by |dst_offset|) and remove all destination |
| 196 | // overlaps. Candidates with low similarity scores are more likely to be |
| 197 | // shrunken. Unfit candidates may be removed. |
| 198 | void Prune(const EncodedView& old_view, |
| 199 | const EncodedView& new_view, |
| 200 | const std::vector<TargetsAffinity>& targets_affinities, |
| 201 | double min_similarity); |
| 202 | |
| 203 | std::vector<EquivalenceCandidate> candidates_; |
| 204 | }; |
| 205 | |
| 206 | } // namespace zucchini |
| 207 | |
| 208 | #endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |