blob: af99ac44eb03c7650c62722c4658d2dfed9d6b5b [file] [log] [blame]
Samuel Huang06f1ae92018-03-13 18:19:34 +00001// Copyright 2017 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
6#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
7
8#include <stddef.h>
9
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +000010#include <deque>
Samuel Huang06f1ae92018-03-13 18:19:34 +000011#include <limits>
12#include <vector>
13
14#include "components/zucchini/image_index.h"
15#include "components/zucchini/image_utils.h"
16#include "components/zucchini/targets_affinity.h"
17
18namespace zucchini {
19
20constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
21
22class EncodedView;
23class EquivalenceSource;
24
25// Returns similarity score between a token (raw byte or first byte of a
26// reference) in |old_image_index| at |src| and a token in |new_image_index|
27// at |dst|. |targets_affinities| describes affinities for each target pool and
28// is used to evaluate similarity between references, hence it's size must be
29// equal to the number of pools in both |old_image_index| and |new_image_index|.
30// Both |src| and |dst| must refer to tokens in |old_image_index| and
31// |new_image_index|.
32double GetTokenSimilarity(
33 const ImageIndex& old_image_index,
34 const ImageIndex& new_image_index,
35 const std::vector<TargetsAffinity>& targets_affinities,
36 offset_t src,
37 offset_t dst);
38
39// Returns a similarity score between content in |old_image_index| and
40// |new_image_index| at regions described by |equivalence|, using
41// |targets_affinities| to evaluate similarity between references.
42double GetEquivalenceSimilarity(
43 const ImageIndex& old_image_index,
44 const ImageIndex& new_image_index,
45 const std::vector<TargetsAffinity>& targets_affinities,
46 const Equivalence& equivalence);
47
48// Extends |equivalence| forward and returns the result. This is related to
49// VisitEquivalenceSeed().
50EquivalenceCandidate ExtendEquivalenceForward(
51 const ImageIndex& old_image_index,
52 const ImageIndex& new_image_index,
53 const std::vector<TargetsAffinity>& targets_affinities,
54 const EquivalenceCandidate& equivalence,
55 double min_similarity);
56
57// Extends |equivalence| backward and returns the result. This is related to
58// VisitEquivalenceSeed().
59EquivalenceCandidate ExtendEquivalenceBackward(
60 const ImageIndex& old_image_index,
61 const ImageIndex& new_image_index,
62 const std::vector<TargetsAffinity>& targets_affinities,
63 const EquivalenceCandidate& equivalence,
64 double min_similarity);
65
66// Creates an equivalence, starting with |src| and |dst| as offset hint, and
67// extends it both forward and backward, trying to maximise similarity between
68// |old_image_index| and |new_image_index|, and returns the result.
69// |targets_affinities| is used to evaluate similarity between references.
70// |min_similarity| describes the minimum acceptable similarity score and is
71// used as threshold to discard bad equivalences.
72EquivalenceCandidate VisitEquivalenceSeed(
73 const ImageIndex& old_image_index,
74 const ImageIndex& new_image_index,
75 const std::vector<TargetsAffinity>& targets_affinities,
76 offset_t src,
77 offset_t dst,
78 double min_similarity);
79
80// Container of pruned equivalences used to map offsets from |old_image| to
81// offsets in |new_image|. Equivalences are pruned by cropping smaller
82// equivalences to avoid overlaps, to make the equivalence map (for covered
83// bytes in |old_image| and |new_image|) one-to-one.
84class OffsetMapper {
85 public:
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +000086 using const_iterator = std::deque<Equivalence>::const_iterator;
Samuel Huang06f1ae92018-03-13 18:19:34 +000087
Samuel Huang8fdb8ba2018-06-26 14:47:02 +000088 // Constructors for various data sources. "Old" and "new" image sizes are
89 // needed for bounds checks and to handle dangling targets.
Samuel Huang06f1ae92018-03-13 18:19:34 +000090 // - From a list of |equivalences|, already sorted (by |src_offset|) and
91 // pruned, useful for tests.
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +000092 OffsetMapper(std::deque<Equivalence>&& equivalences,
Etienne Pierre-doray725a8732018-09-10 16:19:33 +000093 offset_t old_image_size,
94 offset_t new_image_size);
Samuel Huang06f1ae92018-03-13 18:19:34 +000095 // - From a generator, useful for Zucchini-apply.
Samuel Huang8fdb8ba2018-06-26 14:47:02 +000096 OffsetMapper(EquivalenceSource&& equivalence_source,
Etienne Pierre-doray725a8732018-09-10 16:19:33 +000097 offset_t old_image_size,
98 offset_t new_image_size);
Samuel Huang06f1ae92018-03-13 18:19:34 +000099 // - From an EquivalenceMap that needs to be processed, useful for
100 // Zucchini-gen.
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000101 OffsetMapper(const EquivalenceMap& equivalence_map,
Etienne Pierre-doray725a8732018-09-10 16:19:33 +0000102 offset_t old_image_size,
103 offset_t new_image_size);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000104 ~OffsetMapper();
105
106 size_t size() const { return equivalences_.size(); }
107 const_iterator begin() const { return equivalences_.begin(); }
108 const_iterator end() const { return equivalences_.end(); }
109
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000110 // Returns naive extended forward-projection of "old" |offset| that follows
111 // |eq|'s delta. |eq| needs not cover |offset|.
112 // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
113 // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
114 // caller must to make the check (hence "naive").
115 offset_t NaiveExtendedForwardProject(const Equivalence& unit,
116 offset_t offset) const;
117
Samuel Huang06f1ae92018-03-13 18:19:34 +0000118 // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000119 // Assumes |equivalences_| to be non-empty. Cases:
120 // - If |offset| is covered (i.e., in an "old" block), then use the delta of
121 // the (unique) equivalence unit that covers |offset|.
122 // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
123 // nearest "old" block, use its delta, and avert underflow / overflow by
124 // clamping the result to |[0, new_image_size_)|.
125 // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
126 // |new_image_size_ - old_image_size_| as the delta.
127 offset_t ExtendedForwardProject(offset_t offset) const;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000128
129 // Given sorted |offsets|, applies a projection in-place of all offsets that
130 // are part of a pruned equivalence from |old_image| to |new_image|. Other
131 // offsets are removed from |offsets|.
Etienne Pierre-dorayaff40862021-09-14 17:31:51 +0000132 void ForwardProjectAll(std::deque<offset_t>* offsets) const;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000133
134 // Accessor for testing.
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000135 const std::deque<Equivalence> equivalences() const { return equivalences_; }
Samuel Huang06f1ae92018-03-13 18:19:34 +0000136
137 // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
138 // source location that was covered by some Equivalence would become covered
139 // by exactly one Equivalence. Moreover, for the offset, the equivalence
140 // corresponds to the largest (pre-pruning) covering Equivalence, and in case
141 // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
142 // change in size since empty Equivalences are removed.
143 static void PruneEquivalencesAndSortBySource(
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000144 std::deque<Equivalence>* equivalences);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000145
146 private:
Samuel Huang8fdb8ba2018-06-26 14:47:02 +0000147 // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
148 // block overlaps). Also, it is sorted by "old" offsets.
Etienne Pierre-doray8bb965d2021-10-29 14:12:23 +0000149 std::deque<Equivalence> equivalences_;
Etienne Pierre-doray725a8732018-09-10 16:19:33 +0000150 const offset_t old_image_size_;
151 const offset_t new_image_size_;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000152};
153
154// Container of equivalences between |old_image_index| and |new_image_index|,
155// sorted by |Equivalence::dst_offset|, only used during patch generation.
156class EquivalenceMap {
157 public:
158 using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
159
160 EquivalenceMap();
161 // Initializes the object with |equivalences|.
162 explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
163 EquivalenceMap(EquivalenceMap&&);
164 EquivalenceMap(const EquivalenceMap&) = delete;
165 ~EquivalenceMap();
166
167 // Finds relevant equivalences between |old_view| and |new_view|, using
168 // suffix array |old_sa| computed from |old_view| and using
169 // |targets_affinities| to evaluate similarity between references. This
170 // function is not symmetric. Equivalences might overlap in |old_view|, but
171 // not in |new_view|. It tries to maximize accumulated similarity within each
172 // equivalence, while maximizing |new_view| coverage. The minimum similarity
173 // of an equivalence is given by |min_similarity|.
174 void Build(const std::vector<offset_t>& old_sa,
175 const EncodedView& old_view,
176 const EncodedView& new_view,
177 const std::vector<TargetsAffinity>& targets_affinities,
178 double min_similarity);
179
180 size_t size() const { return candidates_.size(); }
181 const_iterator begin() const { return candidates_.begin(); }
182 const_iterator end() const { return candidates_.end(); }
183
184 private:
185 // Discovers equivalence candidates between |old_view| and |new_view| and
186 // stores them in the object. Note that resulting candidates are not sorted
187 // and might be overlapping in new image.
188 void CreateCandidates(const std::vector<offset_t>& old_sa,
189 const EncodedView& old_view,
190 const EncodedView& new_view,
191 const std::vector<TargetsAffinity>& targets_affinities,
192 double min_similarity);
193 // Sorts candidates by their offset in new image.
194 void SortByDestination();
195 // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
196 // overlaps. Candidates with low similarity scores are more likely to be
197 // shrunken. Unfit candidates may be removed.
198 void Prune(const EncodedView& old_view,
199 const EncodedView& new_view,
200 const std::vector<TargetsAffinity>& targets_affinities,
201 double min_similarity);
202
203 std::vector<EquivalenceCandidate> candidates_;
204};
205
206} // namespace zucchini
207
208#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_