blob: a517a2c5023e58328c9cc64404564486f5d562e1 [file] [log] [blame]
Samuel Huang06f1ae92018-03-13 18:19:34 +00001// Copyright 2017 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
6#define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
7
8#include <stdint.h>
9
10#include <tuple>
11#include <vector>
12
Samuel Huang06f1ae92018-03-13 18:19:34 +000013#include "components/zucchini/algorithm.h"
14#include "components/zucchini/image_utils.h"
15
16namespace zucchini {
17
18// There are several ways to reason about addresses in an image:
19// - Offset: Position relative to start of image.
20// - VA (Virtual Address): Virtual memory address of a loaded image. This is
21// subject to relocation by the OS.
22// - RVA (Relative Virtual Address): VA relative to some base address. This is
23// the preferred way to specify pointers in an image.
24//
25// Zucchini is primarily concerned with offsets and RVAs. Executable images like
26// PE and ELF are organized into sections. Each section specifies offset and RVA
27// ranges as:
28// {Offset start, offset size, RVA start, RVA size}.
29// This constitutes a basic unit to translate between offsets and RVAs. Note:
30// |offset size| < |RVA size| is possible. For example, the .bss section can can
31// have zero-filled statically-allocated data that have no corresponding bytes
32// on image (to save space). This poses a problem for Zucchini, which stores
33// addresses as offsets: now we'd have "dangling RVAs" that don't map to
34// offsets! Some ways to handling this are:
35// 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
36// some reference targets would escape detection and processing.
37// 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
38// must not be read on these fake offsets, which are only valid as target
39// addresses for reference matching.
40// As for |RVA size| < |offset size|, the extra portion just gets ignored.
41//
42// Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
43// fake offsets by adding a large value. This value can be chosen as an
44// exclusive upper bound of all offsets (i.e., image size). This allows them to
45// be easily detected and processed as a special-case.
46// TODO(huangs): Investigate option (1), now that the refactored code makes
47// experimentation easier.
48// TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
49// ranges and create "fake" units to accommodate dangling RVAs. Then
50// AddressTranslator can be simplified.
51
52// Virtual Address relative to some base address (RVA). There's distinction
53// between "valid RVA" and "existent RVA":
54// - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
55// - Existent RVA: An RVA that has semantic meaning in an image, and may
56// translate to an offset in an image or (if a dangling RVA) a fake offset.
57// All existent RVAs are valid RVAs.
58using rva_t = uint32_t;
59// Divide by 2 to match |kOffsetBound|.
60constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
Samuel Huang98dd0172018-10-10 15:48:10 +000061constexpr rva_t kInvalidRva = static_cast<rva_t>(-2);
Samuel Huang06f1ae92018-03-13 18:19:34 +000062
63// A utility to translate between offsets and RVAs in an image.
64class AddressTranslator {
65 public:
66 // A basic unit for address translation, roughly maps to a section, but may
67 // be processed (e.g., merged) as an optimization.
68 struct Unit {
69 offset_t offset_end() const { return offset_begin + offset_size; }
70 rva_t rva_end() const { return rva_begin + rva_size; }
71 bool IsEmpty() const {
72 // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
73 // yet, and once it is then it's empty.
74 // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
75 // is not empty.
76 return rva_size == 0;
77 }
78 bool CoversOffset(offset_t offset) const {
79 return RangeCovers(offset_begin, offset_size, offset);
80 }
81 bool CoversRva(rva_t rva) const {
82 return RangeCovers(rva_begin, rva_size, rva);
83 }
84 bool CoversDanglingRva(rva_t rva) const {
85 return CoversRva(rva) && rva - rva_begin >= offset_size;
86 }
87 // Assumes valid |offset| (*cannot* be fake offset).
88 rva_t OffsetToRvaUnsafe(offset_t offset) const {
89 return offset - offset_begin + rva_begin;
90 }
91 // Assumes valid |rva| (*can* be danging RVA).
92 offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
93 rva_t delta = rva - rva_begin;
94 return delta < offset_size ? delta + offset_begin
95 : fake_offset_begin + rva;
96 }
97 bool HasDanglingRva() const { return rva_size > offset_size; }
98 friend bool operator==(const Unit& a, const Unit& b) {
99 return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
100 std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
101 }
102
103 offset_t offset_begin;
104 offset_t offset_size;
105 rva_t rva_begin;
106 rva_t rva_size;
107 };
108
109 // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
110 // found, to reduce the number of OffsetToUnit() calls for clustered queries.
111 class OffsetToRvaCache {
112 public:
113 // Embeds |translator| for use. Now object lifetime is tied to |translator|
114 // lifetime.
115 explicit OffsetToRvaCache(const AddressTranslator& translator);
Samuel Huang1cec5a72021-06-01 18:29:53 +0000116 OffsetToRvaCache(const OffsetToRvaCache&) = delete;
117 const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000118
119 rva_t Convert(offset_t offset) const;
120
121 private:
122 const AddressTranslator& translator_;
123 mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000124 };
125
126 // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
127 // found, to reduce the number of RvaToUnit() calls for clustered queries.
128 class RvaToOffsetCache {
129 public:
130 // Embeds |translator| for use. Now object lifetime is tied to |translator|
131 // lifetime.
132 explicit RvaToOffsetCache(const AddressTranslator& translator);
Samuel Huang1cec5a72021-06-01 18:29:53 +0000133 RvaToOffsetCache(const RvaToOffsetCache&) = delete;
134 const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000135
136 bool IsValid(rva_t rva) const;
Samuel Huang98dd0172018-10-10 15:48:10 +0000137
Samuel Huang06f1ae92018-03-13 18:19:34 +0000138 offset_t Convert(rva_t rva) const;
139
140 private:
141 const AddressTranslator& translator_;
142 mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000143 };
144
145 enum Status {
146 kSuccess = 0,
147 kErrorOverflow,
148 kErrorBadOverlap,
149 kErrorBadOverlapDanglingRva,
150 kErrorFakeOffsetBeginTooLarge,
151 };
152
153 AddressTranslator();
Samuel Huang1cec5a72021-06-01 18:29:53 +0000154 AddressTranslator(AddressTranslator&&);
155 AddressTranslator(const AddressTranslator&) = delete;
156 const AddressTranslator& operator=(const AddressTranslator&) = delete;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000157 ~AddressTranslator();
158
159 // Consumes |units| to populate data in this class. Performs consistency
160 // checks and overlapping Units. Returns Status to indicate success.
161 Status Initialize(std::vector<Unit>&& units);
162
163 // Returns the (possibly dangling) RVA corresponding to |offset|, or
164 // kInvalidRva if not found.
165 rva_t OffsetToRva(offset_t offset) const;
166
167 // Returns the (possibly fake) offset corresponding to |rva|, or
168 // kInvalidOffset if not found (i.e., |rva| is non-existent).
169 offset_t RvaToOffset(rva_t rva) const;
170
171 // For testing.
172 offset_t fake_offset_begin() const { return fake_offset_begin_; }
173
174 const std::vector<Unit>& units_sorted_by_offset() const {
175 return units_sorted_by_offset_;
176 }
177
178 const std::vector<Unit>& units_sorted_by_rva() const {
179 return units_sorted_by_rva_;
180 }
181
182 private:
183 // Helper to find the Unit that contains given |offset| or |rva|. Returns null
184 // if not found.
185 const Unit* OffsetToUnit(offset_t offset) const;
186 const Unit* RvaToUnit(rva_t rva) const;
187
188 // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
189 // for all RVA ranges.
190 std::vector<Unit> units_sorted_by_offset_;
191 std::vector<Unit> units_sorted_by_rva_;
192
193 // Conversion factor to translate between dangling RVAs and fake offsets.
194 offset_t fake_offset_begin_;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000195};
196
197} // namespace zucchini
198
199#endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_