Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
| 7 | |
| 8 | #include <stdint.h> |
| 9 | |
| 10 | #include <tuple> |
| 11 | #include <vector> |
| 12 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 13 | #include "components/zucchini/algorithm.h" |
| 14 | #include "components/zucchini/image_utils.h" |
| 15 | |
| 16 | namespace zucchini { |
| 17 | |
| 18 | // There are several ways to reason about addresses in an image: |
| 19 | // - Offset: Position relative to start of image. |
| 20 | // - VA (Virtual Address): Virtual memory address of a loaded image. This is |
| 21 | // subject to relocation by the OS. |
| 22 | // - RVA (Relative Virtual Address): VA relative to some base address. This is |
| 23 | // the preferred way to specify pointers in an image. |
| 24 | // |
| 25 | // Zucchini is primarily concerned with offsets and RVAs. Executable images like |
| 26 | // PE and ELF are organized into sections. Each section specifies offset and RVA |
| 27 | // ranges as: |
| 28 | // {Offset start, offset size, RVA start, RVA size}. |
| 29 | // This constitutes a basic unit to translate between offsets and RVAs. Note: |
| 30 | // |offset size| < |RVA size| is possible. For example, the .bss section can can |
| 31 | // have zero-filled statically-allocated data that have no corresponding bytes |
| 32 | // on image (to save space). This poses a problem for Zucchini, which stores |
| 33 | // addresses as offsets: now we'd have "dangling RVAs" that don't map to |
| 34 | // offsets! Some ways to handling this are: |
| 35 | // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means |
| 36 | // some reference targets would escape detection and processing. |
| 37 | // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data |
| 38 | // must not be read on these fake offsets, which are only valid as target |
| 39 | // addresses for reference matching. |
| 40 | // As for |RVA size| < |offset size|, the extra portion just gets ignored. |
| 41 | // |
| 42 | // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to |
| 43 | // fake offsets by adding a large value. This value can be chosen as an |
| 44 | // exclusive upper bound of all offsets (i.e., image size). This allows them to |
| 45 | // be easily detected and processed as a special-case. |
| 46 | // TODO(huangs): Investigate option (1), now that the refactored code makes |
| 47 | // experimentation easier. |
| 48 | // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| |
| 49 | // ranges and create "fake" units to accommodate dangling RVAs. Then |
| 50 | // AddressTranslator can be simplified. |
| 51 | |
| 52 | // Virtual Address relative to some base address (RVA). There's distinction |
| 53 | // between "valid RVA" and "existent RVA": |
| 54 | // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. |
| 55 | // - Existent RVA: An RVA that has semantic meaning in an image, and may |
| 56 | // translate to an offset in an image or (if a dangling RVA) a fake offset. |
| 57 | // All existent RVAs are valid RVAs. |
| 58 | using rva_t = uint32_t; |
| 59 | // Divide by 2 to match |kOffsetBound|. |
| 60 | constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2; |
Samuel Huang | 98dd017 | 2018-10-10 15:48:10 +0000 | [diff] [blame] | 61 | constexpr rva_t kInvalidRva = static_cast<rva_t>(-2); |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 62 | |
| 63 | // A utility to translate between offsets and RVAs in an image. |
| 64 | class AddressTranslator { |
| 65 | public: |
| 66 | // A basic unit for address translation, roughly maps to a section, but may |
| 67 | // be processed (e.g., merged) as an optimization. |
| 68 | struct Unit { |
| 69 | offset_t offset_end() const { return offset_begin + offset_size; } |
| 70 | rva_t rva_end() const { return rva_begin + rva_size; } |
| 71 | bool IsEmpty() const { |
| 72 | // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed |
| 73 | // yet, and once it is then it's empty. |
| 74 | // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but |
| 75 | // is not empty. |
| 76 | return rva_size == 0; |
| 77 | } |
| 78 | bool CoversOffset(offset_t offset) const { |
| 79 | return RangeCovers(offset_begin, offset_size, offset); |
| 80 | } |
| 81 | bool CoversRva(rva_t rva) const { |
| 82 | return RangeCovers(rva_begin, rva_size, rva); |
| 83 | } |
| 84 | bool CoversDanglingRva(rva_t rva) const { |
| 85 | return CoversRva(rva) && rva - rva_begin >= offset_size; |
| 86 | } |
| 87 | // Assumes valid |offset| (*cannot* be fake offset). |
| 88 | rva_t OffsetToRvaUnsafe(offset_t offset) const { |
| 89 | return offset - offset_begin + rva_begin; |
| 90 | } |
| 91 | // Assumes valid |rva| (*can* be danging RVA). |
| 92 | offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { |
| 93 | rva_t delta = rva - rva_begin; |
| 94 | return delta < offset_size ? delta + offset_begin |
| 95 | : fake_offset_begin + rva; |
| 96 | } |
| 97 | bool HasDanglingRva() const { return rva_size > offset_size; } |
| 98 | friend bool operator==(const Unit& a, const Unit& b) { |
| 99 | return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == |
| 100 | std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); |
| 101 | } |
| 102 | |
| 103 | offset_t offset_begin; |
| 104 | offset_t offset_size; |
| 105 | rva_t rva_begin; |
| 106 | rva_t rva_size; |
| 107 | }; |
| 108 | |
| 109 | // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit |
| 110 | // found, to reduce the number of OffsetToUnit() calls for clustered queries. |
| 111 | class OffsetToRvaCache { |
| 112 | public: |
| 113 | // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| 114 | // lifetime. |
| 115 | explicit OffsetToRvaCache(const AddressTranslator& translator); |
Samuel Huang | 1cec5a7 | 2021-06-01 18:29:53 +0000 | [diff] [blame] | 116 | OffsetToRvaCache(const OffsetToRvaCache&) = delete; |
| 117 | const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 118 | |
| 119 | rva_t Convert(offset_t offset) const; |
| 120 | |
| 121 | private: |
| 122 | const AddressTranslator& translator_; |
| 123 | mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 124 | }; |
| 125 | |
| 126 | // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit |
| 127 | // found, to reduce the number of RvaToUnit() calls for clustered queries. |
| 128 | class RvaToOffsetCache { |
| 129 | public: |
| 130 | // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| 131 | // lifetime. |
| 132 | explicit RvaToOffsetCache(const AddressTranslator& translator); |
Samuel Huang | 1cec5a7 | 2021-06-01 18:29:53 +0000 | [diff] [blame] | 133 | RvaToOffsetCache(const RvaToOffsetCache&) = delete; |
| 134 | const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 135 | |
| 136 | bool IsValid(rva_t rva) const; |
Samuel Huang | 98dd017 | 2018-10-10 15:48:10 +0000 | [diff] [blame] | 137 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 138 | offset_t Convert(rva_t rva) const; |
| 139 | |
| 140 | private: |
| 141 | const AddressTranslator& translator_; |
| 142 | mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 143 | }; |
| 144 | |
| 145 | enum Status { |
| 146 | kSuccess = 0, |
| 147 | kErrorOverflow, |
| 148 | kErrorBadOverlap, |
| 149 | kErrorBadOverlapDanglingRva, |
| 150 | kErrorFakeOffsetBeginTooLarge, |
| 151 | }; |
| 152 | |
| 153 | AddressTranslator(); |
Samuel Huang | 1cec5a7 | 2021-06-01 18:29:53 +0000 | [diff] [blame] | 154 | AddressTranslator(AddressTranslator&&); |
| 155 | AddressTranslator(const AddressTranslator&) = delete; |
| 156 | const AddressTranslator& operator=(const AddressTranslator&) = delete; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 157 | ~AddressTranslator(); |
| 158 | |
| 159 | // Consumes |units| to populate data in this class. Performs consistency |
| 160 | // checks and overlapping Units. Returns Status to indicate success. |
| 161 | Status Initialize(std::vector<Unit>&& units); |
| 162 | |
| 163 | // Returns the (possibly dangling) RVA corresponding to |offset|, or |
| 164 | // kInvalidRva if not found. |
| 165 | rva_t OffsetToRva(offset_t offset) const; |
| 166 | |
| 167 | // Returns the (possibly fake) offset corresponding to |rva|, or |
| 168 | // kInvalidOffset if not found (i.e., |rva| is non-existent). |
| 169 | offset_t RvaToOffset(rva_t rva) const; |
| 170 | |
| 171 | // For testing. |
| 172 | offset_t fake_offset_begin() const { return fake_offset_begin_; } |
| 173 | |
| 174 | const std::vector<Unit>& units_sorted_by_offset() const { |
| 175 | return units_sorted_by_offset_; |
| 176 | } |
| 177 | |
| 178 | const std::vector<Unit>& units_sorted_by_rva() const { |
| 179 | return units_sorted_by_rva_; |
| 180 | } |
| 181 | |
| 182 | private: |
| 183 | // Helper to find the Unit that contains given |offset| or |rva|. Returns null |
| 184 | // if not found. |
| 185 | const Unit* OffsetToUnit(offset_t offset) const; |
| 186 | const Unit* RvaToUnit(rva_t rva) const; |
| 187 | |
| 188 | // Storage of Units. All offset ranges are non-empty and disjoint. Likewise |
| 189 | // for all RVA ranges. |
| 190 | std::vector<Unit> units_sorted_by_offset_; |
| 191 | std::vector<Unit> units_sorted_by_rva_; |
| 192 | |
| 193 | // Conversion factor to translate between dangling RVAs and fake offsets. |
| 194 | offset_t fake_offset_begin_; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 195 | }; |
| 196 | |
| 197 | } // namespace zucchini |
| 198 | |
| 199 | #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |