| // Copyright 2017 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
| #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |
| |
| #include <stdint.h> |
| |
| #include <tuple> |
| #include <vector> |
| |
| #include "components/zucchini/algorithm.h" |
| #include "components/zucchini/image_utils.h" |
| |
| namespace zucchini { |
| |
| // There are several ways to reason about addresses in an image: |
| // - Offset: Position relative to start of image. |
| // - VA (Virtual Address): Virtual memory address of a loaded image. This is |
| // subject to relocation by the OS. |
| // - RVA (Relative Virtual Address): VA relative to some base address. This is |
| // the preferred way to specify pointers in an image. |
| // |
| // Zucchini is primarily concerned with offsets and RVAs. Executable images like |
| // PE and ELF are organized into sections. Each section specifies offset and RVA |
| // ranges as: |
| // {Offset start, offset size, RVA start, RVA size}. |
| // This constitutes a basic unit to translate between offsets and RVAs. Note: |
| // |offset size| < |RVA size| is possible. For example, the .bss section can can |
| // have zero-filled statically-allocated data that have no corresponding bytes |
| // on image (to save space). This poses a problem for Zucchini, which stores |
| // addresses as offsets: now we'd have "dangling RVAs" that don't map to |
| // offsets! Some ways to handling this are: |
| // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means |
| // some reference targets would escape detection and processing. |
| // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data |
| // must not be read on these fake offsets, which are only valid as target |
| // addresses for reference matching. |
| // As for |RVA size| < |offset size|, the extra portion just gets ignored. |
| // |
| // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to |
| // fake offsets by adding a large value. This value can be chosen as an |
| // exclusive upper bound of all offsets (i.e., image size). This allows them to |
| // be easily detected and processed as a special-case. |
| // TODO(huangs): Investigate option (1), now that the refactored code makes |
| // experimentation easier. |
| // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| |
| // ranges and create "fake" units to accommodate dangling RVAs. Then |
| // AddressTranslator can be simplified. |
| |
| // Virtual Address relative to some base address (RVA). There's distinction |
| // between "valid RVA" and "existent RVA": |
| // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. |
| // - Existent RVA: An RVA that has semantic meaning in an image, and may |
| // translate to an offset in an image or (if a dangling RVA) a fake offset. |
| // All existent RVAs are valid RVAs. |
| using rva_t = uint32_t; |
| // Divide by 2 to match |kOffsetBound|. |
| constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2; |
| constexpr rva_t kInvalidRva = static_cast<rva_t>(-2); |
| |
| // A utility to translate between offsets and RVAs in an image. |
| class AddressTranslator { |
| public: |
| // A basic unit for address translation, roughly maps to a section, but may |
| // be processed (e.g., merged) as an optimization. |
| struct Unit { |
| offset_t offset_end() const { return offset_begin + offset_size; } |
| rva_t rva_end() const { return rva_begin + rva_size; } |
| bool IsEmpty() const { |
| // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed |
| // yet, and once it is then it's empty. |
| // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but |
| // is not empty. |
| return rva_size == 0; |
| } |
| bool CoversOffset(offset_t offset) const { |
| return RangeCovers(offset_begin, offset_size, offset); |
| } |
| bool CoversRva(rva_t rva) const { |
| return RangeCovers(rva_begin, rva_size, rva); |
| } |
| bool CoversDanglingRva(rva_t rva) const { |
| return CoversRva(rva) && rva - rva_begin >= offset_size; |
| } |
| // Assumes valid |offset| (*cannot* be fake offset). |
| rva_t OffsetToRvaUnsafe(offset_t offset) const { |
| return offset - offset_begin + rva_begin; |
| } |
| // Assumes valid |rva| (*can* be danging RVA). |
| offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { |
| rva_t delta = rva - rva_begin; |
| return delta < offset_size ? delta + offset_begin |
| : fake_offset_begin + rva; |
| } |
| bool HasDanglingRva() const { return rva_size > offset_size; } |
| friend bool operator==(const Unit& a, const Unit& b) { |
| return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == |
| std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); |
| } |
| |
| offset_t offset_begin; |
| offset_t offset_size; |
| rva_t rva_begin; |
| rva_t rva_size; |
| }; |
| |
| // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit |
| // found, to reduce the number of OffsetToUnit() calls for clustered queries. |
| class OffsetToRvaCache { |
| public: |
| // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| // lifetime. |
| explicit OffsetToRvaCache(const AddressTranslator& translator); |
| OffsetToRvaCache(const OffsetToRvaCache&) = delete; |
| const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; |
| |
| rva_t Convert(offset_t offset) const; |
| |
| private: |
| const AddressTranslator& translator_; |
| mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
| }; |
| |
| // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit |
| // found, to reduce the number of RvaToUnit() calls for clustered queries. |
| class RvaToOffsetCache { |
| public: |
| // Embeds |translator| for use. Now object lifetime is tied to |translator| |
| // lifetime. |
| explicit RvaToOffsetCache(const AddressTranslator& translator); |
| RvaToOffsetCache(const RvaToOffsetCache&) = delete; |
| const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; |
| |
| bool IsValid(rva_t rva) const; |
| |
| offset_t Convert(rva_t rva) const; |
| |
| private: |
| const AddressTranslator& translator_; |
| mutable const AddressTranslator::Unit* cached_unit_ = nullptr; |
| }; |
| |
| enum Status { |
| kSuccess = 0, |
| kErrorOverflow, |
| kErrorBadOverlap, |
| kErrorBadOverlapDanglingRva, |
| kErrorFakeOffsetBeginTooLarge, |
| }; |
| |
| AddressTranslator(); |
| AddressTranslator(AddressTranslator&&); |
| AddressTranslator(const AddressTranslator&) = delete; |
| const AddressTranslator& operator=(const AddressTranslator&) = delete; |
| ~AddressTranslator(); |
| |
| // Consumes |units| to populate data in this class. Performs consistency |
| // checks and overlapping Units. Returns Status to indicate success. |
| Status Initialize(std::vector<Unit>&& units); |
| |
| // Returns the (possibly dangling) RVA corresponding to |offset|, or |
| // kInvalidRva if not found. |
| rva_t OffsetToRva(offset_t offset) const; |
| |
| // Returns the (possibly fake) offset corresponding to |rva|, or |
| // kInvalidOffset if not found (i.e., |rva| is non-existent). |
| offset_t RvaToOffset(rva_t rva) const; |
| |
| // For testing. |
| offset_t fake_offset_begin() const { return fake_offset_begin_; } |
| |
| const std::vector<Unit>& units_sorted_by_offset() const { |
| return units_sorted_by_offset_; |
| } |
| |
| const std::vector<Unit>& units_sorted_by_rva() const { |
| return units_sorted_by_rva_; |
| } |
| |
| private: |
| // Helper to find the Unit that contains given |offset| or |rva|. Returns null |
| // if not found. |
| const Unit* OffsetToUnit(offset_t offset) const; |
| const Unit* RvaToUnit(rva_t rva) const; |
| |
| // Storage of Units. All offset ranges are non-empty and disjoint. Likewise |
| // for all RVA ranges. |
| std::vector<Unit> units_sorted_by_offset_; |
| std::vector<Unit> units_sorted_by_rva_; |
| |
| // Conversion factor to translate between dangling RVAs and fake offsets. |
| offset_t fake_offset_begin_; |
| }; |
| |
| } // namespace zucchini |
| |
| #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ |