Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |
| 7 | |
| 8 | #include <stddef.h> |
| 9 | |
| 10 | #include <memory> |
| 11 | #include <string> |
| 12 | #include <vector> |
| 13 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 14 | #include "components/zucchini/buffer_view.h" |
| 15 | #include "components/zucchini/image_utils.h" |
| 16 | |
| 17 | namespace zucchini { |
| 18 | |
Etienne Pierre-doray | e57c4e6 | 2018-08-10 17:44:37 +0000 | [diff] [blame] | 19 | // A vacuous ReferenceReader that produces no references. |
| 20 | class EmptyReferenceReader : public ReferenceReader { |
| 21 | public: |
Anton Bikineev | 1a96551 | 2021-05-15 22:35:36 +0000 | [diff] [blame] | 22 | absl::optional<Reference> GetNext() override; |
Etienne Pierre-doray | e57c4e6 | 2018-08-10 17:44:37 +0000 | [diff] [blame] | 23 | }; |
| 24 | |
Samuel Huang | 431d119 | 2018-12-31 19:59:22 +0000 | [diff] [blame] | 25 | // A vacuous EmptyReferenceWriter that does not write. |
| 26 | class EmptyReferenceWriter : public ReferenceWriter { |
| 27 | public: |
| 28 | void PutNext(Reference reference) override; |
| 29 | }; |
| 30 | |
Peter Collingbourne | 87dabe1 | 2018-06-13 22:17:49 +0000 | [diff] [blame] | 31 | // Disassembler needs to be declared before ReferenceGroup because the latter |
| 32 | // contains member pointers based on the former, and we use a compiler flag, |
| 33 | // -fcomplete-member-pointers, which enforces that member pointer base types are |
| 34 | // complete. This flag helps prevent us from running into problems in the |
| 35 | // Microsoft C++ ABI (see https://crbug.com/847724). |
| 36 | |
| 37 | class ReferenceGroup; |
| 38 | |
| 39 | // A Disassembler is used to encapsulate architecture specific operations, to: |
| 40 | // - Describe types of references found in the architecture using traits. |
| 41 | // - Extract references contained in an image file. |
| 42 | // - Correct target for some references. |
| 43 | class Disassembler { |
| 44 | public: |
| 45 | // Attempts to parse |image| and create an architecture-specifc Disassembler, |
| 46 | // as determined by DIS, which is inherited from Disassembler. Returns an |
| 47 | // instance of DIS if successful, and null otherwise. |
| 48 | template <class DIS> |
| 49 | static std::unique_ptr<DIS> Make(ConstBufferView image) { |
| 50 | auto disasm = std::make_unique<DIS>(); |
| 51 | if (!disasm->Parse(image)) |
| 52 | return nullptr; |
| 53 | return disasm; |
| 54 | } |
| 55 | |
Samuel Huang | f137bf4 | 2021-08-13 15:42:26 +0000 | [diff] [blame] | 56 | Disassembler(const Disassembler&) = delete; |
| 57 | const Disassembler& operator=(const Disassembler&) = delete; |
Peter Collingbourne | 87dabe1 | 2018-06-13 22:17:49 +0000 | [diff] [blame] | 58 | virtual ~Disassembler(); |
| 59 | |
| 60 | // Returns the type of executable handled by the Disassembler. |
| 61 | virtual ExecutableType GetExeType() const = 0; |
| 62 | |
| 63 | // Returns a more detailed description of the executable type. |
| 64 | virtual std::string GetExeTypeString() const = 0; |
| 65 | |
| 66 | // Creates and returns a vector that contains all groups of references. |
| 67 | // Groups must be aggregated by pool. |
| 68 | virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0; |
| 69 | |
| 70 | ConstBufferView image() const { return image_; } |
| 71 | size_t size() const { return image_.size(); } |
| 72 | |
| 73 | int num_equivalence_iterations() const { return num_equivalence_iterations_; } |
| 74 | |
| 75 | protected: |
| 76 | explicit Disassembler(int num_equivalence_iterations); |
| 77 | |
| 78 | // Parses |image| and initializes internal states. Returns true on success. |
| 79 | // This must be called once and before any other operation. |
| 80 | virtual bool Parse(ConstBufferView image) = 0; |
| 81 | |
| 82 | // Raw image data. After Parse(), a Disassembler should shrink this to contain |
| 83 | // only the portion containing the executable file it recognizes. |
| 84 | ConstBufferView image_; |
| 85 | |
| 86 | // The number of iterations to run for equivalence map generation. This should |
| 87 | // roughly be the max length of reference indirection chains. |
| 88 | int num_equivalence_iterations_; |
Peter Collingbourne | 87dabe1 | 2018-06-13 22:17:49 +0000 | [diff] [blame] | 89 | }; |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 90 | |
| 91 | // A ReferenceGroup is associated with a specific |type| and has convenience |
| 92 | // methods to obtain readers and writers for that type. A ReferenceGroup does |
| 93 | // not store references; it is a lightweight class that communicates with the |
| 94 | // disassembler to operate on them. |
| 95 | class ReferenceGroup { |
| 96 | public: |
| 97 | // Member function pointer used to obtain a ReferenceReader. |
| 98 | using ReaderFactory = std::unique_ptr<ReferenceReader> ( |
| 99 | Disassembler::*)(offset_t lower, offset_t upper); |
| 100 | |
| 101 | // Member function pointer used to obtain a ReferenceWriter. |
| 102 | using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)( |
| 103 | MutableBufferView image); |
| 104 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 105 | // RefinedGeneratorFactory and RefinedReceptorFactory don't have to be |
| 106 | // identical to GeneratorFactory and ReceptorFactory, but they must be |
| 107 | // convertible. As a result, they can be pointer to member function of a |
| 108 | // derived Disassembler. |
| 109 | template <class RefinedReaderFactory, class RefinedWriterFactory> |
| 110 | ReferenceGroup(ReferenceTypeTraits traits, |
| 111 | RefinedReaderFactory reader_factory, |
| 112 | RefinedWriterFactory writer_factory) |
| 113 | : traits_(traits), |
| 114 | reader_factory_(static_cast<ReaderFactory>(reader_factory)), |
| 115 | writer_factory_(static_cast<WriterFactory>(writer_factory)) {} |
| 116 | |
| 117 | // Returns a reader for all references in the binary. |
| 118 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 119 | std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const; |
| 120 | |
| 121 | // Returns a reader for references whose bytes are entirely contained in |
| 122 | // |[lower, upper)|. |
| 123 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 124 | std::unique_ptr<ReferenceReader> GetReader(offset_t lower, |
| 125 | offset_t upper, |
| 126 | Disassembler* disasm) const; |
| 127 | |
| 128 | // Returns a writer for references in |image|, assuming that |image| was the |
| 129 | // same one initially parsed by |disasm|. |
| 130 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 131 | std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image, |
| 132 | Disassembler* disasm) const; |
| 133 | |
| 134 | // Returns traits describing the reference type. |
| 135 | const ReferenceTypeTraits& traits() const { return traits_; } |
| 136 | |
| 137 | // Shorthand for traits().width. |
| 138 | offset_t width() const { return traits().width; } |
| 139 | |
| 140 | // Shorthand for traits().type_tag. |
| 141 | TypeTag type_tag() const { return traits().type_tag; } |
| 142 | |
| 143 | // Shorthand for traits().pool_tag. |
| 144 | PoolTag pool_tag() const { return traits().pool_tag; } |
| 145 | |
| 146 | private: |
| 147 | ReferenceTypeTraits traits_; |
| 148 | ReaderFactory reader_factory_ = nullptr; |
| 149 | WriterFactory writer_factory_ = nullptr; |
| 150 | }; |
| 151 | |
Samuel Huang | 06f1ae9 | 2018-03-13 18:19:34 +0000 | [diff] [blame] | 152 | } // namespace zucchini |
| 153 | |
| 154 | #endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |