//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
11#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
12
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <unordered_map>
#include <vector>
43
44namespace llvm {
45namespace cfi_verify {
46
// Forward declaration: this header only refers to GraphResult by const
// reference, so the full definition is not needed here.
struct GraphResult;

// Declared here, defined elsewhere. NOTE(review): presumably toggles whether
// DWARF debug information is ignored during analysis -- confirm against the
// definition.
extern bool IgnoreDWARFFlag;
50
// Result of checking whether an indirect control flow instruction is protected
// by CFI. PROTECTED is the only success value; every FAIL_* value names the
// reason the check did not succeed.
enum class CFIProtectionStatus {
  // This instruction is protected by CFI.
  PROTECTED,
  // The instruction is not an indirect control flow instruction, and thus
  // shouldn't be protected.
  FAIL_NOT_INDIRECT_CF,
  // There is a path to the instruction that was unexpected.
  FAIL_ORPHANS,
  // There is a path to the instruction from a conditional branch that does not
  // properly check the destination for this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // One of the operands of the indirect CF instruction is modified between the
  // CFI-check and execution.
  FAIL_REGISTER_CLOBBERED,
  // The instruction referenced does not exist. This normally indicates an
  // error in the program, where you try to validate a graph that was created
  // in a different FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};
70
71StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);
72
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +000073// Disassembler and analysis tool for machine code files. Keeps track of non-
74// sequential control flows, including indirect control flow instructions.
75class FileAnalysis {
76public:
77 // A metadata struct for an instruction.
78 struct Instr {
79 uint64_t VMAddress; // Virtual memory address of this instruction.
80 MCInst Instruction; // Instruction.
81 uint64_t InstructionSize; // Size of this instruction.
82 bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
83 // undefined.
84 };
85
86 // Construct a FileAnalysis from a file path.
87 static Expected<FileAnalysis> Create(StringRef Filename);
88
89 // Construct and take ownership of the supplied object. Do not use this
90 // constructor, prefer to use FileAnalysis::Create instead.
91 FileAnalysis(object::OwningBinary<object::Binary> Binary);
92 FileAnalysis() = delete;
93 FileAnalysis(const FileAnalysis &) = delete;
94 FileAnalysis(FileAnalysis &&Other) = default;
95
96 // Returns the instruction at the provided address. Returns nullptr if there
97 // is no instruction at the provided address.
98 const Instr *getInstruction(uint64_t Address) const;
99
100 // Returns the instruction at the provided adress, dying if the instruction is
101 // not found.
102 const Instr &getInstructionOrDie(uint64_t Address) const;
103
104 // Returns a pointer to the previous/next instruction in sequence,
105 // respectively. Returns nullptr if the next/prev instruction doesn't exist,
106 // or if the provided instruction doesn't exist.
107 const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
108 const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
109
Vlad Tsyrklevich0ee26322017-10-11 23:17:29 +0000110 // Returns whether this instruction is used by CFI to trap the program.
111 bool isCFITrap(const Instr &InstrMeta) const;
112
113 // Returns whether this function can fall through to the next instruction.
114 // Undefined (and bad) instructions cannot fall through, and instruction that
115 // modify the control flow can only fall through if they are conditional
116 // branches or calls.
117 bool canFallThrough(const Instr &InstrMeta) const;
118
119 // Returns the definitive next instruction. This is different from the next
120 // instruction sequentially as it will follow unconditional branches (assuming
121 // they can be resolved at compile time, i.e. not indirect). This method
122 // returns nullptr if the provided instruction does not transfer control flow
123 // to exactly one instruction that is known deterministically at compile time.
124 // Also returns nullptr if the deterministic target does not exist in this
125 // file.
126 const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
127
128 // Get a list of deterministic control flows that lead to the provided
129 // instruction. This list includes all static control flow cross-references as
130 // well as the previous instruction if it can fall through.
131 std::set<const Instr *>
132 getDirectControlFlowXRefs(const Instr &InstrMeta) const;
133
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000134 // Returns whether this instruction uses a register operand.
135 bool usesRegisterOperand(const Instr &InstrMeta) const;
136
137 // Returns the list of indirect instructions.
138 const std::set<uint64_t> &getIndirectInstructions() const;
139
140 const MCRegisterInfo *getRegisterInfo() const;
141 const MCInstrInfo *getMCInstrInfo() const;
142 const MCInstrAnalysis *getMCInstrAnalysis() const;
143
Mitch Phillips3b9ea322017-11-10 21:00:22 +0000144 // Returns the inlining information for the provided address.
145 Expected<DIInliningInfo> symbolizeInlinedCode(uint64_t Address);
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000146
Mitch Phillips3b9ea322017-11-10 21:00:22 +0000147 // Returns whether the provided Graph represents a protected indirect control
148 // flow instruction in this file.
149 CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000150
Mitch Phillips2e7be2a2017-11-15 00:35:26 +0000151 // Returns the first place the operand register is clobbered between the CFI-
152 // check and the indirect CF instruction execution. If the register is not
153 // modified, returns the address of the indirect CF instruction. The result is
154 // undefined if the provided graph does not fall under either the
155 // FAIL_REGISTER_CLOBBERED or PROTECTED status (see CFIProtectionStatus).
156 uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;
157
Mitch Phillips02993892017-11-14 22:43:13 +0000158 // Prints an instruction to the provided stream using this object's pretty-
159 // printers.
160 void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
161
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000162protected:
163 // Construct a blank object with the provided triple and features. Used in
164 // testing, where a sub class will dependency inject protected methods to
165 // allow analysis of raw binary, without requiring a fully valid ELF file.
166 FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
167
168 // Add an instruction to this object.
169 void addInstruction(const Instr &Instruction);
170
171 // Disassemble and parse the provided bytes into this object. Instruction
172 // address calculation is done relative to the provided SectionAddress.
173 void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
174 uint64_t SectionAddress);
175
176 // Constructs and initialises members required for disassembly.
177 Error initialiseDisassemblyMembers();
178
179 // Parses code sections from the internal object file. Saves them into the
180 // internal members. Should only be called once by Create().
181 Error parseCodeSections();
182
183private:
184 // Members that describe the input file.
185 object::OwningBinary<object::Binary> Binary;
186 const object::ObjectFile *Object = nullptr;
187 Triple ObjectTriple;
188 std::string ArchName;
189 std::string MCPU;
190 const Target *ObjectTarget = nullptr;
191 SubtargetFeatures Features;
192
193 // Members required for disassembly.
194 std::unique_ptr<const MCRegisterInfo> RegisterInfo;
195 std::unique_ptr<const MCAsmInfo> AsmInfo;
196 std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
197 std::unique_ptr<const MCInstrInfo> MII;
198 MCObjectFileInfo MOFI;
199 std::unique_ptr<MCContext> Context;
200 std::unique_ptr<const MCDisassembler> Disassembler;
201 std::unique_ptr<const MCInstrAnalysis> MIA;
202 std::unique_ptr<MCInstPrinter> Printer;
203
Mitch Phillipsc15bdf52017-11-03 20:54:26 +0000204 // Symbolizer used for debug information parsing.
205 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000206
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000207 // A mapping between the virtual memory address to the instruction metadata
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000208 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
209 // insertion allocation.
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000210 std::map<uint64_t, Instr> Instructions;
211
212 // Contains a mapping between a specific address, and a list of instructions
213 // that use this address as a branch target (including call instructions).
Mitch Phillips99fa1402017-10-23 20:25:19 +0000214 DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000215
216 // A list of addresses of indirect control flow instructions.
217 std::set<uint64_t> IndirectInstructions;
218};
219
220class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
221public:
222 static char ID;
Mitch Phillipsd9af3832017-10-23 20:54:01 +0000223 std::string Text;
224
225 UnsupportedDisassembly(StringRef Text);
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000226
227 void log(raw_ostream &OS) const override;
228 std::error_code convertToErrorCode() const override;
229};
230
231} // namespace cfi_verify
232} // namespace llvm
233
234#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H