blob: 820c3683540d65f760b4cc45403ed001a7b62ecf [file] [log] [blame]
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +00001//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
11#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
12
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
43
44namespace llvm {
45namespace cfi_verify {
46
// Forward declaration; the full definition is not part of this header. Within
// this header it is only used by reference, as the parameter of
// FileAnalysis::validateCFIProtection().
struct GraphResult;

// Declared here, defined in another translation unit.
// NOTE(review): presumably tells the tool to ignore DWARF debug information
// when set -- confirm against the definition.
extern bool IgnoreDWARFFlag;
50
// Result of checking whether an instruction is protected by CFI.
enum class CFIProtectionStatus {
  // The instruction is CFI-protected.
  PROTECTED,
  // Not an indirect control flow instruction, so it should not be protected
  // in the first place.
  FAIL_NOT_INDIRECT_CF,
  // An unexpected path leads to the instruction.
  FAIL_ORPHANS,
  // A conditional branch leads to the instruction without properly checking
  // the destination of this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // The referenced instruction does not exist. Normally a sign of a program
  // error: an attempt to validate a graph that was created in a different
  // FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};
67
// Maps a CFIProtectionStatus value to a string. Declared only; the definition
// (and the exact strings produced) lives outside this header.
StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);
69
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +000070// Disassembler and analysis tool for machine code files. Keeps track of non-
71// sequential control flows, including indirect control flow instructions.
72class FileAnalysis {
73public:
74 // A metadata struct for an instruction.
75 struct Instr {
76 uint64_t VMAddress; // Virtual memory address of this instruction.
77 MCInst Instruction; // Instruction.
78 uint64_t InstructionSize; // Size of this instruction.
79 bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
80 // undefined.
81 };
82
83 // Construct a FileAnalysis from a file path.
84 static Expected<FileAnalysis> Create(StringRef Filename);
85
86 // Construct and take ownership of the supplied object. Do not use this
87 // constructor, prefer to use FileAnalysis::Create instead.
88 FileAnalysis(object::OwningBinary<object::Binary> Binary);
89 FileAnalysis() = delete;
90 FileAnalysis(const FileAnalysis &) = delete;
91 FileAnalysis(FileAnalysis &&Other) = default;
92
93 // Returns the instruction at the provided address. Returns nullptr if there
94 // is no instruction at the provided address.
95 const Instr *getInstruction(uint64_t Address) const;
96
97 // Returns the instruction at the provided adress, dying if the instruction is
98 // not found.
99 const Instr &getInstructionOrDie(uint64_t Address) const;
100
101 // Returns a pointer to the previous/next instruction in sequence,
102 // respectively. Returns nullptr if the next/prev instruction doesn't exist,
103 // or if the provided instruction doesn't exist.
104 const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
105 const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
106
Vlad Tsyrklevich0ee26322017-10-11 23:17:29 +0000107 // Returns whether this instruction is used by CFI to trap the program.
108 bool isCFITrap(const Instr &InstrMeta) const;
109
110 // Returns whether this function can fall through to the next instruction.
111 // Undefined (and bad) instructions cannot fall through, and instruction that
112 // modify the control flow can only fall through if they are conditional
113 // branches or calls.
114 bool canFallThrough(const Instr &InstrMeta) const;
115
116 // Returns the definitive next instruction. This is different from the next
117 // instruction sequentially as it will follow unconditional branches (assuming
118 // they can be resolved at compile time, i.e. not indirect). This method
119 // returns nullptr if the provided instruction does not transfer control flow
120 // to exactly one instruction that is known deterministically at compile time.
121 // Also returns nullptr if the deterministic target does not exist in this
122 // file.
123 const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
124
125 // Get a list of deterministic control flows that lead to the provided
126 // instruction. This list includes all static control flow cross-references as
127 // well as the previous instruction if it can fall through.
128 std::set<const Instr *>
129 getDirectControlFlowXRefs(const Instr &InstrMeta) const;
130
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000131 // Returns whether this instruction uses a register operand.
132 bool usesRegisterOperand(const Instr &InstrMeta) const;
133
134 // Returns the list of indirect instructions.
135 const std::set<uint64_t> &getIndirectInstructions() const;
136
137 const MCRegisterInfo *getRegisterInfo() const;
138 const MCInstrInfo *getMCInstrInfo() const;
139 const MCInstrAnalysis *getMCInstrAnalysis() const;
140
Mitch Phillips3b9ea322017-11-10 21:00:22 +0000141 // Returns the inlining information for the provided address.
142 Expected<DIInliningInfo> symbolizeInlinedCode(uint64_t Address);
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000143
Mitch Phillips3b9ea322017-11-10 21:00:22 +0000144 // Returns whether the provided Graph represents a protected indirect control
145 // flow instruction in this file.
146 CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000147
Mitch Phillips02993892017-11-14 22:43:13 +0000148 // Prints an instruction to the provided stream using this object's pretty-
149 // printers.
150 void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
151
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000152protected:
153 // Construct a blank object with the provided triple and features. Used in
154 // testing, where a sub class will dependency inject protected methods to
155 // allow analysis of raw binary, without requiring a fully valid ELF file.
156 FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
157
158 // Add an instruction to this object.
159 void addInstruction(const Instr &Instruction);
160
161 // Disassemble and parse the provided bytes into this object. Instruction
162 // address calculation is done relative to the provided SectionAddress.
163 void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
164 uint64_t SectionAddress);
165
166 // Constructs and initialises members required for disassembly.
167 Error initialiseDisassemblyMembers();
168
169 // Parses code sections from the internal object file. Saves them into the
170 // internal members. Should only be called once by Create().
171 Error parseCodeSections();
172
173private:
174 // Members that describe the input file.
175 object::OwningBinary<object::Binary> Binary;
176 const object::ObjectFile *Object = nullptr;
177 Triple ObjectTriple;
178 std::string ArchName;
179 std::string MCPU;
180 const Target *ObjectTarget = nullptr;
181 SubtargetFeatures Features;
182
183 // Members required for disassembly.
184 std::unique_ptr<const MCRegisterInfo> RegisterInfo;
185 std::unique_ptr<const MCAsmInfo> AsmInfo;
186 std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
187 std::unique_ptr<const MCInstrInfo> MII;
188 MCObjectFileInfo MOFI;
189 std::unique_ptr<MCContext> Context;
190 std::unique_ptr<const MCDisassembler> Disassembler;
191 std::unique_ptr<const MCInstrAnalysis> MIA;
192 std::unique_ptr<MCInstPrinter> Printer;
193
Mitch Phillipsc15bdf52017-11-03 20:54:26 +0000194 // Symbolizer used for debug information parsing.
195 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000196
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000197 // A mapping between the virtual memory address to the instruction metadata
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000198 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
199 // insertion allocation.
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000200 std::map<uint64_t, Instr> Instructions;
201
202 // Contains a mapping between a specific address, and a list of instructions
203 // that use this address as a branch target (including call instructions).
Mitch Phillips99fa1402017-10-23 20:25:19 +0000204 DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000205
206 // A list of addresses of indirect control flow instructions.
207 std::set<uint64_t> IndirectInstructions;
208};
209
210class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
211public:
212 static char ID;
Mitch Phillipsd9af3832017-10-23 20:54:01 +0000213 std::string Text;
214
215 UnsupportedDisassembly(StringRef Text);
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000216
217 void log(raw_ostream &OS) const override;
218 std::error_code convertToErrorCode() const override;
219};
220
221} // namespace cfi_verify
222} // namespace llvm
223
224#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H