//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
11#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
12
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
43
44namespace llvm {
45namespace cfi_verify {
46
// Declared here and defined in another translation unit.
// NOTE(review): the name suggests that setting this disables the DWARF
// line-table handling offered by FileAnalysis -- confirm at the definition.
extern bool IgnoreDWARFFlag;
48
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +000049// Disassembler and analysis tool for machine code files. Keeps track of non-
50// sequential control flows, including indirect control flow instructions.
51class FileAnalysis {
52public:
53 // A metadata struct for an instruction.
54 struct Instr {
55 uint64_t VMAddress; // Virtual memory address of this instruction.
56 MCInst Instruction; // Instruction.
57 uint64_t InstructionSize; // Size of this instruction.
58 bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
59 // undefined.
60 };
61
62 // Construct a FileAnalysis from a file path.
63 static Expected<FileAnalysis> Create(StringRef Filename);
64
65 // Construct and take ownership of the supplied object. Do not use this
66 // constructor, prefer to use FileAnalysis::Create instead.
67 FileAnalysis(object::OwningBinary<object::Binary> Binary);
68 FileAnalysis() = delete;
69 FileAnalysis(const FileAnalysis &) = delete;
70 FileAnalysis(FileAnalysis &&Other) = default;
71
Mitch Phillips5ff01cd2017-10-25 21:21:16 +000072 // Check whether the provided instruction is CFI protected in this file.
73 // Returns false if this instruction doesn't exist in this file, if it's not
74 // an indirect control flow instruction, or isn't CFI protected. Returns true
75 // otherwise.
76 bool isIndirectInstructionCFIProtected(uint64_t Address) const;
77
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +000078 // Returns the instruction at the provided address. Returns nullptr if there
79 // is no instruction at the provided address.
80 const Instr *getInstruction(uint64_t Address) const;
81
82 // Returns the instruction at the provided adress, dying if the instruction is
83 // not found.
84 const Instr &getInstructionOrDie(uint64_t Address) const;
85
86 // Returns a pointer to the previous/next instruction in sequence,
87 // respectively. Returns nullptr if the next/prev instruction doesn't exist,
88 // or if the provided instruction doesn't exist.
89 const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
90 const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
91
Vlad Tsyrklevich0ee26322017-10-11 23:17:29 +000092 // Returns whether this instruction is used by CFI to trap the program.
93 bool isCFITrap(const Instr &InstrMeta) const;
94
95 // Returns whether this function can fall through to the next instruction.
96 // Undefined (and bad) instructions cannot fall through, and instruction that
97 // modify the control flow can only fall through if they are conditional
98 // branches or calls.
99 bool canFallThrough(const Instr &InstrMeta) const;
100
101 // Returns the definitive next instruction. This is different from the next
102 // instruction sequentially as it will follow unconditional branches (assuming
103 // they can be resolved at compile time, i.e. not indirect). This method
104 // returns nullptr if the provided instruction does not transfer control flow
105 // to exactly one instruction that is known deterministically at compile time.
106 // Also returns nullptr if the deterministic target does not exist in this
107 // file.
108 const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
109
110 // Get a list of deterministic control flows that lead to the provided
111 // instruction. This list includes all static control flow cross-references as
112 // well as the previous instruction if it can fall through.
113 std::set<const Instr *>
114 getDirectControlFlowXRefs(const Instr &InstrMeta) const;
115
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000116 // Returns whether this instruction uses a register operand.
117 bool usesRegisterOperand(const Instr &InstrMeta) const;
118
119 // Returns the list of indirect instructions.
120 const std::set<uint64_t> &getIndirectInstructions() const;
121
122 const MCRegisterInfo *getRegisterInfo() const;
123 const MCInstrInfo *getMCInstrInfo() const;
124 const MCInstrAnalysis *getMCInstrAnalysis() const;
Mitch Phillipsc15bdf52017-11-03 20:54:26 +0000125 symbolize::LLVMSymbolizer &getSymbolizer();
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000126
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000127 // Returns true if this class is using DWARF line tables for elimination.
128 bool hasLineTableInfo() const;
129
130 // Returns the line table information for the range {Address +-
131 // DWARFSearchRange}. Returns an empty table if the address has no valid line
132 // table information, or this analysis object has DWARF handling disabled.
133 DILineInfoTable getLineInfoForAddressRange(uint64_t Address);
134
135 // Returns whether the provided address has valid line information for
136 // instructions in the range of Address +- DWARFSearchRange.
137 bool hasValidLineInfoForAddressRange(uint64_t Address);
138
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000139protected:
140 // Construct a blank object with the provided triple and features. Used in
141 // testing, where a sub class will dependency inject protected methods to
142 // allow analysis of raw binary, without requiring a fully valid ELF file.
143 FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
144
145 // Add an instruction to this object.
146 void addInstruction(const Instr &Instruction);
147
148 // Disassemble and parse the provided bytes into this object. Instruction
149 // address calculation is done relative to the provided SectionAddress.
150 void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
151 uint64_t SectionAddress);
152
153 // Constructs and initialises members required for disassembly.
154 Error initialiseDisassemblyMembers();
155
156 // Parses code sections from the internal object file. Saves them into the
157 // internal members. Should only be called once by Create().
158 Error parseCodeSections();
159
160private:
161 // Members that describe the input file.
162 object::OwningBinary<object::Binary> Binary;
163 const object::ObjectFile *Object = nullptr;
164 Triple ObjectTriple;
165 std::string ArchName;
166 std::string MCPU;
167 const Target *ObjectTarget = nullptr;
168 SubtargetFeatures Features;
169
170 // Members required for disassembly.
171 std::unique_ptr<const MCRegisterInfo> RegisterInfo;
172 std::unique_ptr<const MCAsmInfo> AsmInfo;
173 std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
174 std::unique_ptr<const MCInstrInfo> MII;
175 MCObjectFileInfo MOFI;
176 std::unique_ptr<MCContext> Context;
177 std::unique_ptr<const MCDisassembler> Disassembler;
178 std::unique_ptr<const MCInstrAnalysis> MIA;
179 std::unique_ptr<MCInstPrinter> Printer;
180
Mitch Phillipsc15bdf52017-11-03 20:54:26 +0000181 // Symbolizer used for debug information parsing.
182 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000183
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000184 // A mapping between the virtual memory address to the instruction metadata
Mitch Phillips7db6f7a2017-10-31 23:20:05 +0000185 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
186 // insertion allocation.
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000187 std::map<uint64_t, Instr> Instructions;
188
189 // Contains a mapping between a specific address, and a list of instructions
190 // that use this address as a branch target (including call instructions).
Mitch Phillips99fa1402017-10-23 20:25:19 +0000191 DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000192
193 // A list of addresses of indirect control flow instructions.
194 std::set<uint64_t> IndirectInstructions;
195};
196
197class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
198public:
199 static char ID;
Mitch Phillipsd9af3832017-10-23 20:54:01 +0000200 std::string Text;
201
202 UnsupportedDisassembly(StringRef Text);
Vlad Tsyrklevich89c3c8c2017-10-11 20:35:01 +0000203
204 void log(raw_ostream &OS) const override;
205 std::error_code convertToErrorCode() const override;
206};
207
208} // namespace cfi_verify
209} // namespace llvm
210
211#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H