blob: 03390c2b586e157471b609d2efac0e4c48735eae [file] [log] [blame]
//===-- MCFunction.cpp ----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the algorithm to break down a region of machine code
// into basic blocks and try to reconstruct a CFG from it.
//
//===----------------------------------------------------------------------===//
14
15#include "MCFunction.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/MC/MCDisassembler.h"
18#include "llvm/MC/MCInst.h"
19#include "llvm/MC/MCInstPrinter.h"
Benjamin Kramer41ab14b2011-08-08 18:56:44 +000020#include "llvm/MC/MCInstrAnalysis.h"
Benjamin Kramer685a2502011-07-20 19:37:35 +000021#include "llvm/MC/MCInstrDesc.h"
22#include "llvm/MC/MCInstrInfo.h"
23#include "llvm/Support/MemoryObject.h"
24#include "llvm/Support/raw_ostream.h"
25#include "llvm/Support/system_error.h"
26#include <set>
27using namespace llvm;
28
29MCFunction
30MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
31 const MemoryObject &Region, uint64_t Start,
Benjamin Kramer41ab14b2011-08-08 18:56:44 +000032 uint64_t End, const MCInstrAnalysis *Ana,
Benjamin Kramer0b8b7712011-09-19 17:56:04 +000033 raw_ostream &DebugOut,
34 SmallVectorImpl<uint64_t> &Calls) {
35 std::vector<MCDecodedInst> Instructions;
Benjamin Kramer685a2502011-07-20 19:37:35 +000036 std::set<uint64_t> Splits;
37 Splits.insert(Start);
Benjamin Kramer685a2502011-07-20 19:37:35 +000038 uint64_t Size;
39
Benjamin Kramer685a2502011-07-20 19:37:35 +000040 MCFunction f(Name);
41
Benjamin Kramer0b8b7712011-09-19 17:56:04 +000042 {
43 DenseSet<uint64_t> VisitedInsts;
44 SmallVector<uint64_t, 16> WorkList;
45 WorkList.push_back(Start);
46 // Disassemble code and gather basic block split points.
47 while (!WorkList.empty()) {
48 uint64_t Index = WorkList.pop_back_val();
49 if (VisitedInsts.find(Index) != VisitedInsts.end())
50 continue;
51
52 for (;Index < End; Index += Size) {
53 MCInst Inst;
54
55 if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
56 if (Ana->isBranch(Inst)) {
57 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
58 if (targ != -1ULL && targ == Index+Size) {
59 Instructions.push_back(MCDecodedInst(Index, Size, Inst));
60 VisitedInsts.insert(Index);
61 continue;
62 }
63 if (targ != -1ULL) {
64 Splits.insert(targ);
65 WorkList.push_back(targ);
66 WorkList.push_back(Index+Size);
67 }
68 Splits.insert(Index+Size);
69 Instructions.push_back(MCDecodedInst(Index, Size, Inst));
70 VisitedInsts.insert(Index);
71 break;
72 } else if (Ana->isReturn(Inst)) {
73 Splits.insert(Index+Size);
74 Instructions.push_back(MCDecodedInst(Index, Size, Inst));
75 VisitedInsts.insert(Index);
76 break;
77 } else if (Ana->isCall(Inst)) {
78 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
79 if (targ != -1ULL && targ != Index+Size) {
80 Calls.push_back(targ);
81 }
82 }
83
84 Instructions.push_back(MCDecodedInst(Index, Size, Inst));
85 VisitedInsts.insert(Index);
86 } else {
87 VisitedInsts.insert(Index);
88 errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
89 if (Size == 0)
90 Size = 1; // skip illegible bytes
91 }
92 }
93 }
94 }
95
96 std::sort(Instructions.begin(), Instructions.end());
97
98 // Create basic blocks.
Benjamin Kramer685a2502011-07-20 19:37:35 +000099 unsigned ii = 0, ie = Instructions.size();
100 for (std::set<uint64_t>::iterator spi = Splits.begin(),
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000101 spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
Benjamin Kramer685a2502011-07-20 19:37:35 +0000102 MCBasicBlock BB;
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000103 uint64_t BlockEnd = *llvm::next(spi);
Benjamin Kramer685a2502011-07-20 19:37:35 +0000104 // Add instructions to the BB.
105 for (; ii != ie; ++ii) {
106 if (Instructions[ii].Address < *spi ||
107 Instructions[ii].Address >= BlockEnd)
108 break;
109 BB.addInst(Instructions[ii]);
110 }
111 f.addBlock(*spi, BB);
112 }
113
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000114 std::sort(f.Blocks.begin(), f.Blocks.end());
115
Benjamin Kramer685a2502011-07-20 19:37:35 +0000116 // Calculate successors of each block.
117 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
118 MCBasicBlock &BB = i->second;
119 if (BB.getInsts().empty()) continue;
120 const MCDecodedInst &Inst = BB.getInsts().back();
Benjamin Kramer685a2502011-07-20 19:37:35 +0000121
Benjamin Kramer41ab14b2011-08-08 18:56:44 +0000122 if (Ana->isBranch(Inst.Inst)) {
123 uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
124 if (targ == -1ULL) {
Benjamin Kramer685a2502011-07-20 19:37:35 +0000125 // Indirect branch. Bail and add all blocks of the function as a
126 // successor.
127 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000128 BB.addSucc(i->first);
Benjamin Kramer41ab14b2011-08-08 18:56:44 +0000129 } else if (targ != Inst.Address+Inst.Size)
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000130 BB.addSucc(targ);
Benjamin Kramer41ab14b2011-08-08 18:56:44 +0000131 // Conditional branches can also fall through to the next block.
132 if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000133 BB.addSucc(llvm::next(i)->first);
Benjamin Kramer685a2502011-07-20 19:37:35 +0000134 } else {
135 // No branch. Fall through to the next block.
Benjamin Kramer41ab14b2011-08-08 18:56:44 +0000136 if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
Benjamin Kramer0b8b7712011-09-19 17:56:04 +0000137 BB.addSucc(llvm::next(i)->first);
Benjamin Kramer685a2502011-07-20 19:37:35 +0000138 }
139 }
140
141 return f;
142}