//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The ShadowCallStack pass instruments function prologs/epilogs to check that
// the return address has not been corrupted during the execution of the
// function. The return address is stored in a 'shadow call stack' addressed
// using the %gs segment register.
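//
// The layout assumed by this pass: %gs:[0] holds the current byte offset into
// the shadow call stack, and %gs:[offset] holds the most recently saved return
// address. The instrumented prolog bumps the offset by 8 and stores the return
// address in the new slot; the instrumented epilog reloads that slot,
// decrements the offset, and compares the saved value against the return
// address at [rsp], trapping on a mismatch.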
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

#include <bitset>

using namespace llvm;

namespace {

class ShadowCallStack : public MachineFunctionPass {
public:
  static char ID;

  ShadowCallStack() : MachineFunctionPass(ID) {
    initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

private:
  // Do not instrument leaf functions with this many or fewer instructions. The
  // instrumented prolog/epilog are slightly racy in how they read and check
  // the saved return address, so it is not worth instrumenting a function that
  // is shorter than the prolog/epilog sequence itself.
  static const size_t SkipLeafInstructions = 3;
};

char ShadowCallStack::ID = 0;
} // end anonymous namespace.

static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
                      MachineBasicBlock &MBB, const DebugLoc &DL);
static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
                          MachineBasicBlock &MBB, const DebugLoc &DL,
                          MCPhysReg FreeRegister);

static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                      MachineInstr &MI, MachineBasicBlock &TrapBB);
static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                          MachineInstr &MI, MachineBasicBlock &TrapBB,
                          MCPhysReg FreeRegister);
// Generate a longer epilog that only uses r10, for use when a tail call
// branches through r11.
static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             MachineInstr &MI, MachineBasicBlock &TrapBB);

// Helper function to add ModR/M references for [Seg: Reg + Offset] memory
// accesses, i.e. the standard five memory operands: base register, scale of 1,
// no index register, displacement, and segment override.
static inline const MachineInstrBuilder &
addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
                int Offset = 0) {
  return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
}

static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
                      MachineBasicBlock &MBB, const DebugLoc &DL) {
  const MCPhysReg ReturnReg = X86::R10;
  const MCPhysReg OffsetReg = X86::R11;

  auto MBBI = MBB.begin();
  // mov r10, [rsp]
  addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
               X86::RSP);
  // xor r11, r11
  BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
      .addDef(OffsetReg)
      .addReg(OffsetReg, RegState::Undef)
      .addReg(OffsetReg, RegState::Undef);
  // add QWORD [gs:r11], 8
  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
                  OffsetReg)
      .addImm(8);
  // mov r11, [gs:r11]
  addSegmentedMem(
      BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
      OffsetReg);
  // mov [gs:r11], r10
  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
                  OffsetReg)
      .addReg(ReturnReg);
}

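// For a leaf function the return address is not spilled to the shadow call
// stack at all; it is simply copied into FreeRegister, which addEpilogLeaf
// later compares against the on-stack return address.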
static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
                          MachineBasicBlock &MBB, const DebugLoc &DL,
                          MCPhysReg FreeRegister) {
  // mov REG, [rsp]
  addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
                   .addDef(FreeRegister),
               X86::RSP);
}

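// Non-leaf epilog: reload the top of the shadow call stack, pop it, and trap
// if the saved return address does not match the one at [rsp].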
static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                      MachineInstr &MI, MachineBasicBlock &TrapBB) {
  const DebugLoc &DL = MI.getDebugLoc();

  // xor r11, r11
  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
      .addDef(X86::R11)
      .addReg(X86::R11, RegState::Undef)
      .addReg(X86::R11, RegState::Undef);
  // mov r10, [gs:r11]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R11);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // sub QWORD [gs:r11], 8
  // This instruction should not be moved up to avoid a signal race.
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
                  X86::GS, X86::R11)
      .addImm(8);
  // cmp [rsp], r10
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(X86::R10);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

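// Leaf epilog: the return address was kept in FreeRegister for the whole
// function, so just compare it against the on-stack copy.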
static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                          MachineInstr &MI, MachineBasicBlock &TrapBB,
                          MCPhysReg FreeRegister) {
  const DebugLoc &DL = MI.getDebugLoc();

  // cmp [rsp], REG
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(FreeRegister);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

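// Variant of addEpilog that leaves r11 untouched for tail calls that branch
// through r11: the shadow call stack is addressed via [gs:0] directly and only
// r10 is used as scratch.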
static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             MachineInstr &MI, MachineBasicBlock &TrapBB) {
  const DebugLoc &DL = MI.getDebugLoc();

  // xor r10, r10
  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
      .addDef(X86::R10)
      .addReg(X86::R10, RegState::Undef)
      .addReg(X86::R10, RegState::Undef);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // mov r10, [gs:r10]
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
                  X86::GS, X86::R10);
  // sub QWORD [gs:0], 8
  // This instruction should not be moved up to avoid a signal race.
  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
      .addImm(8);
  // cmp [rsp], r10
  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
      .addReg(X86::R10);
  // jne trap
  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
  MBB.addSuccessor(&TrapBB);
}

bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
  if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
      Fn.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
    return false;

  // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
  // on entry for parameters with the nest attribute.)
  if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
    return false;

  // FIXME: Skip functions with conditional and r10 tail calls for now.
  bool HasReturn = false;
  for (auto &MBB : Fn) {
    if (MBB.empty())
      continue;

    const MachineInstr &MI = MBB.instr_back();
    if (MI.isReturn())
      HasReturn = true;

    if (MI.isReturn() && MI.isCall()) {
      if (MI.findRegisterUseOperand(X86::EFLAGS))
        return false;
      // This should only be possible on Windows 64 (see GR64_TC versus
      // GR64_TCW64.)
      if (MI.findRegisterUseOperand(X86::R10) ||
          MI.hasRegisterImplicitUseOperand(X86::R10))
        return false;
    }
  }

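  // If the function never returns (e.g. it always ends in a noreturn call or
  // unreachable), there is no return address to protect.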
  if (!HasReturn)
    return false;

  // For leaf functions:
  // 1. Do not instrument very short functions where it would not improve that
  //    function's security.
  // 2. Detect if there is an unused caller-saved register we can reserve to
  //    hold the return address instead of writing/reading it from the shadow
  //    call stack.
  MCPhysReg LeafFuncRegister = X86::NoRegister;
  if (!Fn.getFrameInfo().adjustsStack()) {
    size_t InstructionCount = 0;
    std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
    for (auto &MBB : Fn) {
      for (auto &LiveIn : MBB.liveins())
        UsedRegs.set(LiveIn.PhysReg);
      for (auto &MI : MBB) {
        if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
          InstructionCount++;
        for (auto &Op : MI.operands())
          if (Op.isReg() && Op.isDef())
            UsedRegs.set(Op.getReg());
      }
    }

    if (InstructionCount <= SkipLeafInstructions)
      return false;

    std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
    const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
    for (size_t i = 0; CSRegs[i]; i++)
      CalleeSavedRegs.set(CSRegs[i]);

    const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
    for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
      // FIXME: Optimization opportunity: spill/restore a callee-saved register
      // if a caller-saved register is unavailable.
      if (CalleeSavedRegs.test(Reg))
        continue;

      bool Used = false;
      for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
        if ((Used = UsedRegs.test(*SR)))
          break;

      if (!Used) {
        LeafFuncRegister = Reg;
        break;
      }
    }
  }

  const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
  if (LeafFuncOptimization)
    // Mark the leaf function register live-in for all MBBs except the entry MBB
    for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
      I->addLiveIn(LeafFuncRegister);

  MachineBasicBlock &MBB = Fn.front();
  const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
  const DebugLoc &DL = NonEmpty->front().getDebugLoc();

  const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
  if (LeafFuncOptimization)
    addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
  else
    addProlog(Fn, TII, MBB, DL);

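  // Insert an epilog before every return, sharing a single trap block across
  // the whole function.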
  MachineBasicBlock *Trap = nullptr;
  for (auto &MBB : Fn) {
    if (MBB.empty())
      continue;

    MachineInstr &MI = MBB.instr_back();
    if (MI.isReturn()) {
      if (!Trap) {
        Trap = Fn.CreateMachineBasicBlock();
        BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
        Fn.push_back(Trap);
      }

      if (LeafFuncOptimization)
        addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
      else if (MI.findRegisterUseOperand(X86::R11))
        addEpilogOnlyR10(TII, MBB, MI, *Trap);
      else
        addEpilog(TII, MBB, MI, *Trap);
    }
  }

  return true;
}

INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
                false, false)

FunctionPass *llvm::createShadowCallStackPass() {
  return new ShadowCallStack();
}