//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk implementing a "retpoline". This is
/// a RET-implemented trampoline that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
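///
/// Rather than lowering an indirect call as `callq *%r11`, the compiler can
/// load the target into %r11 and emit `callq __llvm_retpoline_r11` instead;
/// the actual transfer to the target then happens via the thunk's RET, whose
/// mis-speculated execution is trapped in the thunk's capture loop.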
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "x86-retpoline-thunks"

namespace {
class X86RetpolineThunks : public ModulePass {
public:
  static char ID;

  X86RetpolineThunks() : ModulePass(ID) {}

  StringRef getPassName() const override { return "X86 Retpoline Thunks"; }

  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineModuleInfo>();
    AU.addPreserved<MachineModuleInfo>();
  }

private:
  MachineModuleInfo *MMI;
  const TargetMachine *TM;
  bool Is64Bit;
  const X86Subtarget *STI;
  const X86InstrInfo *TII;

  Function *createThunkFunction(Module &M, StringRef Name);
  void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
  void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
  void createThunk(Module &M, StringRef NameSuffix,
                   Optional<unsigned> Reg = None);
};

} // end anonymous namespace

ModulePass *llvm::createX86RetpolineThunksPass() {
  return new X86RetpolineThunks();
}

char X86RetpolineThunks::ID = 0;

bool X86RetpolineThunks::runOnModule(Module &M) {
  DEBUG(dbgs() << getPassName() << '\n');

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  assert(TPC && "X86-specific target pass should not be run without a target "
                "pass config!");

  MMI = &getAnalysis<MachineModuleInfo>();
  TM = &TPC->getTM<TargetMachine>();
  Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;

  // Only add a thunk if we have at least one function that has the retpoline
  // feature enabled in its subtarget.
  // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
  // nothing will end up calling it.
  // FIXME: It's a little silly to look at every function just to enumerate
  // the subtargets, but eventually we'll want to look at them for indirect
  // calls, so maybe this is OK.
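  // Note that a subtarget may request an externally provided thunk
  // (useRetpolineExternalThunk()), in which case we skip emitting a
  // definition here entirely and rely on the external implementation.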
  if (!llvm::any_of(M, [&](const Function &F) {
        // Save the subtarget we find for use in emitting the subsequent
        // thunk.
        STI = &TM->getSubtarget<X86Subtarget>(F);
        return STI->useRetpoline() && !STI->useRetpolineExternalThunk();
      }))
    return false;

  // If we have a relevant subtarget, get the instr info as well.
  TII = STI->getInstrInfo();

  if (Is64Bit) {
    // __llvm_retpoline_r11:
    //   callq .Lr11_call_target
    // .Lr11_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //   movq %r11, (%rsp)
    //   retq

    createThunk(M, "r11", X86::R11);
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers, as well as a fallback that is used when no
    // scratch register is available and that assumes the retpoline target has
    // already been pushed onto the stack.
    // __llvm_retpoline_eax:
    //   calll .Leax_call_target
    // .Leax_capture_spec:
    //   pause
    //   lfence
    //   jmp .Leax_capture_spec
    // .align 16
    // .Leax_call_target:
    //   movl %eax, (%esp) # Clobber return addr
    //   retl
    //
    // __llvm_retpoline_ecx:
    //   ... # Same setup
    //   movl %ecx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edx:
    //   ... # Same setup
    //   movl %edx, (%esp)
    //   retl
    //
    // This last one is a bit more special and so needs a little extra
    // handling.
    // __llvm_retpoline_push:
    //   calll .Lpush_call_target
    // .Lpush_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lpush_capture_spec
    // .align 16
    // .Lpush_call_target:
    //   # Discard the return address pointing at the capture loop.
    //   addl $4, %esp
    //   # Top of stack words are: Callee, RA. Exchange Callee and RA.
    //   pushl 4(%esp) # Push callee
    //   pushl 4(%esp) # Push RA
    //   popl 8(%esp)  # Pop RA to final RA
    //   popl (%esp)   # Pop callee to next top of stack
    //   retl          # Ret to callee
    createThunk(M, "eax", X86::EAX);
    createThunk(M, "ecx", X86::ECX);
    createThunk(M, "edx", X86::EDX);
    createThunk(M, "push");
  }

  return true;
}

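// Create an empty thunk function: void return, no arguments, linkonce_odr
// linkage with hidden visibility, and a comdat, so that duplicate definitions
// emitted into multiple translation units are merged at link time without
// exporting the symbol.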
Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
  LLVMContext &Ctx = M.getContext();
  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F =
      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
  F->setVisibility(GlobalValue::HiddenVisibility);
  F->setComdat(M.getOrInsertComdat(Name));

  // Add Attributes so that we don't create a frame, unwind information, or
  // inline.
  AttrBuilder B;
  B.addAttribute(llvm::Attribute::NoUnwind);
  B.addAttribute(llvm::Attribute::Naked);
  F->addAttributes(llvm::AttributeList::FunctionIndex, B);

  // Populate the function with a trivial body so that it forms valid IR and
  // passes the verifier; the real thunk body is emitted as machine IR in
  // createThunk.
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(Entry);

  Builder.CreateRetVoid();
  return F;
}

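// Overwrite the return-address slot at the top of the stack with the retpoline
// target held in \p Reg, e.g. `movq %r11, (%rsp)` on 64-bit targets.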
void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
                                                    unsigned Reg) {
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
      .addReg(Reg);
}

void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
    MachineBasicBlock &MBB) {
  // The instruction sequence we use to replace the return address without
  // a scratch register is somewhat complicated:
  //   # Discard the return address pointing at the capture loop.
  //   addl $4, %esp
  //   # Top of stack words are: Callee, RA. Exchange Callee and RA.
  //   pushl 4(%esp) # Push callee
  //   pushl 4(%esp) # Push RA
  //   popl 8(%esp)  # Pop RA to final RA
  //   popl (%esp)   # Pop callee to next top of stack
  //   retl          # Ret to callee
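  // Build the sequence above one instruction at a time; addRegOffset supplies
  // the (%esp)+offset memory operands for the pushes and pops.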
  BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
      .addReg(X86::ESP)
      .addImm(4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
               false, 4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
               false, 4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
               false, 8);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
               false, 0);
}

void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix,
                                     Optional<unsigned> Reg) {
  Function &F =
      *createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str());
  MachineFunction &MF = MMI->getOrCreateMachineFunction(F);

  // Set MF properties. We never use vregs...
  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);

  BasicBlock &OrigEntryBB = F.getEntryBlock();
  MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB);
  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB);
  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB);

  MF.push_back(Entry);
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;

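  // The entry block calls the block containing the RET. The return predictor
  // will assume that RET returns to the instruction after the call, i.e. the
  // capture loop that follows in layout, which is exactly where we want any
  // speculative execution to land.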
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
  Entry->addSuccessor(CallTarget);
  Entry->addSuccessor(CaptureSpec);
  CallTarget->setHasAddressTaken();

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE
  // instruction will block speculation without consuming any execution
  // resources. On AMD processors, the PAUSE instruction is (essentially) a
  // nop, so we also use an LFENCE instruction which they have advised will
  // stop speculation as well with minimal resource utilization. We still end
  // the capture with a jump to form an infinite loop so that, no matter what
  // implementation of the x86 ISA is running this code, speculation down this
  // path can never escape.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
  CaptureSpec->setHasAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

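  // Align the call target to a 16-byte boundary, matching the `.align 16` in
  // the assembly sketches above; MBB alignment is given as a power of two, so
  // 4 means 2^4 = 16 bytes.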
  CallTarget->setAlignment(4);
  if (Reg) {
    insertRegReturnAddrClobber(*CallTarget, *Reg);
  } else {
    assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
    insert32BitPushReturnAddrClobber(*CallTarget);
  }
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}