Blame - llvm/lib/Target/X86/X86FrameLowering.cpp - toolchain/llvm-project

blob: 930163c36883e67abac9ff3aefc2ac645b55ad42 [file] [log] [blame]

Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame^]	1	//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file contains the X86 implementation of TargetFrameLowering class.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "X86FrameLowering.h"
				15	#include "X86InstrBuilder.h"
				16	#include "X86InstrInfo.h"
				17	#include "X86MachineFunctionInfo.h"
				18	#include "X86Subtarget.h"
				19	#include "X86TargetMachine.h"
				20	#include "llvm/ADT/SmallSet.h"
				21	#include "llvm/CodeGen/MachineFrameInfo.h"
				22	#include "llvm/CodeGen/MachineFunction.h"
				23	#include "llvm/CodeGen/MachineInstrBuilder.h"
				24	#include "llvm/CodeGen/MachineModuleInfo.h"
				25	#include "llvm/CodeGen/MachineRegisterInfo.h"
				26	#include "llvm/IR/DataLayout.h"
				27	#include "llvm/IR/Function.h"
				28	#include "llvm/MC/MCAsmInfo.h"
				29	#include "llvm/MC/MCSymbol.h"
				30	#include "llvm/Support/CommandLine.h"
				31	#include "llvm/Target/TargetOptions.h"
				32	#include "llvm/Support/Debug.h"
				33	#include <cstdlib>
				34
				35	using namespace llvm;
				36
				37	// FIXME: completely move here.
				38	extern cl::opt<bool> ForceStackAlign;
				39
				40	bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
				41	return !MF.getFrameInfo()->hasVarSizedObjects();
				42	}
				43
				44	/// hasFP - Return true if the specified function should have a dedicated frame
				45	/// pointer register. This is true if the function has variable sized allocas
				46	/// or if frame pointer elimination is disabled.
				47	bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
				48	const MachineFrameInfo *MFI = MF.getFrameInfo();
				49	const MachineModuleInfo &MMI = MF.getMMI();
				50	const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
				51
				52	return (MF.getTarget().Options.DisableFramePointerElim(MF) \|\|
				53	RegInfo->needsStackRealignment(MF) \|\|
				54	MFI->hasVarSizedObjects() \|\|
				55	MFI->isFrameAddressTaken() \|\| MFI->hasInlineAsmWithSPAdjust() \|\|
				56	MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() \|\|
				57	MMI.callsUnwindInit() \|\| MMI.callsEHReturn() \|\|
				58	MFI->hasStackMap() \|\| MFI->hasPatchPoint());
				59	}
				60
				61	static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
				62	if (IsLP64) {
				63	if (isInt<8>(Imm))
				64	return X86::SUB64ri8;
				65	return X86::SUB64ri32;
				66	} else {
				67	if (isInt<8>(Imm))
				68	return X86::SUB32ri8;
				69	return X86::SUB32ri;
				70	}
				71	}
				72
				73	static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
				74	if (IsLP64) {
				75	if (isInt<8>(Imm))
				76	return X86::ADD64ri8;
				77	return X86::ADD64ri32;
				78	} else {
				79	if (isInt<8>(Imm))
				80	return X86::ADD32ri8;
				81	return X86::ADD32ri;
				82	}
				83	}
				84
				85	static unsigned getSUBrrOpcode(unsigned isLP64) {
				86	return isLP64 ? X86::SUB64rr : X86::SUB32rr;
				87	}
				88
				89	static unsigned getADDrrOpcode(unsigned isLP64) {
				90	return isLP64 ? X86::ADD64rr : X86::ADD32rr;
				91	}
				92
				93	static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
				94	if (IsLP64) {
				95	if (isInt<8>(Imm))
				96	return X86::AND64ri8;
				97	return X86::AND64ri32;
				98	}
				99	if (isInt<8>(Imm))
				100	return X86::AND32ri8;
				101	return X86::AND32ri;
				102	}
				103
				104	static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
				105	// We don't support LP64 for now.
				106	assert(!IsLP64);
				107
				108	if (MO.isImm() && isInt<8>(MO.getImm()))
				109	return X86::PUSH32i8;
				110
				111	return X86::PUSHi32;;
				112	}
				113
				114	static unsigned getLEArOpcode(unsigned IsLP64) {
				115	return IsLP64 ? X86::LEA64r : X86::LEA32r;
				116	}
				117
				118	/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
				119	/// when it reaches the "return" instruction. We can then pop a stack object
				120	/// to this register without worry about clobbering it.
				121	static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
				122	MachineBasicBlock::iterator &MBBI,
				123	const TargetRegisterInfo &TRI,
				124	bool Is64Bit) {
				125	const MachineFunction *MF = MBB.getParent();
				126	const Function *F = MF->getFunction();
				127	if (!F \|\| MF->getMMI().callsEHReturn())
				128	return 0;
				129
				130	static const uint16_t CallerSavedRegs32Bit[] = {
				131	X86::EAX, X86::EDX, X86::ECX, 0
				132	};
				133
				134	static const uint16_t CallerSavedRegs64Bit[] = {
				135	X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
				136	X86::R8, X86::R9, X86::R10, X86::R11, 0
				137	};
				138
				139	unsigned Opc = MBBI->getOpcode();
				140	switch (Opc) {
				141	default: return 0;
				142	case X86::RETL:
				143	case X86::RETQ:
				144	case X86::RETIL:
				145	case X86::RETIQ:
				146	case X86::TCRETURNdi:
				147	case X86::TCRETURNri:
				148	case X86::TCRETURNmi:
				149	case X86::TCRETURNdi64:
				150	case X86::TCRETURNri64:
				151	case X86::TCRETURNmi64:
				152	case X86::EH_RETURN:
				153	case X86::EH_RETURN64: {
				154	SmallSet<uint16_t, 8> Uses;
				155	for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
				156	MachineOperand &MO = MBBI->getOperand(i);
				157	if (!MO.isReg() \|\| MO.isDef())
				158	continue;
				159	unsigned Reg = MO.getReg();
				160	if (!Reg)
				161	continue;
				162	for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
				163	Uses.insert(*AI);
				164	}
				165
				166	const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
				167	for (; *CS; ++CS)
				168	if (!Uses.count(*CS))
				169	return *CS;
				170	}
				171	}
				172
				173	return 0;
				174	}
				175
				176	static bool isEAXLiveIn(MachineFunction &MF) {
				177	for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
				178	EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
				179	unsigned Reg = II->first;
				180
				181	if (Reg == X86::RAX \|\| Reg == X86::EAX \|\| Reg == X86::AX \|\|
				182	Reg == X86::AH \|\| Reg == X86::AL)
				183	return true;
				184	}
				185
				186	return false;
				187	}
				188
				189	/// emitSPUpdate - Emit a series of instructions to increment / decrement the
				190	/// stack pointer by a constant value.
				191	static
				192	void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
				193	unsigned StackPtr, int64_t NumBytes,
				194	bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
				195	const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
				196	bool isSub = NumBytes < 0;
				197	uint64_t Offset = isSub ? -NumBytes : NumBytes;
				198	unsigned Opc;
				199	if (UseLEA)
				200	Opc = getLEArOpcode(Is64BitStackPtr);
				201	else
				202	Opc = isSub
				203	? getSUBriOpcode(Is64BitStackPtr, Offset)
				204	: getADDriOpcode(Is64BitStackPtr, Offset);
				205
				206	uint64_t Chunk = (1LL << 31) - 1;
				207	DebugLoc DL = MBB.findDebugLoc(MBBI);
				208
				209	while (Offset) {
				210	if (Offset > Chunk) {
				211	// Rather than emit a long series of instructions for large offsets,
				212	// load the offset into a register and do one sub/add
				213	unsigned Reg = 0;
				214
				215	if (isSub && !isEAXLiveIn(*MBB.getParent()))
				216	Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
				217	else
				218	Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
				219
				220	if (Reg) {
				221	Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
				222	BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
				223	.addImm(Offset);
				224	Opc = isSub
				225	? getSUBrrOpcode(Is64BitTarget)
				226	: getADDrrOpcode(Is64BitTarget);
				227	MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
				228	.addReg(StackPtr)
				229	.addReg(Reg);
				230	MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
				231	Offset = 0;
				232	continue;
				233	}
				234	}
				235
				236	uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
				237	if (ThisVal == (Is64BitTarget ? 8 : 4)) {
				238	// Use push / pop instead.
				239	unsigned Reg = isSub
				240	? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
				241	: findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
				242	if (Reg) {
				243	Opc = isSub
				244	? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
				245	: (Is64BitTarget ? X86::POP64r : X86::POP32r);
				246	MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
				247	.addReg(Reg, getDefRegState(!isSub) \| getUndefRegState(isSub));
				248	if (isSub)
				249	MI->setFlag(MachineInstr::FrameSetup);
				250	Offset -= ThisVal;
				251	continue;
				252	}
				253	}
				254
				255	MachineInstr *MI = nullptr;
				256
				257	if (UseLEA) {
				258	MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
				259	StackPtr, false, isSub ? -ThisVal : ThisVal);
				260	} else {
				261	MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
				262	.addReg(StackPtr)
				263	.addImm(ThisVal);
				264	MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
				265	}
				266
				267	if (isSub)
				268	MI->setFlag(MachineInstr::FrameSetup);
				269
				270	Offset -= ThisVal;
				271	}
				272	}
				273
				274	/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
				275	static
				276	void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
				277	unsigned StackPtr, uint64_t *NumBytes = nullptr) {
				278	if (MBBI == MBB.begin()) return;
				279
				280	MachineBasicBlock::iterator PI = std::prev(MBBI);
				281	unsigned Opc = PI->getOpcode();
				282	if ((Opc == X86::ADD64ri32 \|\| Opc == X86::ADD64ri8 \|\|
				283	Opc == X86::ADD32ri \|\| Opc == X86::ADD32ri8 \|\|
				284	Opc == X86::LEA32r \|\| Opc == X86::LEA64_32r) &&
				285	PI->getOperand(0).getReg() == StackPtr) {
				286	if (NumBytes)
				287	*NumBytes += PI->getOperand(2).getImm();
				288	MBB.erase(PI);
				289	} else if ((Opc == X86::SUB64ri32 \|\| Opc == X86::SUB64ri8 \|\|
				290	Opc == X86::SUB32ri \|\| Opc == X86::SUB32ri8) &&
				291	PI->getOperand(0).getReg() == StackPtr) {
				292	if (NumBytes)
				293	*NumBytes -= PI->getOperand(2).getImm();
				294	MBB.erase(PI);
				295	}
				296	}
				297
				298	/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower
				299	/// iterator.
				300	static
				301	void mergeSPUpdatesDown(MachineBasicBlock &MBB,
				302	MachineBasicBlock::iterator &MBBI,
				303	unsigned StackPtr, uint64_t *NumBytes = nullptr) {
				304	// FIXME: THIS ISN'T RUN!!!
				305	return;
				306
				307	if (MBBI == MBB.end()) return;
				308
				309	MachineBasicBlock::iterator NI = std::next(MBBI);
				310	if (NI == MBB.end()) return;
				311
				312	unsigned Opc = NI->getOpcode();
				313	if ((Opc == X86::ADD64ri32 \|\| Opc == X86::ADD64ri8 \|\|
				314	Opc == X86::ADD32ri \|\| Opc == X86::ADD32ri8) &&
				315	NI->getOperand(0).getReg() == StackPtr) {
				316	if (NumBytes)
				317	*NumBytes -= NI->getOperand(2).getImm();
				318	MBB.erase(NI);
				319	MBBI = NI;
				320	} else if ((Opc == X86::SUB64ri32 \|\| Opc == X86::SUB64ri8 \|\|
				321	Opc == X86::SUB32ri \|\| Opc == X86::SUB32ri8) &&
				322	NI->getOperand(0).getReg() == StackPtr) {
				323	if (NumBytes)
				324	*NumBytes += NI->getOperand(2).getImm();
				325	MBB.erase(NI);
				326	MBBI = NI;
				327	}
				328	}
				329
				330	/// mergeSPUpdates - Checks the instruction before/after the passed
				331	/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and
				332	/// the stack adjustment is returned as a positive value for ADD/LEA and a
				333	/// negative for SUB.
				334	static int mergeSPUpdates(MachineBasicBlock &MBB,
				335	MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
				336	bool doMergeWithPrevious) {
				337	if ((doMergeWithPrevious && MBBI == MBB.begin()) \|\|
				338	(!doMergeWithPrevious && MBBI == MBB.end()))
				339	return 0;
				340
				341	MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
				342	MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
				343	: std::next(MBBI);
				344	unsigned Opc = PI->getOpcode();
				345	int Offset = 0;
				346
				347	if ((Opc == X86::ADD64ri32 \|\| Opc == X86::ADD64ri8 \|\|
				348	Opc == X86::ADD32ri \|\| Opc == X86::ADD32ri8 \|\|
				349	Opc == X86::LEA32r \|\| Opc == X86::LEA64_32r) &&
				350	PI->getOperand(0).getReg() == StackPtr){
				351	Offset += PI->getOperand(2).getImm();
				352	MBB.erase(PI);
				353	if (!doMergeWithPrevious) MBBI = NI;
				354	} else if ((Opc == X86::SUB64ri32 \|\| Opc == X86::SUB64ri8 \|\|
				355	Opc == X86::SUB32ri \|\| Opc == X86::SUB32ri8) &&
				356	PI->getOperand(0).getReg() == StackPtr) {
				357	Offset -= PI->getOperand(2).getImm();
				358	MBB.erase(PI);
				359	if (!doMergeWithPrevious) MBBI = NI;
				360	}
				361
				362	return Offset;
				363	}
				364
				365	void
				366	X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
				367	MachineBasicBlock::iterator MBBI,
				368	DebugLoc DL) const {
				369	MachineFunction &MF = *MBB.getParent();
				370	MachineFrameInfo *MFI = MF.getFrameInfo();
				371	MachineModuleInfo &MMI = MF.getMMI();
				372	const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
				373	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				374
				375	// Add callee saved registers to move list.
				376	const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
				377	if (CSI.empty()) return;
				378
				379	// Calculate offsets.
				380	for (std::vector<CalleeSavedInfo>::const_iterator
				381	I = CSI.begin(), E = CSI.end(); I != E; ++I) {
				382	int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
				383	unsigned Reg = I->getReg();
				384
				385	unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
				386	unsigned CFIIndex =
				387	MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
				388	Offset));
				389	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				390	.addCFIIndex(CFIIndex);
				391	}
				392	}
				393
				394	/// usesTheStack - This function checks if any of the users of EFLAGS
				395	/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
				396	/// to use the stack, and if we don't adjust the stack we clobber the first
				397	/// frame index.
				398	/// See X86InstrInfo::copyPhysReg.
				399	static bool usesTheStack(const MachineFunction &MF) {
				400	const MachineRegisterInfo &MRI = MF.getRegInfo();
				401
				402	for (MachineRegisterInfo::reg_instr_iterator
				403	ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
				404	ri != re; ++ri)
				405	if (ri->isCopy())
				406	return true;
				407
				408	return false;
				409	}
				410
				411	void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
				412	MachineBasicBlock &MBB,
				413	MachineBasicBlock::iterator MBBI,
				414	DebugLoc DL) {
				415	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				416	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				417	bool Is64Bit = STI.is64Bit();
				418	bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
				419
				420	unsigned CallOp;
				421	if (Is64Bit)
				422	CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
				423	else
				424	CallOp = X86::CALLpcrel32;
				425
				426	const char *Symbol;
				427	if (Is64Bit) {
				428	if (STI.isTargetCygMing()) {
				429	Symbol = "___chkstk_ms";
				430	} else {
				431	Symbol = "__chkstk";
				432	}
				433	} else if (STI.isTargetCygMing())
				434	Symbol = "_alloca";
				435	else
				436	Symbol = "_chkstk";
				437
				438	MachineInstrBuilder CI;
				439
				440	// All current stack probes take AX and SP as input, clobber flags, and
				441	// preserve all registers. x86_64 probes leave RSP unmodified.
				442	if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
				443	// For the large code model, we have to call through a register. Use R11,
				444	// as it is scratch in all supported calling conventions.
				445	BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
				446	.addExternalSymbol(Symbol);
				447	CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
				448	} else {
				449	CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
				450	}
				451
				452	unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
				453	unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
				454	CI.addReg(AX, RegState::Implicit)
				455	.addReg(SP, RegState::Implicit)
				456	.addReg(AX, RegState::Define \| RegState::Implicit)
				457	.addReg(SP, RegState::Define \| RegState::Implicit)
				458	.addReg(X86::EFLAGS, RegState::Define \| RegState::Implicit);
				459
				460	if (Is64Bit) {
				461	// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
				462	// themselves. It also does not clobber %rax so we can reuse it when
				463	// adjusting %rsp.
				464	BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
				465	.addReg(X86::RSP)
				466	.addReg(X86::RAX);
				467	}
				468	}
				469
				470	/// emitPrologue - Push callee-saved registers onto the stack, which
				471	/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
				472	/// space for local variables. Also emit labels used by the exception handler to
				473	/// generate the exception handling frames.
				474
				475	/*
				476	Here's a gist of what gets emitted:
				477
				478	; Establish frame pointer, if needed
				479	[if needs FP]
				480	push %rbp
				481	.cfi_def_cfa_offset 16
				482	.cfi_offset %rbp, -16
				483	.seh_pushreg %rbp
				484	mov %rsp, %rbp
				485	.cfi_def_cfa_register %rbp
				486
				487	; Spill general-purpose registers
				488	[for all callee-saved GPRs]
				489	pushq %<reg>
				490	[if not needs FP]
				491	.cfi_def_cfa_offset (offset from RETADDR)
				492	.seh_pushreg %<reg>
				493
				494	; If the required stack alignment > default stack alignment
				495	; rsp needs to be re-aligned. This creates a "re-alignment gap"
				496	; of unknown size in the stack frame.
				497	[if stack needs re-alignment]
				498	and $MASK, %rsp
				499
				500	; Allocate space for locals
				501	[if target is Windows and allocated space > 4096 bytes]
				502	; Windows needs special care for allocations larger
				503	; than one page.
				504	mov $NNN, %rax
				505	call ___chkstk_ms/__chkstk
				506	sub %rax, %rsp
				507	[else]
				508	sub $NNN, %rsp
				509
				510	[if needs FP]
				511	.seh_stackalloc (size of XMM spill slots)
				512	.seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
				513	[else]
				514	.seh_stackalloc NNN
				515
				516	; Spill XMMs
				517	; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
				518	; they may get spilled on any platform, if the current function
				519	; calls @llvm.eh.unwind.init
				520	[if needs FP]
				521	[for all callee-saved XMM registers]
				522	movaps %<xmm reg>, -MMM(%rbp)
				523	[for all callee-saved XMM registers]
				524	.seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
				525	; i.e. the offset relative to (%rbp - SEHFrameOffset)
				526	[else]
				527	[for all callee-saved XMM registers]
				528	movaps %<xmm reg>, KKK(%rsp)
				529	[for all callee-saved XMM registers]
				530	.seh_savexmm %<xmm reg>, KKK
				531
				532	.seh_endprologue
				533
				534	[if needs base pointer]
				535	mov %rsp, %rbx
				536	[if needs to restore base pointer]
				537	mov %rsp, -MMM(%rbp)
				538
				539	; Emit CFI info
				540	[if needs FP]
				541	[for all callee-saved registers]
				542	.cfi_offset %<reg>, (offset from %rbp)
				543	[else]
				544	.cfi_def_cfa_offset (offset from RETADDR)
				545	[for all callee-saved registers]
				546	.cfi_offset %<reg>, (offset from %rsp)
				547
				548	Notes:
				549	- .seh directives are emitted only for Windows 64 ABI
				550	- .cfi directives are emitted for all other ABIs
				551	- for 32-bit code, substitute %e?? registers for %r??
				552	*/
				553
				554	void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
				555	MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
				556	MachineBasicBlock::iterator MBBI = MBB.begin();
				557	MachineFrameInfo *MFI = MF.getFrameInfo();
				558	const Function *Fn = MF.getFunction();
				559	const X86RegisterInfo *RegInfo =
				560	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				561	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				562	MachineModuleInfo &MMI = MF.getMMI();
				563	X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				564	uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
				565	uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
				566	bool HasFP = hasFP(MF);
				567	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				568	bool Is64Bit = STI.is64Bit();
				569	// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
				570	const bool Uses64BitFramePtr = STI.isTarget64BitLP64() \|\| STI.isTargetNaCl64();
				571	bool IsWin64 = STI.isTargetWin64();
				572	// Not necessarily synonymous with IsWin64.
				573	bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
				574	bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
				575	bool NeedsDwarfCFI =
				576	!IsWinEH && (MMI.hasDebugInfo() \|\| Fn->needsUnwindTableEntry());
				577	bool UseLEA = STI.useLeaForSP();
				578	unsigned StackAlign = getStackAlignment();
				579	unsigned SlotSize = RegInfo->getSlotSize();
				580	unsigned FramePtr = RegInfo->getFrameRegister(MF);
				581	const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
				582	getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
				583	unsigned StackPtr = RegInfo->getStackRegister();
				584	unsigned BasePtr = RegInfo->getBaseRegister();
				585	DebugLoc DL;
				586
				587	// If we're forcing a stack realignment we can't rely on just the frame
				588	// info, we need to know the ABI stack alignment as well in case we
				589	// have a call out. Otherwise just make sure we have some alignment - we'll
				590	// go with the minimum SlotSize.
				591	if (ForceStackAlign) {
				592	if (MFI->hasCalls())
				593	MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
				594	else if (MaxAlign < SlotSize)
				595	MaxAlign = SlotSize;
				596	}
				597
				598	// Add RETADDR move area to callee saved frame size.
				599	int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
				600	if (TailCallReturnAddrDelta < 0)
				601	X86FI->setCalleeSavedFrameSize(
				602	X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
				603
				604	bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
				605
				606	// The default stack probe size is 4096 if the function has no stackprobesize
				607	// attribute.
				608	unsigned StackProbeSize = 4096;
				609	if (Fn->hasFnAttribute("stack-probe-size"))
				610	Fn->getFnAttribute("stack-probe-size")
				611	.getValueAsString()
				612	.getAsInteger(0, StackProbeSize);
				613
				614	// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
				615	// function, and use up to 128 bytes of stack space, don't have a frame
				616	// pointer, calls, or dynamic alloca then we do not need to adjust the
				617	// stack pointer (we fit in the Red Zone). We also check that we don't
				618	// push and pop from the stack.
				619	if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
				620	Attribute::NoRedZone) &&
				621	!RegInfo->needsStackRealignment(MF) &&
				622	!MFI->hasVarSizedObjects() && // No dynamic alloca.
				623	!MFI->adjustsStack() && // No calls.
				624	!IsWin64 && // Win64 has no Red Zone
				625	!usesTheStack(MF) && // Don't push and pop.
				626	!MF.shouldSplitStack()) { // Regular stack
				627	uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
				628	if (HasFP) MinSize += SlotSize;
				629	StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
				630	MFI->setStackSize(StackSize);
				631	}
				632
				633	// Insert stack pointer adjustment for later moving of return addr. Only
				634	// applies to tail call optimized functions where the callee argument stack
				635	// size is bigger than the callers.
				636	if (TailCallReturnAddrDelta < 0) {
				637	MachineInstr *MI =
				638	BuildMI(MBB, MBBI, DL,
				639	TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
				640	StackPtr)
				641	.addReg(StackPtr)
				642	.addImm(-TailCallReturnAddrDelta)
				643	.setMIFlag(MachineInstr::FrameSetup);
				644	MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
				645	}
				646
				647	// Mapping for machine moves:
				648	//
				649	// DST: VirtualFP AND
				650	// SRC: VirtualFP => DW_CFA_def_cfa_offset
				651	// ELSE => DW_CFA_def_cfa
				652	//
				653	// SRC: VirtualFP AND
				654	// DST: Register => DW_CFA_def_cfa_register
				655	//
				656	// ELSE
				657	// OFFSET < 0 => DW_CFA_offset_extended_sf
				658	// REG < 64 => DW_CFA_offset + Reg
				659	// ELSE => DW_CFA_offset_extended
				660
				661	uint64_t NumBytes = 0;
				662	int stackGrowth = -SlotSize;
				663
				664	if (HasFP) {
				665	// Calculate required stack adjustment.
				666	uint64_t FrameSize = StackSize - SlotSize;
				667	// If required, include space for extra hidden slot for stashing base pointer.
				668	if (X86FI->getRestoreBasePointer())
				669	FrameSize += SlotSize;
				670	if (RegInfo->needsStackRealignment(MF)) {
				671	// Callee-saved registers are pushed on stack before the stack
				672	// is realigned.
				673	FrameSize -= X86FI->getCalleeSavedFrameSize();
				674	NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
				675	} else {
				676	NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
				677	}
				678
				679	// Get the offset of the stack slot for the EBP register, which is
				680	// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
				681	// Update the frame offset adjustment.
				682	MFI->setOffsetAdjustment(-NumBytes);
				683
				684	// Save EBP/RBP into the appropriate stack slot.
				685	BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
				686	.addReg(MachineFramePtr, RegState::Kill)
				687	.setMIFlag(MachineInstr::FrameSetup);
				688
				689	if (NeedsDwarfCFI) {
				690	// Mark the place where EBP/RBP was saved.
				691	// Define the current CFA rule to use the provided offset.
				692	assert(StackSize);
				693	unsigned CFIIndex = MMI.addFrameInst(
				694	MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
				695	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				696	.addCFIIndex(CFIIndex);
				697
				698	// Change the rule for the FramePtr to be an "offset" rule.
				699	unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
				700	CFIIndex = MMI.addFrameInst(
				701	MCCFIInstruction::createOffset(nullptr,
				702	DwarfFramePtr, 2 * stackGrowth));
				703	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				704	.addCFIIndex(CFIIndex);
				705	}
				706
				707	if (NeedsWinEH) {
				708	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
				709	.addImm(FramePtr)
				710	.setMIFlag(MachineInstr::FrameSetup);
				711	}
				712
				713	// Update EBP with the new base value.
				714	BuildMI(MBB, MBBI, DL,
				715	TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
				716	.addReg(StackPtr)
				717	.setMIFlag(MachineInstr::FrameSetup);
				718
				719	if (NeedsDwarfCFI) {
				720	// Mark effective beginning of when frame pointer becomes valid.
				721	// Define the current CFA to use the EBP/RBP register.
				722	unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
				723	unsigned CFIIndex = MMI.addFrameInst(
				724	MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
				725	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				726	.addCFIIndex(CFIIndex);
				727	}
				728
				729	// Mark the FramePtr as live-in in every block.
				730	for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
				731	I->addLiveIn(MachineFramePtr);
				732	} else {
				733	NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
				734	}
				735
				736	// Skip the callee-saved push instructions.
				737	bool PushedRegs = false;
				738	int StackOffset = 2 * stackGrowth;
				739
				740	while (MBBI != MBB.end() &&
				741	(MBBI->getOpcode() == X86::PUSH32r \|\|
				742	MBBI->getOpcode() == X86::PUSH64r)) {
				743	PushedRegs = true;
				744	unsigned Reg = MBBI->getOperand(0).getReg();
				745	++MBBI;
				746
				747	if (!HasFP && NeedsDwarfCFI) {
				748	// Mark callee-saved push instruction.
				749	// Define the current CFA rule to use the provided offset.
				750	assert(StackSize);
				751	unsigned CFIIndex = MMI.addFrameInst(
				752	MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
				753	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				754	.addCFIIndex(CFIIndex);
				755	StackOffset += stackGrowth;
				756	}
				757
				758	if (NeedsWinEH) {
				759	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
				760	MachineInstr::FrameSetup);
				761	}
				762	}
				763
				764	// Realign stack after we pushed callee-saved registers (so that we'll be
				765	// able to calculate their offsets from the frame pointer).
				766	if (RegInfo->needsStackRealignment(MF)) {
				767	assert(HasFP && "There should be a frame pointer if stack is realigned.");
				768	uint64_t Val = -MaxAlign;
				769	MachineInstr *MI =
				770	BuildMI(MBB, MBBI, DL,
				771	TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
				772	.addReg(StackPtr)
				773	.addImm(Val)
				774	.setMIFlag(MachineInstr::FrameSetup);
				775
				776	// The EFLAGS implicit def is dead.
				777	MI->getOperand(3).setIsDead();
				778	}
				779
				780	// If there is an SUB32ri of ESP immediately before this instruction, merge
				781	// the two. This can be the case when tail call elimination is enabled and
				782	// the callee has more arguments then the caller.
				783	NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
				784
				785	// If there is an ADD32ri or SUB32ri of ESP immediately after this
				786	// instruction, merge the two instructions.
				787	mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
				788
				789	// Adjust stack pointer: ESP -= numbytes.
				790
				791	// Windows and cygwin/mingw require a prologue helper routine when allocating
				792	// more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
				793	// uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
				794	// stack and adjust the stack pointer in one go. The 64-bit version of
				795	// __chkstk is only responsible for probing the stack. The 64-bit prologue is
				796	// responsible for adjusting the stack pointer. Touching the stack at 4K
				797	// increments is necessary to ensure that the guard pages used by the OS
				798	// virtual memory manager are allocated in correct sequence.
				799	if (NumBytes >= StackProbeSize && UseStackProbe) {
				800	// Check whether EAX is livein for this function.
				801	bool isEAXAlive = isEAXLiveIn(MF);
				802
				803	if (isEAXAlive) {
				804	// Sanity check that EAX is not livein for this function.
				805	// It should not be, so throw an assert.
				806	assert(!Is64Bit && "EAX is livein in x64 case!");
				807
				808	// Save EAX
				809	BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
				810	.addReg(X86::EAX, RegState::Kill)
				811	.setMIFlag(MachineInstr::FrameSetup);
				812	}
				813
				814	if (Is64Bit) {
				815	// Handle the 64-bit Windows ABI case where we need to call __chkstk.
				816	// Function prologue is responsible for adjusting the stack pointer.
				817	BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
				818	.addImm(NumBytes)
				819	.setMIFlag(MachineInstr::FrameSetup);
				820	} else {
				821	// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
				822	// We'll also use 4 already allocated bytes for EAX.
				823	BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
				824	.addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
				825	.setMIFlag(MachineInstr::FrameSetup);
				826	}
				827
				828	// Save a pointer to the MI where we set AX.
				829	MachineBasicBlock::iterator SetRAX = MBBI;
				830	--SetRAX;
				831
				832	// Call __chkstk, __chkstk_ms, or __alloca.
				833	emitStackProbeCall(MF, MBB, MBBI, DL);
				834
				835	// Apply the frame setup flag to all inserted instrs.
				836	for (; SetRAX != MBBI; ++SetRAX)
				837	SetRAX->setFlag(MachineInstr::FrameSetup);
				838
				839	if (isEAXAlive) {
				840	// Restore EAX
				841	MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
				842	X86::EAX),
				843	StackPtr, false, NumBytes - 4);
				844	MI->setFlag(MachineInstr::FrameSetup);
				845	MBB.insert(MBBI, MI);
				846	}
				847	} else if (NumBytes) {
				848	emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
				849	UseLEA, TII, *RegInfo);
				850	}
				851
				852	int SEHFrameOffset = 0;
				853	if (NeedsWinEH) {
				854	if (HasFP) {
				855	// We need to set frame base offset low enough such that all saved
				856	// register offsets would be positive relative to it, but we can't
				857	// just use NumBytes, because .seh_setframe offset must be <=240.
				858	// So we pretend to have only allocated enough space to spill the
				859	// non-volatile registers.
				860	// We don't care about the rest of stack allocation, because unwinder
				861	// will restore SP to (BP - SEHFrameOffset)
				862	for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
				863	int offset = MFI->getObjectOffset(Info.getFrameIdx());
				864	SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
				865	}
				866	SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant
				867
				868	// This only needs to account for XMM spill slots, GPR slots
				869	// are covered by the .seh_pushreg's emitted above.
				870	unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
				871	if (Size) {
				872	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
				873	.addImm(Size)
				874	.setMIFlag(MachineInstr::FrameSetup);
				875	}
				876
				877	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
				878	.addImm(FramePtr)
				879	.addImm(SEHFrameOffset)
				880	.setMIFlag(MachineInstr::FrameSetup);
				881	} else {
				882	// SP will be the base register for restoring XMMs
				883	if (NumBytes) {
				884	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
				885	.addImm(NumBytes)
				886	.setMIFlag(MachineInstr::FrameSetup);
				887	}
				888	}
				889	}
				890
				891	// Skip the rest of register spilling code
				892	while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
				893	++MBBI;
				894
				895	// Emit SEH info for non-GPRs
				896	if (NeedsWinEH) {
				897	for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
				898	unsigned Reg = Info.getReg();
				899	if (X86::GR64RegClass.contains(Reg) \|\| X86::GR32RegClass.contains(Reg))
				900	continue;
				901	assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
				902
				903	int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
				904	Offset += SEHFrameOffset;
				905
				906	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
				907	.addImm(Reg)
				908	.addImm(Offset)
				909	.setMIFlag(MachineInstr::FrameSetup);
				910	}
				911
				912	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
				913	.setMIFlag(MachineInstr::FrameSetup);
				914	}
				915
				916	// If we need a base pointer, set it up here. It's whatever the value
				917	// of the stack pointer is at this point. Any variable size objects
				918	// will be allocated after this, so we can still use the base pointer
				919	// to reference locals.
				920	if (RegInfo->hasBasePointer(MF)) {
				921	// Update the base pointer with the current stack pointer.
				922	unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
				923	BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
				924	.addReg(StackPtr)
				925	.setMIFlag(MachineInstr::FrameSetup);
				926	if (X86FI->getRestoreBasePointer()) {
				927	// Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain.
				928	unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
				929	addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
				930	FramePtr, true, X86FI->getRestoreBasePointerOffset())
				931	.addReg(StackPtr)
				932	.setMIFlag(MachineInstr::FrameSetup);
				933	}
				934	}
				935
				936	if (((!HasFP && NumBytes) \|\| PushedRegs) && NeedsDwarfCFI) {
				937	// Mark end of stack pointer adjustment.
				938	if (!HasFP && NumBytes) {
				939	// Define the current CFA rule to use the provided offset.
				940	assert(StackSize);
				941	unsigned CFIIndex = MMI.addFrameInst(
				942	MCCFIInstruction::createDefCfaOffset(nullptr,
				943	-StackSize + stackGrowth));
				944
				945	BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
				946	.addCFIIndex(CFIIndex);
				947	}
				948
				949	// Emit DWARF info specifying the offsets of the callee-saved registers.
				950	if (PushedRegs)
				951	emitCalleeSavedFrameMoves(MBB, MBBI, DL);
				952	}
				953	}
				954
				955	void X86FrameLowering::emitEpilogue(MachineFunction &MF,
				956	MachineBasicBlock &MBB) const {
				957	const MachineFrameInfo *MFI = MF.getFrameInfo();
				958	X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				959	const X86RegisterInfo *RegInfo =
				960	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				961	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				962	MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
				963	assert(MBBI != MBB.end() && "Returning block has no instructions");
				964	unsigned RetOpcode = MBBI->getOpcode();
				965	DebugLoc DL = MBBI->getDebugLoc();
				966	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				967	bool Is64Bit = STI.is64Bit();
				968	// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
				969	const bool Uses64BitFramePtr = STI.isTarget64BitLP64() \|\| STI.isTargetNaCl64();
				970	const bool Is64BitILP32 = STI.isTarget64BitILP32();
				971	bool UseLEA = STI.useLeaForSP();
				972	unsigned StackAlign = getStackAlignment();
				973	unsigned SlotSize = RegInfo->getSlotSize();
				974	unsigned FramePtr = RegInfo->getFrameRegister(MF);
				975	unsigned MachineFramePtr = Is64BitILP32 ?
				976	getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
				977	unsigned StackPtr = RegInfo->getStackRegister();
				978
				979	bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
				980	bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
				981
				982	switch (RetOpcode) {
				983	default:
				984	llvm_unreachable("Can only insert epilog into returning blocks");
				985	case X86::RETQ:
				986	case X86::RETL:
				987	case X86::RETIL:
				988	case X86::RETIQ:
				989	case X86::TCRETURNdi:
				990	case X86::TCRETURNri:
				991	case X86::TCRETURNmi:
				992	case X86::TCRETURNdi64:
				993	case X86::TCRETURNri64:
				994	case X86::TCRETURNmi64:
				995	case X86::EH_RETURN:
				996	case X86::EH_RETURN64:
				997	break; // These are ok
				998	}
				999
				1000	// Get the number of bytes to allocate from the FrameInfo.
				1001	uint64_t StackSize = MFI->getStackSize();
				1002	uint64_t MaxAlign = MFI->getMaxAlignment();
				1003	unsigned CSSize = X86FI->getCalleeSavedFrameSize();
				1004	uint64_t NumBytes = 0;
				1005
				1006	// If we're forcing a stack realignment we can't rely on just the frame
				1007	// info, we need to know the ABI stack alignment as well in case we
				1008	// have a call out. Otherwise just make sure we have some alignment - we'll
				1009	// go with the minimum.
				1010	if (ForceStackAlign) {
				1011	if (MFI->hasCalls())
				1012	MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
				1013	else
				1014	MaxAlign = MaxAlign ? MaxAlign : 4;
				1015	}
				1016
				1017	if (hasFP(MF)) {
				1018	// Calculate required stack adjustment.
				1019	uint64_t FrameSize = StackSize - SlotSize;
				1020	if (RegInfo->needsStackRealignment(MF)) {
				1021	// Callee-saved registers were pushed on stack before the stack
				1022	// was realigned.
				1023	FrameSize -= CSSize;
				1024	NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
				1025	} else {
				1026	NumBytes = FrameSize - CSSize;
				1027	}
				1028
				1029	// Pop EBP.
				1030	BuildMI(MBB, MBBI, DL,
				1031	TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
				1032	} else {
				1033	NumBytes = StackSize - CSSize;
				1034	}
				1035
				1036	// Skip the callee-saved pop instructions.
				1037	while (MBBI != MBB.begin()) {
				1038	MachineBasicBlock::iterator PI = std::prev(MBBI);
				1039	unsigned Opc = PI->getOpcode();
				1040
				1041	if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
				1042	!PI->isTerminator())
				1043	break;
				1044
				1045	--MBBI;
				1046	}
				1047	MachineBasicBlock::iterator FirstCSPop = MBBI;
				1048
				1049	DL = MBBI->getDebugLoc();
				1050
				1051	// If there is an ADD32ri or SUB32ri of ESP immediately before this
				1052	// instruction, merge the two instructions.
				1053	if (NumBytes \|\| MFI->hasVarSizedObjects())
				1054	mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
				1055
				1056	// If dynamic alloca is used, then reset esp to point to the last callee-saved
				1057	// slot before popping them off! Same applies for the case, when stack was
				1058	// realigned.
				1059	if (RegInfo->needsStackRealignment(MF) \|\| MFI->hasVarSizedObjects()) {
				1060	if (RegInfo->needsStackRealignment(MF))
				1061	MBBI = FirstCSPop;
				1062	if (CSSize != 0) {
				1063	unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
				1064	addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
				1065	FramePtr, false, -CSSize);
				1066	--MBBI;
				1067	} else {
				1068	unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
				1069	BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
				1070	.addReg(FramePtr);
				1071	--MBBI;
				1072	}
				1073	} else if (NumBytes) {
				1074	// Adjust stack pointer back: ESP += numbytes.
				1075	emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
				1076	TII, *RegInfo);
				1077	--MBBI;
				1078	}
				1079
				1080	// Windows unwinder will not invoke function's exception handler if IP is
				1081	// either in prologue or in epilogue. This behavior causes a problem when a
				1082	// call immediately precedes an epilogue, because the return address points
				1083	// into the epilogue. To cope with that, we insert an epilogue marker here,
				1084	// then replace it with a 'nop' if it ends up immediately after a CALL in the
				1085	// final emitted code.
				1086	if (NeedsWinEH)
				1087	BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
				1088
				1089	// We're returning from function via eh_return.
				1090	if (RetOpcode == X86::EH_RETURN \|\| RetOpcode == X86::EH_RETURN64) {
				1091	MBBI = MBB.getLastNonDebugInstr();
				1092	MachineOperand &DestAddr = MBBI->getOperand(0);
				1093	assert(DestAddr.isReg() && "Offset should be in register!");
				1094	BuildMI(MBB, MBBI, DL,
				1095	TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
				1096	StackPtr).addReg(DestAddr.getReg());
				1097	} else if (RetOpcode == X86::TCRETURNri \|\| RetOpcode == X86::TCRETURNdi \|\|
				1098	RetOpcode == X86::TCRETURNmi \|\|
				1099	RetOpcode == X86::TCRETURNri64 \|\| RetOpcode == X86::TCRETURNdi64 \|\|
				1100	RetOpcode == X86::TCRETURNmi64) {
				1101	bool isMem = RetOpcode == X86::TCRETURNmi \|\| RetOpcode == X86::TCRETURNmi64;
				1102	// Tail call return: adjust the stack pointer and jump to callee.
				1103	MBBI = MBB.getLastNonDebugInstr();
				1104	MachineOperand &JumpTarget = MBBI->getOperand(0);
				1105	MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
				1106	assert(StackAdjust.isImm() && "Expecting immediate value.");
				1107
				1108	// Adjust stack pointer.
				1109	int StackAdj = StackAdjust.getImm();
				1110	int MaxTCDelta = X86FI->getTCReturnAddrDelta();
				1111	int Offset = 0;
				1112	assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
				1113
				1114	// Incoporate the retaddr area.
				1115	Offset = StackAdj-MaxTCDelta;
				1116	assert(Offset >= 0 && "Offset should never be negative");
				1117
				1118	if (Offset) {
				1119	// Check for possible merge with preceding ADD instruction.
				1120	Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
				1121	emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
				1122	UseLEA, TII, *RegInfo);
				1123	}
				1124
				1125	// Jump to label or value in register.
				1126	bool IsWin64 = STI.isTargetWin64();
				1127	if (RetOpcode == X86::TCRETURNdi \|\| RetOpcode == X86::TCRETURNdi64) {
				1128	unsigned Op = (RetOpcode == X86::TCRETURNdi)
				1129	? X86::TAILJMPd
				1130	: (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
				1131	MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
				1132	if (JumpTarget.isGlobal())
				1133	MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
				1134	JumpTarget.getTargetFlags());
				1135	else {
				1136	assert(JumpTarget.isSymbol());
				1137	MIB.addExternalSymbol(JumpTarget.getSymbolName(),
				1138	JumpTarget.getTargetFlags());
				1139	}
				1140	} else if (RetOpcode == X86::TCRETURNmi \|\| RetOpcode == X86::TCRETURNmi64) {
				1141	unsigned Op = (RetOpcode == X86::TCRETURNmi)
				1142	? X86::TAILJMPm
				1143	: (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
				1144	MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
				1145	for (unsigned i = 0; i != 5; ++i)
				1146	MIB.addOperand(MBBI->getOperand(i));
				1147	} else if (RetOpcode == X86::TCRETURNri64) {
				1148	BuildMI(MBB, MBBI, DL,
				1149	TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
				1150	.addReg(JumpTarget.getReg(), RegState::Kill);
				1151	} else {
				1152	BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
				1153	addReg(JumpTarget.getReg(), RegState::Kill);
				1154	}
				1155
				1156	MachineInstr *NewMI = std::prev(MBBI);
				1157	NewMI->copyImplicitOps(MF, MBBI);
				1158
				1159	// Delete the pseudo instruction TCRETURN.
				1160	MBB.erase(MBBI);
				1161	} else if ((RetOpcode == X86::RETQ \|\| RetOpcode == X86::RETL \|\|
				1162	RetOpcode == X86::RETIQ \|\| RetOpcode == X86::RETIL) &&
				1163	(X86FI->getTCReturnAddrDelta() < 0)) {
				1164	// Add the return addr area delta back since we are not tail calling.
				1165	int delta = -1*X86FI->getTCReturnAddrDelta();
				1166	MBBI = MBB.getLastNonDebugInstr();
				1167
				1168	// Check for possible merge with preceding ADD instruction.
				1169	delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
				1170	emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
				1171	*RegInfo);
				1172	}
				1173	}
				1174
				1175	int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
				1176	int FI) const {
				1177	const X86RegisterInfo *RegInfo =
				1178	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				1179	const MachineFrameInfo *MFI = MF.getFrameInfo();
				1180	int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
				1181	uint64_t StackSize = MFI->getStackSize();
				1182
				1183	if (RegInfo->hasBasePointer(MF)) {
				1184	assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
				1185	if (FI < 0) {
				1186	// Skip the saved EBP.
				1187	return Offset + RegInfo->getSlotSize();
				1188	} else {
				1189	assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
				1190	return Offset + StackSize;
				1191	}
				1192	} else if (RegInfo->needsStackRealignment(MF)) {
				1193	if (FI < 0) {
				1194	// Skip the saved EBP.
				1195	return Offset + RegInfo->getSlotSize();
				1196	} else {
				1197	assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
				1198	return Offset + StackSize;
				1199	}
				1200	// FIXME: Support tail calls
				1201	} else {
				1202	if (!hasFP(MF))
				1203	return Offset + StackSize;
				1204
				1205	// Skip the saved EBP.
				1206	Offset += RegInfo->getSlotSize();
				1207
				1208	// Skip the RETADDR move area
				1209	const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				1210	int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
				1211	if (TailCallReturnAddrDelta < 0)
				1212	Offset -= TailCallReturnAddrDelta;
				1213	}
				1214
				1215	return Offset;
				1216	}
				1217
				1218	int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
				1219	unsigned &FrameReg) const {
				1220	const X86RegisterInfo *RegInfo =
				1221	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				1222	// We can't calculate offset from frame pointer if the stack is realigned,
				1223	// so enforce usage of stack/base pointer. The base pointer is used when we
				1224	// have dynamic allocas in addition to dynamic realignment.
				1225	if (RegInfo->hasBasePointer(MF))
				1226	FrameReg = RegInfo->getBaseRegister();
				1227	else if (RegInfo->needsStackRealignment(MF))
				1228	FrameReg = RegInfo->getStackRegister();
				1229	else
				1230	FrameReg = RegInfo->getFrameRegister(MF);
				1231	return getFrameIndexOffset(MF, FI);
				1232	}
				1233
				1234	// Simplified from getFrameIndexOffset keeping only StackPointer cases
				1235	int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
				1236	const MachineFrameInfo *MFI = MF.getFrameInfo();
				1237	// Does not include any dynamic realign.
				1238	const uint64_t StackSize = MFI->getStackSize();
				1239	{
				1240	#ifndef NDEBUG
				1241	const X86RegisterInfo *RegInfo =
				1242	static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
				1243	// Note: LLVM arranges the stack as:
				1244	// Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
				1245	// > "Stack Slots" (<--SP)
				1246	// We can always address StackSlots from RSP. We can usually (unless
				1247	// needsStackRealignment) address CSRs from RSP, but sometimes need to
				1248	// address them from RBP. FixedObjects can be placed anywhere in the stack
				1249	// frame depending on their specific requirements (i.e. we can actually
				1250	// refer to arguments to the function which are stored in the callers
				1251	// frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
				1252	// AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
				1253
				1254	assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
				1255
				1256	// We don't handle tail calls, and shouldn't be seeing them
				1257	// either.
				1258	int TailCallReturnAddrDelta =
				1259	MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
				1260	assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
				1261	#endif
				1262	}
				1263
				1264	// This is how the math works out:
				1265	//
				1266	// %rsp grows (i.e. gets lower) left to right. Each box below is
				1267	// one word (eight bytes). Obj0 is the stack slot we're trying to
				1268	// get to.
				1269	//
				1270	// ----------------------------------
				1271	// \| BP \| Obj0 \| Obj1 \| ... \| ObjN \|
				1272	// ----------------------------------
				1273	// ^ ^ ^ ^
				1274	// A B C E
				1275	//
				1276	// A is the incoming stack pointer.
				1277	// (B - A) is the local area offset (-8 for x86-64) [1]
				1278	// (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
				1279	//
				1280	// \|(E - B)\| is the StackSize (absolute value, positive). For a
				1281	// stack that grown down, this works out to be (B - E). [3]
				1282	//
				1283	// E is also the value of %rsp after stack has been set up, and we
				1284	// want (C - E) -- the value we can add to %rsp to get to Obj0. Now
				1285	// (C - E) == (C - A) - (B - A) + (B - E)
				1286	// { Using [1], [2] and [3] above }
				1287	// == getObjectOffset - LocalAreaOffset + StackSize
				1288	//
				1289
				1290	// Get the Offset from the StackPointer
				1291	int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
				1292
				1293	return Offset + StackSize;
				1294	}
				1295	// Simplified from getFrameIndexReference keeping only StackPointer cases
				1296	int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
				1297	unsigned &FrameReg) const {
				1298	const X86RegisterInfo *RegInfo =
				1299	static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
				1300
				1301	assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
				1302
				1303	FrameReg = RegInfo->getStackRegister();
				1304	return getFrameIndexOffsetFromSP(MF, FI);
				1305	}
				1306
				1307	bool X86FrameLowering::assignCalleeSavedSpillSlots(
				1308	MachineFunction &MF, const TargetRegisterInfo *TRI,
				1309	std::vector<CalleeSavedInfo> &CSI) const {
				1310	MachineFrameInfo *MFI = MF.getFrameInfo();
				1311	const X86RegisterInfo *RegInfo =
				1312	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				1313	unsigned SlotSize = RegInfo->getSlotSize();
				1314	X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				1315
				1316	unsigned CalleeSavedFrameSize = 0;
				1317	int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
				1318
				1319	if (hasFP(MF)) {
				1320	// emitPrologue always spills frame register the first thing.
				1321	SpillSlotOffset -= SlotSize;
				1322	MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
				1323
				1324	// Since emitPrologue and emitEpilogue will handle spilling and restoring of
				1325	// the frame register, we can delete it from CSI list and not have to worry
				1326	// about avoiding it later.
				1327	unsigned FPReg = RegInfo->getFrameRegister(MF);
				1328	for (unsigned i = 0; i < CSI.size(); ++i) {
				1329	if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
				1330	CSI.erase(CSI.begin() + i);
				1331	break;
				1332	}
				1333	}
				1334	}
				1335
				1336	// Assign slots for GPRs. It increases frame size.
				1337	for (unsigned i = CSI.size(); i != 0; --i) {
				1338	unsigned Reg = CSI[i - 1].getReg();
				1339
				1340	if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
				1341	continue;
				1342
				1343	SpillSlotOffset -= SlotSize;
				1344	CalleeSavedFrameSize += SlotSize;
				1345
				1346	int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
				1347	CSI[i - 1].setFrameIdx(SlotIndex);
				1348	}
				1349
				1350	X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
				1351
				1352	// Assign slots for XMMs.
				1353	for (unsigned i = CSI.size(); i != 0; --i) {
				1354	unsigned Reg = CSI[i - 1].getReg();
				1355	if (X86::GR64RegClass.contains(Reg) \|\| X86::GR32RegClass.contains(Reg))
				1356	continue;
				1357
				1358	const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
				1359	// ensure alignment
				1360	SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
				1361	// spill into slot
				1362	SpillSlotOffset -= RC->getSize();
				1363	int SlotIndex =
				1364	MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
				1365	CSI[i - 1].setFrameIdx(SlotIndex);
				1366	MFI->ensureMaxAlignment(RC->getAlignment());
				1367	}
				1368
				1369	return true;
				1370	}
				1371
				1372	bool X86FrameLowering::spillCalleeSavedRegisters(
				1373	MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
				1374	const std::vector<CalleeSavedInfo> &CSI,
				1375	const TargetRegisterInfo *TRI) const {
				1376	DebugLoc DL = MBB.findDebugLoc(MI);
				1377
				1378	MachineFunction &MF = *MBB.getParent();
				1379	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				1380	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				1381
				1382	// Push GPRs. It increases frame size.
				1383	unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
				1384	for (unsigned i = CSI.size(); i != 0; --i) {
				1385	unsigned Reg = CSI[i - 1].getReg();
				1386
				1387	if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
				1388	continue;
				1389	// Add the callee-saved register as live-in. It's killed at the spill.
				1390	MBB.addLiveIn(Reg);
				1391
				1392	BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
				1393	.setMIFlag(MachineInstr::FrameSetup);
				1394	}
				1395
				1396	// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
				1397	// It can be done by spilling XMMs to stack frame.
				1398	for (unsigned i = CSI.size(); i != 0; --i) {
				1399	unsigned Reg = CSI[i-1].getReg();
				1400	if (X86::GR64RegClass.contains(Reg) \|\|
				1401	X86::GR32RegClass.contains(Reg))
				1402	continue;
				1403	// Add the callee-saved register as live-in. It's killed at the spill.
				1404	MBB.addLiveIn(Reg);
				1405	const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
				1406
				1407	TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
				1408	TRI);
				1409	--MI;
				1410	MI->setFlag(MachineInstr::FrameSetup);
				1411	++MI;
				1412	}
				1413
				1414	return true;
				1415	}
				1416
				1417	bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
				1418	MachineBasicBlock::iterator MI,
				1419	const std::vector<CalleeSavedInfo> &CSI,
				1420	const TargetRegisterInfo *TRI) const {
				1421	if (CSI.empty())
				1422	return false;
				1423
				1424	DebugLoc DL = MBB.findDebugLoc(MI);
				1425
				1426	MachineFunction &MF = *MBB.getParent();
				1427	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				1428	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				1429
				1430	// Reload XMMs from stack frame.
				1431	for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
				1432	unsigned Reg = CSI[i].getReg();
				1433	if (X86::GR64RegClass.contains(Reg) \|\|
				1434	X86::GR32RegClass.contains(Reg))
				1435	continue;
				1436
				1437	const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
				1438	TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
				1439	}
				1440
				1441	// POP GPRs.
				1442	unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
				1443	for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
				1444	unsigned Reg = CSI[i].getReg();
				1445	if (!X86::GR64RegClass.contains(Reg) &&
				1446	!X86::GR32RegClass.contains(Reg))
				1447	continue;
				1448
				1449	BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
				1450	}
				1451	return true;
				1452	}
				1453
				1454	void
				1455	X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
				1456	RegScavenger *RS) const {
				1457	MachineFrameInfo *MFI = MF.getFrameInfo();
				1458	const X86RegisterInfo *RegInfo =
				1459	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
				1460	unsigned SlotSize = RegInfo->getSlotSize();
				1461
				1462	X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				1463	int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
				1464
				1465	if (TailCallReturnAddrDelta < 0) {
				1466	// create RETURNADDR area
				1467	// arg
				1468	// arg
				1469	// RETADDR
				1470	// { ...
				1471	// RETADDR area
				1472	// ...
				1473	// }
				1474	// [EBP]
				1475	MFI->CreateFixedObject(-TailCallReturnAddrDelta,
				1476	TailCallReturnAddrDelta - SlotSize, true);
				1477	}
				1478
				1479	// Spill the BasePtr if it's used.
				1480	if (RegInfo->hasBasePointer(MF))
				1481	MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
				1482	}
				1483
				1484	static bool
				1485	HasNestArgument(const MachineFunction *MF) {
				1486	const Function *F = MF->getFunction();
				1487	for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
				1488	I != E; I++) {
				1489	if (I->hasNestAttr())
				1490	return true;
				1491	}
				1492	return false;
				1493	}
				1494
				1495	/// GetScratchRegister - Get a temp register for performing work in the
				1496	/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
				1497	/// and the properties of the function either one or two registers will be
				1498	/// needed. Set primary to true for the first register, false for the second.
				1499	static unsigned
				1500	GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
				1501	CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
				1502
				1503	// Erlang stuff.
				1504	if (CallingConvention == CallingConv::HiPE) {
				1505	if (Is64Bit)
				1506	return Primary ? X86::R14 : X86::R13;
				1507	else
				1508	return Primary ? X86::EBX : X86::EDI;
				1509	}
				1510
				1511	if (Is64Bit) {
				1512	if (IsLP64)
				1513	return Primary ? X86::R11 : X86::R12;
				1514	else
				1515	return Primary ? X86::R11D : X86::R12D;
				1516	}
				1517
				1518	bool IsNested = HasNestArgument(&MF);
				1519
				1520	if (CallingConvention == CallingConv::X86_FastCall \|\|
				1521	CallingConvention == CallingConv::Fast) {
				1522	if (IsNested)
				1523	report_fatal_error("Segmented stacks does not support fastcall with "
				1524	"nested function.");
				1525	return Primary ? X86::EAX : X86::ECX;
				1526	}
				1527	if (IsNested)
				1528	return Primary ? X86::EDX : X86::EAX;
				1529	return Primary ? X86::ECX : X86::EAX;
				1530	}
				1531
// The stack limit in the TCB is set to this many bytes above the actual stack
// limit. adjustForSegmentedStacks compares the frame size against this value:
// for frames smaller than it, the stack boundary is compared directly to the
// value of the stack pointer.
static const uint64_t kSplitStackAvailable = 256;
				1535
				// Insert the segmented-stack check in front of the function's entry block.
				//
				// Two new blocks are created and pushed to the front of MF:
				//  - checkMBB compares the stack pointer (or the stack pointer minus the
				//    frame size, for frames of at least kSplitStackAvailable bytes) with the
				//    limit read from a per-platform TLS slot and jumps to the old entry
				//    block when the compared value is above that limit;
				//  - allocMBB hands the frame size and the argument stack size to
				//    __morestack, calls it and then returns through a MORESTACK_RET pseudo.
				//
				// Nothing is emitted when the stack size is zero. A fatal error is reported
				// for vararg functions and for platforms without a known TLS slot.
				1536	void
				1537	X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
				1538	MachineBasicBlock &prologueMBB = MF.front();
				1539	MachineFrameInfo *MFI = MF.getFrameInfo();
				1540	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				1541	uint64_t StackSize;
				1542	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				1543	bool Is64Bit = STI.is64Bit();
				1544	const bool IsLP64 = STI.isTarget64BitLP64();
				1545	unsigned TlsReg, TlsOffset;
				1546	DebugLoc DL;
				1547
				1548	unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
				1549	assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
				1550	"Scratch register is live-in");
				1551
				1552	if (MF.getFunction()->isVarArg())
				1553	report_fatal_error("Segmented stacks do not support vararg functions.");
				1554	if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
				1555	!STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
				1556	!STI.isTargetDragonFly())
				1557	report_fatal_error("Segmented stacks not supported on this platform.");
				1558
				1559	// Eventually StackSize will be calculated by a link-time pass; which will
				1560	// also decide whether checking code needs to be injected into this particular
				1561	// prologue.
				1562	StackSize = MFI->getStackSize();
				1563
				1564	// Do not generate a prologue for functions with a stack of size zero
				1565	if (StackSize == 0)
				1566	return;
				1567
				1568	MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
				1569	MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
				1570	X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
				1571	bool IsNested = false;
				1572
				1573	// We need to know if the function has a nest argument only in 64 bit mode.
				1574	if (Is64Bit)
				1575	IsNested = HasNestArgument(&MF);
				1576
				1577	// The MOV R10, RAX needs to be in a different block, since the RET we emit in
				1578	// allocMBB needs to be last (terminating) instruction.
				1579
				// Both new blocks inherit the live-in registers of the original entry block.
				1580	for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
				1581	e = prologueMBB.livein_end(); i != e; i++) {
				1582	allocMBB->addLiveIn(*i);
				1583	checkMBB->addLiveIn(*i);
				1584	}
				1585
				1586	if (IsNested)
				1587	allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
				1588
				// checkMBB is pushed last, so the block order becomes
				// checkMBB, allocMBB, original entry block.
				1589	MF.push_front(allocMBB);
				1590	MF.push_front(checkMBB);
				1591
				1592	// When the frame size is less than 256 we just compare the stack
				1593	// boundary directly to the value of the stack pointer, per gcc.
				1594	bool CompareStackPointer = StackSize < kSplitStackAvailable;
				1595
				1596	// Read the limit of the current stacklet from the stack_guard location.
				1597	if (Is64Bit) {
				1598	if (STI.isTargetLinux()) {
				1599	TlsReg = X86::FS;
				1600	TlsOffset = IsLP64 ? 0x70 : 0x40;
				1601	} else if (STI.isTargetDarwin()) {
				1602	TlsReg = X86::GS;
				1603	TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
				1604	} else if (STI.isTargetWin64()) {
				1605	TlsReg = X86::GS;
				1606	TlsOffset = 0x28; // pvArbitrary, reserved for application use
				1607	} else if (STI.isTargetFreeBSD()) {
				1608	TlsReg = X86::FS;
				1609	TlsOffset = 0x18;
				1610	} else if (STI.isTargetDragonFly()) {
				1611	TlsReg = X86::FS;
				1612	TlsOffset = 0x20; // use tls_tcb.tcb_segstack
				1613	} else {
				1614	report_fatal_error("Segmented stacks not supported on this platform.");
				1615	}
				1616
				// Small frames compare the stack pointer itself; otherwise the scratch
				// register is loaded with SP - StackSize and compared instead.
				1617	if (CompareStackPointer)
				1618	ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
				1619	else
				1620	BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
				1621	.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
				1622
				1623	BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
				1624	.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
				1625	} else {
				1626	if (STI.isTargetLinux()) {
				1627	TlsReg = X86::GS;
				1628	TlsOffset = 0x30;
				1629	} else if (STI.isTargetDarwin()) {
				1630	TlsReg = X86::GS;
				1631	TlsOffset = 0x48 + 90*4;
				1632	} else if (STI.isTargetWin32()) {
				1633	TlsReg = X86::FS;
				1634	TlsOffset = 0x14; // pvArbitrary, reserved for application use
				1635	} else if (STI.isTargetDragonFly()) {
				1636	TlsReg = X86::FS;
				1637	TlsOffset = 0x10; // use tls_tcb.tcb_segstack
				1638	} else if (STI.isTargetFreeBSD()) {
				1639	report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
				1640	} else {
				1641	report_fatal_error("Segmented stacks not supported on this platform.");
				1642	}
				1643
				1644	if (CompareStackPointer)
				1645	ScratchReg = X86::ESP;
				1646	else
				1647	BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
				1648	.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
				1649
				1650	if (STI.isTargetLinux() \|\| STI.isTargetWin32() \|\| STI.isTargetWin64() \|\|
				1651	STI.isTargetDragonFly()) {
				1652	BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
				1653	.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
				1654	} else if (STI.isTargetDarwin()) {
				1655
				1656	// TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
				1657	unsigned ScratchReg2;
				1658	bool SaveScratch2;
				1659	if (CompareStackPointer) {
				1660	// The primary scratch register is available for holding the TLS offset.
				1661	ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
				1662	SaveScratch2 = false;
				1663	} else {
				1664	// Need to use a second register to hold the TLS offset
				1665	ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
				1666
				1667	// Unfortunately, with fastcc the second scratch register may hold an
				1668	// argument.
				1669	SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
				1670	}
				1671
				1672	// If Scratch2 is live-in then it needs to be saved.
				1673	assert((!MF.getRegInfo().isLiveIn(ScratchReg2) \|\| SaveScratch2) &&
				1674	"Scratch register is live-in and not saved");
				1675
				// A live-in second scratch register is preserved with a PUSH/POP pair
				// around the comparison.
				1676	if (SaveScratch2)
				1677	BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
				1678	.addReg(ScratchReg2, RegState::Kill);
				1679
				1680	BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
				1681	.addImm(TlsOffset);
				1682	BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
				1683	.addReg(ScratchReg)
				1684	.addReg(ScratchReg2).addImm(1).addReg(0)
				1685	.addImm(0)
				1686	.addReg(TlsReg);
				1687
				1688	if (SaveScratch2)
				1689	BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
				1690	}
				1691	}
				1692
				1693	// This jump is taken if the compared value (SP, or SP - StackSize for large
				1694	// frames) is above the stacklet limit. It jumps to normal execution of the
				// function body.
				1695	BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
				1696
				1697	// On 32 bit we first push the arguments size and then the frame size. On 64
				1698	// bit, we pass the stack frame size in r10 and the argument size in r11.
				1699	if (Is64Bit) {
				1700	// Functions with nested arguments use R10, so it needs to be saved across
				1701	// the call to _morestack
				1702
				1703	const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
				1704	const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
				1705	const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
				1706	const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
				1707	const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
				1708
				// Keep a copy of R10 in RAX before R10 is overwritten with the frame size.
				1709	if (IsNested)
				1710	BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
				1711
				1712	BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
				1713	.addImm(StackSize);
				1714	BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
				1715	.addImm(X86FI->getArgumentStackSize());
				1716	MF.getRegInfo().setPhysRegUsed(Reg10);
				1717	MF.getRegInfo().setPhysRegUsed(Reg11);
				1718	} else {
				1719	BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
				1720	.addImm(X86FI->getArgumentStackSize());
				1721	BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
				1722	.addImm(StackSize);
				1723	}
				1724
				1725	// __morestack is in libgcc
				1726	if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
				1727	// Under the large code model, we cannot assume that __morestack lives
				1728	// within 2^31 bytes of the call site, so we cannot use pc-relative
				1729	// addressing. We cannot perform the call via a temporary register,
				1730	// as the rax register may be used to store the static chain, and all
				1731	// other suitable registers may be either callee-save or used for
				1732	// parameter passing. We cannot use the stack at this point either
				1733	// because __morestack manipulates the stack directly.
				1734	//
				1735	// To avoid these issues, perform an indirect call via a read-only memory
				1736	// location containing the address.
				1737	//
				1738	// This solution is not perfect, as it assumes that the .rodata section
				1739	// is laid out within 2^31 bytes of each function body, but this seems
				1740	// to be sufficient for JIT.
				1741	BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
				1742	.addReg(X86::RIP)
				1743	.addImm(0)
				1744	.addReg(0)
				1745	.addExternalSymbol("__morestack_addr")
				1746	.addReg(0);
				1747	MF.getMMI().setUsesMorestackAddr(true);
				1748	} else {
				1749	if (Is64Bit)
				1750	BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
				1751	.addExternalSymbol("__morestack");
				1752	else
				1753	BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
				1754	.addExternalSymbol("__morestack");
				1755	}
				1756
				// Nested functions end allocMBB with the return pseudo that, going by its
				// name, also restores R10 from the copy made above.
				1757	if (IsNested)
				1758	BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
				1759	else
				1760	BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
				1761
				1762	allocMBB->addSuccessor(&prologueMBB);
				1763
				1764	checkMBB->addSuccessor(allocMBB);
				1765	checkMBB->addSuccessor(&prologueMBB);
				1766
				1767	#ifdef XDEBUG
				1768	MF.verify();
				1769	#endif
				1770	}
				1771
				1772	/// Erlang programs may need a special prologue to handle the stack size they
				1773	/// might need at runtime. That is because Erlang/OTP does not implement a C
				1774	/// stack but uses a custom implementation of hybrid stack/heap architecture.
				1775	/// (for more information see Eric Stenman's Ph.D. thesis:
				1776	/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
				1777	///
				1778	/// CheckStack:
				1779	/// temp0 = sp - MaxStack
				1780	/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
				1781	/// OldStart:
				1782	/// ...
				1783	/// IncStack:
				1784	/// call inc_stack # doubles the stack space
				1785	/// temp0 = sp - MaxStack
				1786	/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
				///
				/// The two blocks are only inserted when the computed MaxStack exceeds the
				/// guaranteed amount (HipeLeafWords slots); otherwise MF is left unchanged.
				1787	void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
				1788	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				1789	MachineFrameInfo *MFI = MF.getFrameInfo();
				1790	const unsigned SlotSize =
				1791	static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
				1792	->getSlotSize();
				1793	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				1794	const bool Is64Bit = STI.is64Bit();
				1795	const bool IsLP64 = STI.isTarget64BitLP64();
				1796	DebugLoc DL;
				1797	// HiPE-specific values
				1798	const unsigned HipeLeafWords = 24;
				1799	const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
				1800	const unsigned Guaranteed = HipeLeafWords * SlotSize;
				// Arguments beyond the first CCRegisteredArgs are counted as stack slots.
				1801	unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
				1802	MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
				// Start from this function's frame size, its stack-passed arguments and one
				// additional slot.
				1803	unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
				1804
				1805	assert(STI.isTargetLinux() &&
				1806	"HiPE prologue is only supported on Linux operating systems.");
				1807
				1808	// Compute the largest caller's frame that is needed to fit the callees'
				1809	// frames. This 'MaxStack' is computed from:
				1810	//
				1811	// a) the fixed frame size, which is the space needed for all spilled temps,
				1812	// b) outgoing on-stack parameter areas, and
				1813	// c) the minimum stack space this function needs to make available for the
				1814	// functions it calls (a tunable ABI property).
				1815	if (MFI->hasCalls()) {
				1816	unsigned MoreStackForCalls = 0;
				1817
				1818	for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
				1819	MBBI != MBBE; ++MBBI)
				1820	for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
				1821	MI != ME; ++MI) {
				1822	if (!MI->isCall())
				1823	continue;
				1824
				1825	// Get callee operand.
				1826	const MachineOperand &MO = MI->getOperand(0);
				1827
				1828	// Only take account of global function calls (no closures etc.).
				1829	if (!MO.isGlobal())
				1830	continue;
				1831
				1832	const Function *F = dyn_cast<Function>(MO.getGlobal());
				1833	if (!F)
				1834	continue;
				1835
				1836	// Do not update 'MaxStack' for primitive and built-in functions
				1837	// (encoded with names containing "erlang." or "bif_" anywhere, or
				1838	// containing neither a "." — as a simple <Module>.<Function>.<Arity>
				1839	// name would — nor an "_", such as the BIF "suspend_0") as they are
				1840	// executed on another stack.
				1841	if (F->getName().find("erlang.") != StringRef::npos \|\|
				1842	F->getName().find("bif_") != StringRef::npos \|\|
				1843	F->getName().find_first_of("._") == StringRef::npos)
				1844	continue;
				1845
				1846	unsigned CalleeStkArity =
				1847	F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
				// Keep the largest extra amount required by any callee; the guard avoids
				// unsigned wrap-around in the subtraction.
				1848	if (HipeLeafWords - 1 > CalleeStkArity)
				1849	MoreStackForCalls = std::max(MoreStackForCalls,
				1850	(HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
				1851	}
				1852	MaxStack += MoreStackForCalls;
				1853	}
				1854
				1855	// If the stack frame needed is larger than the guaranteed then runtime checks
				1856	// and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
				1857	if (MaxStack > Guaranteed) {
				1858	MachineBasicBlock &prologueMBB = MF.front();
				1859	MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
				1860	MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
				1861
				// Both new blocks inherit the live-in registers of the original entry block.
				1862	for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
				1863	E = prologueMBB.livein_end(); I != E; I++) {
				1864	stackCheckMBB->addLiveIn(*I);
				1865	incStackMBB->addLiveIn(*I);
				1866	}
				1867
				// Resulting block order: stackCheckMBB, incStackMBB, original entry block.
				1868	MF.push_front(incStackMBB);
				1869	MF.push_front(stackCheckMBB);
				1870
				1871	unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
				1872	unsigned LEAop, CMPop, CALLop;
				1873	if (Is64Bit) {
				1874	SPReg = X86::RSP;
				1875	PReg = X86::RBP;
				1876	LEAop = X86::LEA64r;
				1877	CMPop = X86::CMP64rm;
				1878	CALLop = X86::CALL64pcrel32;
				1879	SPLimitOffset = 0x90;
				1880	} else {
				1881	SPReg = X86::ESP;
				1882	PReg = X86::EBP;
				1883	LEAop = X86::LEA32r;
				1884	CMPop = X86::CMP32rm;
				1885	CALLop = X86::CALLpcrel32;
				1886	SPLimitOffset = 0x4c;
				1887	}
				1888
				1889	ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
				1890	assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
				1891	"HiPE prologue scratch register is live-in");
				1892
				1893	// Create new MBB for StackCheck:
				1894	addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
				1895	SPReg, false, -MaxStack);
				1896	// SPLimitOffset is in a fixed heap location (pointed by BP).
				1897	addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
				1898	.addReg(ScratchReg), PReg, false, SPLimitOffset);
				1899	BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
				1900
				1901	// Create new MBB for IncStack:
				1902	BuildMI(incStackMBB, DL, TII.get(CALLop)).
				1903	addExternalSymbol("inc_stack_0");
				1904	addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
				1905	SPReg, false, -MaxStack);
				1906	addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
				1907	.addReg(ScratchReg), PReg, false, SPLimitOffset);
				// NOTE(review): the first check branches with the unsigned JAE, while this
				// retry loop uses the signed JLE, which also loops on equality although the
				// pseudo-code in the header uses "<" — confirm this difference is intended.
				1908	BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
				1909
				// Successor weights: 99 for continuing into the function body, 1 for
				// (re)entering incStackMBB.
				1910	stackCheckMBB->addSuccessor(&prologueMBB, 99);
				1911	stackCheckMBB->addSuccessor(incStackMBB, 1);
				1912	incStackMBB->addSuccessor(&prologueMBB, 99);
				1913	incStackMBB->addSuccessor(incStackMBB, 1);
				1914	}
				1915	#ifdef XDEBUG
				1916	MF.verify();
				1917	#endif
				1918	}
				1919
				1920	bool X86FrameLowering::
				1921	convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
				1922	MachineBasicBlock::iterator I, uint64_t Amount) const {
				1923	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				1924	const X86RegisterInfo &RegInfo = static_cast<const X86RegisterInfo >(
				1925	MF.getSubtarget().getRegisterInfo());
				1926	unsigned StackPtr = RegInfo.getStackRegister();
				1927
				1928	// Scan the call setup sequence for the pattern we're looking for.
				1929	// We only handle a simple case now - a sequence of MOV32mi or MOV32mr
				1930	// instructions, that push a sequence of 32-bit values onto the stack, with
				1931	// no gaps.
				1932	std::map<int64_t, MachineBasicBlock::iterator> MovMap;
				1933	do {
				1934	int Opcode = I->getOpcode();
				1935	if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
				1936	break;
				1937
				1938	// We only want movs of the form:
				1939	// movl imm/r32, k(%ecx)
				1940	// If we run into something else, bail
				1941	// Note that AddrBaseReg may, counterintuitively, not be a register...
				1942	if (!I->getOperand(X86::AddrBaseReg).isReg() \|\|
				1943	(I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) \|\|
				1944	!I->getOperand(X86::AddrScaleAmt).isImm() \|\|
				1945	(I->getOperand(X86::AddrScaleAmt).getImm() != 1) \|\|
				1946	(I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) \|\|
				1947	(I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) \|\|
				1948	!I->getOperand(X86::AddrDisp).isImm())
				1949	return false;
				1950
				1951	int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
				1952
				1953	// We don't want to consider the unaligned case.
				1954	if (StackDisp % 4)
				1955	return false;
				1956
				1957	// If the same stack slot is being filled twice, something's fishy.
				1958	if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
				1959	return false;
				1960
				1961	++I;
				1962	} while (I != MBB.end());
				1963
				1964	// We now expect the end of the sequence - a call and a stack adjust.
				1965	if (I == MBB.end())
				1966	return false;
				1967	if (!I->isCall())
				1968	return false;
				1969	MachineBasicBlock::iterator Call = I;
				1970	if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
				1971	return false;
				1972
				1973	// Now, go through the map, and see that we don't have any gaps,
				1974	// but only a series of 32-bit MOVs.
				1975	// Since std::map provides ordered iteration, the original order
				1976	// of the MOVs doesn't matter.
				1977	int64_t ExpectedDist = 0;
				1978	for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
				1979	++MMI, ExpectedDist += 4)
				1980	if (MMI->first != ExpectedDist)
				1981	return false;
				1982
				1983	// Ok, everything looks fine. Do the transformation.
				1984	DebugLoc DL = I->getDebugLoc();
				1985
				1986	// It's possible the original stack adjustment amount was larger than
				1987	// that done by the pushes. If so, we still need a SUB.
				1988	Amount -= ExpectedDist;
				1989	if (Amount) {
				1990	MachineInstr* Sub = BuildMI(MBB, Call, DL,
				1991	TII.get(getSUBriOpcode(false, Amount)), StackPtr)
				1992	.addReg(StackPtr).addImm(Amount);
				1993	Sub->getOperand(3).setIsDead();
				1994	}
				1995
				1996	// Now, iterate through the map in reverse order, and replace the movs
				1997	// with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
				1998	for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
				1999	MachineBasicBlock::iterator MOV = MMI->second;
				2000	MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
				2001
				2002	// Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
				2003	int PushOpcode = X86::PUSH32r;
				2004	if (MOV->getOpcode() == X86::MOV32mi)
				2005	PushOpcode = getPUSHiOpcode(false, PushOp);
				2006
				2007	BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
				2008	MBB.erase(MOV);
				2009	}
				2010
				2011	return true;
				2012	}
				2013
				2014	void X86FrameLowering::
				2015	eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
				2016	MachineBasicBlock::iterator I) const {
				2017	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
				2018	const X86RegisterInfo &RegInfo = static_cast<const X86RegisterInfo >(
				2019	MF.getSubtarget().getRegisterInfo());
				2020	unsigned StackPtr = RegInfo.getStackRegister();
				2021	bool reserveCallFrame = hasReservedCallFrame(MF);
				2022	int Opcode = I->getOpcode();
				2023	bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
				2024	const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
				2025	bool IsLP64 = STI.isTarget64BitLP64();
				2026	DebugLoc DL = I->getDebugLoc();
				2027	uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
				2028	uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
				2029	I = MBB.erase(I);
				2030
				2031	if (!reserveCallFrame) {
				2032	// If the stack pointer can be changed after prologue, turn the
				2033	// adjcallstackup instruction into a 'sub ESP, <amt>' and the
				2034	// adjcallstackdown instruction into 'add ESP, <amt>'
				2035	if (Amount == 0)
				2036	return;
				2037
				2038	// We need to keep the stack aligned properly. To do this, we round the
				2039	// amount of space needed for the outgoing arguments up to the next
				2040	// alignment boundary.
				2041	unsigned StackAlign = MF.getTarget()
				2042	.getSubtargetImpl()
				2043	->getFrameLowering()
				2044	->getStackAlignment();
				2045	Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
				2046
				2047	MachineInstr *New = nullptr;
				2048	if (Opcode == TII.getCallFrameSetupOpcode()) {
				2049	// Try to convert movs to the stack into pushes.
				2050	// We currently only look for a pattern that appears in 32-bit
				2051	// calling conventions.
				2052	if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
				2053	return;
				2054
				2055	New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
				2056	StackPtr)
				2057	.addReg(StackPtr)
				2058	.addImm(Amount);
				2059	} else {
				2060	assert(Opcode == TII.getCallFrameDestroyOpcode());
				2061
				2062	// Factor out the amount the callee already popped.
				2063	Amount -= CalleeAmt;
				2064
				2065	if (Amount) {
				2066	unsigned Opc = getADDriOpcode(IsLP64, Amount);
				2067	New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
				2068	.addReg(StackPtr).addImm(Amount);
				2069	}
				2070	}
				2071
				2072	if (New) {
				2073	// The EFLAGS implicit def is dead.
				2074	New->getOperand(3).setIsDead();
				2075
				2076	// Replace the pseudo instruction with a new instruction.
				2077	MBB.insert(I, New);
				2078	}
				2079
				2080	return;
				2081	}
				2082
				2083	if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
				2084	// If we are performing frame pointer elimination and if the callee pops
				2085	// something off the stack pointer, add it back. We do this until we have
				2086	// more advanced stack pointer tracking ability.
				2087	unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
				2088	MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
				2089	.addReg(StackPtr).addImm(CalleeAmt);
				2090
				2091	// The EFLAGS implicit def is dead.
				2092	New->getOperand(3).setIsDead();
				2093
				2094	// We are not tracking the stack pointer adjustment by the callee, so make
				2095	// sure we restore the stack pointer immediately after the call, there may
				2096	// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
				2097	MachineBasicBlock::iterator B = MBB.begin();
				2098	while (I != B && !std::prev(I)->isCall())
				2099	--I;
				2100	MBB.insert(I, New);
				2101	}
				2102	}
				2103