Blame - lib/Target/ARM64/ARM64FastISel.cpp - fp2-dev/platform/external/llvm

blob: 51b0f7613ff9ed941b3ff572a3e3738274746f89 [file] [log] [blame]

Stephen Hines	36b5688	2014-04-23 16:57:46 -0700	[diff] [blame]	1	//===-- ARM64FastISel.cpp - ARM64 FastISel implementation -----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the ARM64-specific support for the FastISel class. Some
				11	// of the target-specific code is generated by tablegen in the file
				12	// ARM64GenFastISel.inc, which is #included here.
				13	//
				14	//===----------------------------------------------------------------------===//
				15
				16	#include "ARM64.h"
				17	#include "ARM64TargetMachine.h"
				18	#include "ARM64Subtarget.h"
				19	#include "ARM64CallingConv.h"
				20	#include "MCTargetDesc/ARM64AddressingModes.h"
				21	#include "llvm/CodeGen/CallingConvLower.h"
				22	#include "llvm/CodeGen/FastISel.h"
				23	#include "llvm/CodeGen/FunctionLoweringInfo.h"
				24	#include "llvm/CodeGen/MachineConstantPool.h"
				25	#include "llvm/CodeGen/MachineFrameInfo.h"
				26	#include "llvm/CodeGen/MachineInstrBuilder.h"
				27	#include "llvm/CodeGen/MachineRegisterInfo.h"
				28	#include "llvm/IR/CallingConv.h"
				29	#include "llvm/IR/DataLayout.h"
				30	#include "llvm/IR/DerivedTypes.h"
				31	#include "llvm/IR/Function.h"
				32	#include "llvm/IR/GetElementPtrTypeIterator.h"
				33	#include "llvm/IR/GlobalAlias.h"
				34	#include "llvm/IR/GlobalVariable.h"
				35	#include "llvm/IR/Instructions.h"
				36	#include "llvm/IR/IntrinsicInst.h"
				37	#include "llvm/IR/Operator.h"
				38	#include "llvm/Support/CommandLine.h"
				39	using namespace llvm;
				40
				41	namespace {
				42
				43	class ARM64FastISel : public FastISel {
				44
				45	class Address {
				46	public:
				47	typedef enum {
				48	RegBase,
				49	FrameIndexBase
				50	} BaseKind;
				51
				52	private:
				53	BaseKind Kind;
				54	union {
				55	unsigned Reg;
				56	int FI;
				57	} Base;
				58	int64_t Offset;
				59
				60	public:
				61	Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
				62	void setKind(BaseKind K) { Kind = K; }
				63	BaseKind getKind() const { return Kind; }
				64	bool isRegBase() const { return Kind == RegBase; }
				65	bool isFIBase() const { return Kind == FrameIndexBase; }
				66	void setReg(unsigned Reg) {
				67	assert(isRegBase() && "Invalid base register access!");
				68	Base.Reg = Reg;
				69	}
				70	unsigned getReg() const {
				71	assert(isRegBase() && "Invalid base register access!");
				72	return Base.Reg;
				73	}
				74	void setFI(unsigned FI) {
				75	assert(isFIBase() && "Invalid base frame index access!");
				76	Base.FI = FI;
				77	}
				78	unsigned getFI() const {
				79	assert(isFIBase() && "Invalid base frame index access!");
				80	return Base.FI;
				81	}
				82	void setOffset(int64_t O) { Offset = O; }
				83	int64_t getOffset() { return Offset; }
				84
				85	bool isValid() { return isFIBase() \|\| (isRegBase() && getReg() != 0); }
				86	};
				87
				88	/// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
				89	/// make the right decision when generating code for different targets.
				90	const ARM64Subtarget *Subtarget;
				91	LLVMContext *Context;
				92
				93	private:
				94	// Selection routines.
				95	bool SelectLoad(const Instruction *I);
				96	bool SelectStore(const Instruction *I);
				97	bool SelectBranch(const Instruction *I);
				98	bool SelectIndirectBr(const Instruction *I);
				99	bool SelectCmp(const Instruction *I);
				100	bool SelectSelect(const Instruction *I);
				101	bool SelectFPExt(const Instruction *I);
				102	bool SelectFPTrunc(const Instruction *I);
				103	bool SelectFPToInt(const Instruction *I, bool Signed);
				104	bool SelectIntToFP(const Instruction *I, bool Signed);
				105	bool SelectRem(const Instruction *I, unsigned ISDOpcode);
				106	bool SelectCall(const Instruction I, const char IntrMemName);
				107	bool SelectIntrinsicCall(const IntrinsicInst &I);
				108	bool SelectRet(const Instruction *I);
				109	bool SelectTrunc(const Instruction *I);
				110	bool SelectIntExt(const Instruction *I);
				111	bool SelectMul(const Instruction *I);
				112
				113	// Utility helper routines.
				114	bool isTypeLegal(Type *Ty, MVT &VT);
				115	bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
				116	bool ComputeAddress(const Value *Obj, Address &Addr);
				117	bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
				118	bool UseUnscaled);
				119	void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
				120	unsigned Flags, bool UseUnscaled);
				121	bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
				122	bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
				123	unsigned Alignment);
				124	// Emit functions.
				125	bool EmitCmp(Value Src1Value, Value Src2Value, bool isZExt);
				126	bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
				127	bool UseUnscaled = false);
				128	bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
				129	bool UseUnscaled = false);
				130	unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
				131	unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
				132
				133	unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT);
				134	unsigned ARM64MaterializeGV(const GlobalValue *GV);
				135
				136	// Call handling routines.
				137	private:
				138	CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
				139	bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
				140	SmallVectorImpl<unsigned> &ArgRegs,
				141	SmallVectorImpl<MVT> &ArgVTs,
				142	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
				143	SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
				144	unsigned &NumBytes);
				145	bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
				146	const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
				147
				148	public:
				149	// Backend specific FastISel code.
				150	virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
				151	virtual unsigned TargetMaterializeConstant(const Constant *C);
				152
				153	explicit ARM64FastISel(FunctionLoweringInfo &funcInfo,
				154	const TargetLibraryInfo *libInfo)
				155	: FastISel(funcInfo, libInfo) {
				156	Subtarget = &TM.getSubtarget<ARM64Subtarget>();
				157	Context = &funcInfo.Fn->getContext();
				158	}
				159
				160	virtual bool TargetSelectInstruction(const Instruction *I);
				161
				162	#include "ARM64GenFastISel.inc"
				163	};
				164
				165	} // end anonymous namespace
				166
				167	#include "ARM64GenCallingConv.inc"
				168
				169	CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
				170	if (CC == CallingConv::WebKit_JS)
				171	return CC_ARM64_WebKit_JS;
				172	return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS;
				173	}
				174
				175	unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
				176	assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
				177	"Alloca should always return a pointer.");
				178
				179	// Don't handle dynamic allocas.
				180	if (!FuncInfo.StaticAllocaMap.count(AI))
				181	return 0;
				182
				183	DenseMap<const AllocaInst *, int>::iterator SI =
				184	FuncInfo.StaticAllocaMap.find(AI);
				185
				186	if (SI != FuncInfo.StaticAllocaMap.end()) {
				187	unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass);
				188	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
				189	ResultReg)
				190	.addFrameIndex(SI->second)
				191	.addImm(0)
				192	.addImm(0);
				193	return ResultReg;
				194	}
				195
				196	return 0;
				197	}
				198
				199	unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
				200	const APFloat Val = CFP->getValueAPF();
				201	bool is64bit = (VT == MVT::f64);
				202
				203	// This checks to see if we can use FMOV instructions to materialize
				204	// a constant, otherwise we have to materialize via the constant pool.
				205	if (TLI.isFPImmLegal(Val, VT)) {
				206	int Imm;
				207	unsigned Opc;
				208	if (is64bit) {
				209	Imm = ARM64_AM::getFP64Imm(Val);
				210	Opc = ARM64::FMOVDi;
				211	} else {
				212	Imm = ARM64_AM::getFP32Imm(Val);
				213	Opc = ARM64::FMOVSi;
				214	}
				215	unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
				216	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				217	.addImm(Imm);
				218	return ResultReg;
				219	}
				220
				221	// Materialize via constant pool. MachineConstantPool wants an explicit
				222	// alignment.
				223	unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
				224	if (Align == 0)
				225	Align = DL.getTypeAllocSize(CFP->getType());
				226
				227	unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
				228	unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
				229	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
				230	ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE);
				231
				232	unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui;
				233	unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
				234	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				235	.addReg(ADRPReg)
				236	.addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF \| ARM64II::MO_NC);
				237	return ResultReg;
				238	}
				239
				240	unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) {
				241	// We can't handle thread-local variables quickly yet. Unfortunately we have
				242	// to peer through any aliases to find out if that rule applies.
				243	const GlobalValue *TLSGV = GV;
				244	if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
				245	TLSGV = GA->getAliasedGlobal();
				246
				247	if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
				248	if (GVar->isThreadLocal())
				249	return 0;
				250
				251	unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
				252
				253	EVT DestEVT = TLI.getValueType(GV->getType(), true);
				254	if (!DestEVT.isSimple())
				255	return 0;
				256	MVT DestVT = DestEVT.getSimpleVT();
				257
				258	unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
				259	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				260
				261	if (OpFlags & ARM64II::MO_GOT) {
				262	// ADRP + LDRX
				263	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
				264	ADRPReg)
				265	.addGlobalAddress(GV, 0, ARM64II::MO_GOT \| ARM64II::MO_PAGE);
				266	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui),
				267	ResultReg)
				268	.addReg(ADRPReg)
				269	.addGlobalAddress(GV, 0, ARM64II::MO_GOT \| ARM64II::MO_PAGEOFF \|
				270	ARM64II::MO_NC);
				271	} else {
				272	// ADRP + ADDX
				273	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
				274	ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE);
				275	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
				276	ResultReg)
				277	.addReg(ADRPReg)
				278	.addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF \| ARM64II::MO_NC)
				279	.addImm(0);
				280	}
				281	return ResultReg;
				282	}
				283
				284	unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) {
				285	EVT CEVT = TLI.getValueType(C->getType(), true);
				286
				287	// Only handle simple types.
				288	if (!CEVT.isSimple())
				289	return 0;
				290	MVT VT = CEVT.getSimpleVT();
				291
				292	// FIXME: Handle ConstantInt.
				293	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
				294	return ARM64MaterializeFP(CFP, VT);
				295	else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
				296	return ARM64MaterializeGV(GV);
				297
				298	return 0;
				299	}
				300
				301	// Computes the address to get to an object.
				302	bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
				303	const User *U = NULL;
				304	unsigned Opcode = Instruction::UserOp1;
				305	if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
				306	// Don't walk into other basic blocks unless the object is an alloca from
				307	// another block, otherwise it may not have a virtual register assigned.
				308	if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) \|\|
				309	FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
				310	Opcode = I->getOpcode();
				311	U = I;
				312	}
				313	} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
				314	Opcode = C->getOpcode();
				315	U = C;
				316	}
				317
				318	if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
				319	if (Ty->getAddressSpace() > 255)
				320	// Fast instruction selection doesn't support the special
				321	// address spaces.
				322	return false;
				323
				324	switch (Opcode) {
				325	default:
				326	break;
				327	case Instruction::BitCast: {
				328	// Look through bitcasts.
				329	return ComputeAddress(U->getOperand(0), Addr);
				330	}
				331	case Instruction::IntToPtr: {
				332	// Look past no-op inttoptrs.
				333	if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
				334	return ComputeAddress(U->getOperand(0), Addr);
				335	break;
				336	}
				337	case Instruction::PtrToInt: {
				338	// Look past no-op ptrtoints.
				339	if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
				340	return ComputeAddress(U->getOperand(0), Addr);
				341	break;
				342	}
				343	case Instruction::GetElementPtr: {
				344	Address SavedAddr = Addr;
				345	uint64_t TmpOffset = Addr.getOffset();
				346
				347	// Iterate through the GEP folding the constants into offsets where
				348	// we can.
				349	gep_type_iterator GTI = gep_type_begin(U);
				350	for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
				351	++i, ++GTI) {
				352	const Value Op = i;
				353	if (StructType STy = dyn_cast<StructType>(GTI)) {
				354	const StructLayout *SL = DL.getStructLayout(STy);
				355	unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
				356	TmpOffset += SL->getElementOffset(Idx);
				357	} else {
				358	uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
				359	for (;;) {
				360	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
				361	// Constant-offset addressing.
				362	TmpOffset += CI->getSExtValue() * S;
				363	break;
				364	}
				365	if (canFoldAddIntoGEP(U, Op)) {
				366	// A compatible add with a constant operand. Fold the constant.
				367	ConstantInt *CI =
				368	cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
				369	TmpOffset += CI->getSExtValue() * S;
				370	// Iterate on the other operand.
				371	Op = cast<AddOperator>(Op)->getOperand(0);
				372	continue;
				373	}
				374	// Unsupported
				375	goto unsupported_gep;
				376	}
				377	}
				378	}
				379
				380	// Try to grab the base operand now.
				381	Addr.setOffset(TmpOffset);
				382	if (ComputeAddress(U->getOperand(0), Addr))
				383	return true;
				384
				385	// We failed, restore everything and try the other options.
				386	Addr = SavedAddr;
				387
				388	unsupported_gep:
				389	break;
				390	}
				391	case Instruction::Alloca: {
				392	const AllocaInst *AI = cast<AllocaInst>(Obj);
				393	DenseMap<const AllocaInst *, int>::iterator SI =
				394	FuncInfo.StaticAllocaMap.find(AI);
				395	if (SI != FuncInfo.StaticAllocaMap.end()) {
				396	Addr.setKind(Address::FrameIndexBase);
				397	Addr.setFI(SI->second);
				398	return true;
				399	}
				400	break;
				401	}
				402	}
				403
				404	// Try to get this in a register if nothing else has worked.
				405	if (!Addr.isValid())
				406	Addr.setReg(getRegForValue(Obj));
				407	return Addr.isValid();
				408	}
				409
				410	bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
				411	EVT evt = TLI.getValueType(Ty, true);
				412
				413	// Only handle simple types.
				414	if (evt == MVT::Other \|\| !evt.isSimple())
				415	return false;
				416	VT = evt.getSimpleVT();
				417
				418	// Handle all legal types, i.e. a register that will directly hold this
				419	// value.
				420	return TLI.isTypeLegal(VT);
				421	}
				422
				423	bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
				424	if (isTypeLegal(Ty, VT))
				425	return true;
				426
				427	// If this is a type than can be sign or zero-extended to a basic operation
				428	// go ahead and accept it now. For stores, this reflects truncation.
				429	if (VT == MVT::i1 \|\| VT == MVT::i8 \|\| VT == MVT::i16)
				430	return true;
				431
				432	return false;
				433	}
				434
				435	bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
				436	bool UseUnscaled) {
				437	bool needsLowering = false;
				438	int64_t Offset = Addr.getOffset();
				439	switch (VT.SimpleTy) {
				440	default:
				441	return false;
				442	case MVT::i1:
				443	case MVT::i8:
				444	case MVT::i16:
				445	case MVT::i32:
				446	case MVT::i64:
				447	case MVT::f32:
				448	case MVT::f64:
				449	if (!UseUnscaled)
				450	// Using scaled, 12-bit, unsigned immediate offsets.
				451	needsLowering = ((Offset & 0xfff) != Offset);
				452	else
				453	// Using unscaled, 9-bit, signed immediate offsets.
				454	needsLowering = (Offset > 256 \|\| Offset < -256);
				455	break;
				456	}
				457
				458	// FIXME: If this is a stack pointer and the offset needs to be simplified
				459	// then put the alloca address into a register, set the base type back to
				460	// register and continue. This should almost never happen.
				461	if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
				462	return false;
				463	}
				464
				465	// Since the offset is too large for the load/store instruction get the
				466	// reg+offset into a register.
				467	if (needsLowering) {
				468	uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
				469	unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
				470	UnscaledOffset, MVT::i64);
				471	if (ResultReg == 0)
				472	return false;
				473	Addr.setReg(ResultReg);
				474	Addr.setOffset(0);
				475	}
				476	return true;
				477	}
				478
				479	void ARM64FastISel::AddLoadStoreOperands(Address &Addr,
				480	const MachineInstrBuilder &MIB,
				481	unsigned Flags, bool UseUnscaled) {
				482	int64_t Offset = Addr.getOffset();
				483	// Frame base works a bit differently. Handle it separately.
				484	if (Addr.getKind() == Address::FrameIndexBase) {
				485	int FI = Addr.getFI();
				486	// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
				487	// and alignment should be based on the VT.
				488	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
				489	MachinePointerInfo::getFixedStack(FI, Offset), Flags,
				490	MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
				491	// Now add the rest of the operands.
				492	MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
				493	} else {
				494	// Now add the rest of the operands.
				495	MIB.addReg(Addr.getReg());
				496	MIB.addImm(Offset);
				497	}
				498	}
				499
				500	bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
				501	bool UseUnscaled) {
				502	// Negative offsets require unscaled, 9-bit, signed immediate offsets.
				503	// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
				504	if (!UseUnscaled && Addr.getOffset() < 0)
				505	UseUnscaled = true;
				506
				507	unsigned Opc;
				508	const TargetRegisterClass *RC;
				509	bool VTIsi1 = false;
				510	int64_t ScaleFactor = 0;
				511	switch (VT.SimpleTy) {
				512	default:
				513	return false;
				514	case MVT::i1:
				515	VTIsi1 = true;
				516	// Intentional fall-through.
				517	case MVT::i8:
				518	Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui;
				519	RC = &ARM64::GPR32RegClass;
				520	ScaleFactor = 1;
				521	break;
				522	case MVT::i16:
				523	Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui;
				524	RC = &ARM64::GPR32RegClass;
				525	ScaleFactor = 2;
				526	break;
				527	case MVT::i32:
				528	Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui;
				529	RC = &ARM64::GPR32RegClass;
				530	ScaleFactor = 4;
				531	break;
				532	case MVT::i64:
				533	Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui;
				534	RC = &ARM64::GPR64RegClass;
				535	ScaleFactor = 8;
				536	break;
				537	case MVT::f32:
				538	Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui;
				539	RC = TLI.getRegClassFor(VT);
				540	ScaleFactor = 4;
				541	break;
				542	case MVT::f64:
				543	Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui;
				544	RC = TLI.getRegClassFor(VT);
				545	ScaleFactor = 8;
				546	break;
				547	}
				548	// Scale the offset.
				549	if (!UseUnscaled) {
				550	int64_t Offset = Addr.getOffset();
				551	if (Offset & (ScaleFactor - 1))
				552	// Retry using an unscaled, 9-bit, signed immediate offset.
				553	return EmitLoad(VT, ResultReg, Addr, /UseUnscaled/ true);
				554
				555	Addr.setOffset(Offset / ScaleFactor);
				556	}
				557
				558	// Simplify this down to something we can handle.
				559	if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
				560	return false;
				561
				562	// Create the base instruction, then add the operands.
				563	ResultReg = createResultReg(RC);
				564	MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				565	TII.get(Opc), ResultReg);
				566	AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
				567
				568	// Loading an i1 requires special handling.
				569	if (VTIsi1) {
				570	unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
				571	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				572	ANDReg)
				573	.addReg(ResultReg)
				574	.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
				575	ResultReg = ANDReg;
				576	}
				577	return true;
				578	}
				579
				580	bool ARM64FastISel::SelectLoad(const Instruction *I) {
				581	MVT VT;
				582	// Verify we have a legal type before going any further. Currently, we handle
				583	// simple types that will directly fit in a register (i32/f32/i64/f64) or
				584	// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
				585	if (!isLoadStoreTypeLegal(I->getType(), VT) \|\| cast<LoadInst>(I)->isAtomic())
				586	return false;
				587
				588	// See if we can handle this address.
				589	Address Addr;
				590	if (!ComputeAddress(I->getOperand(0), Addr))
				591	return false;
				592
				593	unsigned ResultReg;
				594	if (!EmitLoad(VT, ResultReg, Addr))
				595	return false;
				596
				597	UpdateValueMap(I, ResultReg);
				598	return true;
				599	}
				600
				601	bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
				602	bool UseUnscaled) {
				603	// Negative offsets require unscaled, 9-bit, signed immediate offsets.
				604	// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
				605	if (!UseUnscaled && Addr.getOffset() < 0)
				606	UseUnscaled = true;
				607
				608	unsigned StrOpc;
				609	bool VTIsi1 = false;
				610	int64_t ScaleFactor = 0;
				611	// Using scaled, 12-bit, unsigned immediate offsets.
				612	switch (VT.SimpleTy) {
				613	default:
				614	return false;
				615	case MVT::i1:
				616	VTIsi1 = true;
				617	case MVT::i8:
				618	StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui;
				619	ScaleFactor = 1;
				620	break;
				621	case MVT::i16:
				622	StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui;
				623	ScaleFactor = 2;
				624	break;
				625	case MVT::i32:
				626	StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui;
				627	ScaleFactor = 4;
				628	break;
				629	case MVT::i64:
				630	StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui;
				631	ScaleFactor = 8;
				632	break;
				633	case MVT::f32:
				634	StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui;
				635	ScaleFactor = 4;
				636	break;
				637	case MVT::f64:
				638	StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui;
				639	ScaleFactor = 8;
				640	break;
				641	}
				642	// Scale the offset.
				643	if (!UseUnscaled) {
				644	int64_t Offset = Addr.getOffset();
				645	if (Offset & (ScaleFactor - 1))
				646	// Retry using an unscaled, 9-bit, signed immediate offset.
				647	return EmitStore(VT, SrcReg, Addr, /UseUnscaled/ true);
				648
				649	Addr.setOffset(Offset / ScaleFactor);
				650	}
				651
				652	// Simplify this down to something we can handle.
				653	if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
				654	return false;
				655
				656	// Storing an i1 requires special handling.
				657	if (VTIsi1) {
				658	unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
				659	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				660	ANDReg)
				661	.addReg(SrcReg)
				662	.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
				663	SrcReg = ANDReg;
				664	}
				665	// Create the base instruction, then add the operands.
				666	MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				667	TII.get(StrOpc)).addReg(SrcReg);
				668	AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
				669	return true;
				670	}
				671
				672	bool ARM64FastISel::SelectStore(const Instruction *I) {
				673	MVT VT;
				674	Value *Op0 = I->getOperand(0);
				675	// Verify we have a legal type before going any further. Currently, we handle
				676	// simple types that will directly fit in a register (i32/f32/i64/f64) or
				677	// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
				678	if (!isLoadStoreTypeLegal(Op0->getType(), VT) \|\|
				679	cast<StoreInst>(I)->isAtomic())
				680	return false;
				681
				682	// Get the value to be stored into a register.
				683	unsigned SrcReg = getRegForValue(Op0);
				684	if (SrcReg == 0)
				685	return false;
				686
				687	// See if we can handle this address.
				688	Address Addr;
				689	if (!ComputeAddress(I->getOperand(1), Addr))
				690	return false;
				691
				692	if (!EmitStore(VT, SrcReg, Addr))
				693	return false;
				694	return true;
				695	}
				696
				697	static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
				698	switch (Pred) {
				699	case CmpInst::FCMP_ONE:
				700	case CmpInst::FCMP_UEQ:
				701	default:
				702	// AL is our "false" for now. The other two need more compares.
				703	return ARM64CC::AL;
				704	case CmpInst::ICMP_EQ:
				705	case CmpInst::FCMP_OEQ:
				706	return ARM64CC::EQ;
				707	case CmpInst::ICMP_SGT:
				708	case CmpInst::FCMP_OGT:
				709	return ARM64CC::GT;
				710	case CmpInst::ICMP_SGE:
				711	case CmpInst::FCMP_OGE:
				712	return ARM64CC::GE;
				713	case CmpInst::ICMP_UGT:
				714	case CmpInst::FCMP_UGT:
				715	return ARM64CC::HI;
				716	case CmpInst::FCMP_OLT:
				717	return ARM64CC::MI;
				718	case CmpInst::ICMP_ULE:
				719	case CmpInst::FCMP_OLE:
				720	return ARM64CC::LS;
				721	case CmpInst::FCMP_ORD:
				722	return ARM64CC::VC;
				723	case CmpInst::FCMP_UNO:
				724	return ARM64CC::VS;
				725	case CmpInst::FCMP_UGE:
				726	return ARM64CC::PL;
				727	case CmpInst::ICMP_SLT:
				728	case CmpInst::FCMP_ULT:
				729	return ARM64CC::LT;
				730	case CmpInst::ICMP_SLE:
				731	case CmpInst::FCMP_ULE:
				732	return ARM64CC::LE;
				733	case CmpInst::FCMP_UNE:
				734	case CmpInst::ICMP_NE:
				735	return ARM64CC::NE;
				736	case CmpInst::ICMP_UGE:
				737	return ARM64CC::CS;
				738	case CmpInst::ICMP_ULT:
				739	return ARM64CC::CC;
				740	}
				741	}
				742
				743	bool ARM64FastISel::SelectBranch(const Instruction *I) {
				744	const BranchInst *BI = cast<BranchInst>(I);
				745	MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
				746	MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
				747
				748	if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
				749	if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
				750	// We may not handle every CC for now.
				751	ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
				752	if (CC == ARM64CC::AL)
				753	return false;
				754
				755	// Emit the cmp.
				756	if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
				757	return false;
				758
				759	// Emit the branch.
				760	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
				761	.addImm(CC)
				762	.addMBB(TBB);
				763	FuncInfo.MBB->addSuccessor(TBB);
				764
				765	FastEmitBranch(FBB, DbgLoc);
				766	return true;
				767	}
				768	} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
				769	MVT SrcVT;
				770	if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
				771	(isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
				772	unsigned CondReg = getRegForValue(TI->getOperand(0));
				773	if (CondReg == 0)
				774	return false;
				775
				776	// Issue an extract_subreg to get the lower 32-bits.
				777	if (SrcVT == MVT::i64)
				778	CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /Kill=/true,
				779	ARM64::sub_32);
				780
				781	unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
				782	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				783	ANDReg)
				784	.addReg(CondReg)
				785	.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
				786	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri))
				787	.addReg(ANDReg)
				788	.addReg(ANDReg)
				789	.addImm(0)
				790	.addImm(0);
				791
				792	unsigned CC = ARM64CC::NE;
				793	if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
				794	std::swap(TBB, FBB);
				795	CC = ARM64CC::EQ;
				796	}
				797	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
				798	.addImm(CC)
				799	.addMBB(TBB);
				800	FuncInfo.MBB->addSuccessor(TBB);
				801	FastEmitBranch(FBB, DbgLoc);
				802	return true;
				803	}
				804	} else if (const ConstantInt *CI =
				805	dyn_cast<ConstantInt>(BI->getCondition())) {
				806	uint64_t Imm = CI->getZExtValue();
				807	MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
				808	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B))
				809	.addMBB(Target);
				810	FuncInfo.MBB->addSuccessor(Target);
				811	return true;
				812	}
				813
				814	unsigned CondReg = getRegForValue(BI->getCondition());
				815	if (CondReg == 0)
				816	return false;
				817
				818	// We've been divorced from our compare! Our block was split, and
				819	// now our compare lives in a predecessor block. We musn't
				820	// re-compare here, as the children of the compare aren't guaranteed
				821	// live across the block boundary (we could check for this).
				822	// Regardless, the compare has been done in the predecessor block,
				823	// and it left a value for us in a virtual register. Ergo, we test
				824	// the one-bit value left in the virtual register.
				825	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri),
				826	ARM64::WZR)
				827	.addReg(CondReg)
				828	.addImm(0)
				829	.addImm(0);
				830
				831	unsigned CC = ARM64CC::NE;
				832	if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
				833	std::swap(TBB, FBB);
				834	CC = ARM64CC::EQ;
				835	}
				836
				837	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
				838	.addImm(CC)
				839	.addMBB(TBB);
				840	FuncInfo.MBB->addSuccessor(TBB);
				841	FastEmitBranch(FBB, DbgLoc);
				842	return true;
				843	}
				844
				845	bool ARM64FastISel::SelectIndirectBr(const Instruction *I) {
				846	const IndirectBrInst *BI = cast<IndirectBrInst>(I);
				847	unsigned AddrReg = getRegForValue(BI->getOperand(0));
				848	if (AddrReg == 0)
				849	return false;
				850
				851	// Emit the indirect branch.
				852	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR))
				853	.addReg(AddrReg);
				854
				855	// Make sure the CFG is up-to-date.
				856	for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
				857	FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
				858
				859	return true;
				860	}
				861
				862	bool ARM64FastISel::EmitCmp(Value Src1Value, Value Src2Value, bool isZExt) {
				863	Type *Ty = Src1Value->getType();
				864	EVT SrcEVT = TLI.getValueType(Ty, true);
				865	if (!SrcEVT.isSimple())
				866	return false;
				867	MVT SrcVT = SrcEVT.getSimpleVT();
				868
				869	// Check to see if the 2nd operand is a constant that we can encode directly
				870	// in the compare.
				871	uint64_t Imm;
				872	bool UseImm = false;
				873	bool isNegativeImm = false;
				874	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
				875	if (SrcVT == MVT::i64 \|\| SrcVT == MVT::i32 \|\| SrcVT == MVT::i16 \|\|
				876	SrcVT == MVT::i8 \|\| SrcVT == MVT::i1) {
				877	const APInt &CIVal = ConstInt->getValue();
				878
				879	Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
				880	if (CIVal.isNegative()) {
				881	isNegativeImm = true;
				882	Imm = -Imm;
				883	}
				884	// FIXME: We can handle more immediates using shifts.
				885	UseImm = ((Imm & 0xfff) == Imm);
				886	}
				887	} else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
				888	if (SrcVT == MVT::f32 \|\| SrcVT == MVT::f64)
				889	if (ConstFP->isZero() && !ConstFP->isNegative())
				890	UseImm = true;
				891	}
				892
				893	unsigned ZReg;
				894	unsigned CmpOpc;
				895	bool isICmp = true;
				896	bool needsExt = false;
				897	switch (SrcVT.SimpleTy) {
				898	default:
				899	return false;
				900	case MVT::i1:
				901	case MVT::i8:
				902	case MVT::i16:
				903	needsExt = true;
				904	// Intentional fall-through.
				905	case MVT::i32:
				906	ZReg = ARM64::WZR;
				907	if (UseImm)
				908	CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri;
				909	else
				910	CmpOpc = ARM64::SUBSWrr;
				911	break;
				912	case MVT::i64:
				913	ZReg = ARM64::XZR;
				914	if (UseImm)
				915	CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri;
				916	else
				917	CmpOpc = ARM64::SUBSXrr;
				918	break;
				919	case MVT::f32:
				920	isICmp = false;
				921	CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr;
				922	break;
				923	case MVT::f64:
				924	isICmp = false;
				925	CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr;
				926	break;
				927	}
				928
				929	unsigned SrcReg1 = getRegForValue(Src1Value);
				930	if (SrcReg1 == 0)
				931	return false;
				932
				933	unsigned SrcReg2;
				934	if (!UseImm) {
				935	SrcReg2 = getRegForValue(Src2Value);
				936	if (SrcReg2 == 0)
				937	return false;
				938	}
				939
				940	// We have i1, i8, or i16, we need to either zero extend or sign extend.
				941	if (needsExt) {
				942	SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
				943	if (SrcReg1 == 0)
				944	return false;
				945	if (!UseImm) {
				946	SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
				947	if (SrcReg2 == 0)
				948	return false;
				949	}
				950	}
				951
				952	if (isICmp) {
				953	if (UseImm)
				954	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
				955	.addReg(ZReg)
				956	.addReg(SrcReg1)
				957	.addImm(Imm)
				958	.addImm(0);
				959	else
				960	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
				961	.addReg(ZReg)
				962	.addReg(SrcReg1)
				963	.addReg(SrcReg2);
				964	} else {
				965	if (UseImm)
				966	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
				967	.addReg(SrcReg1);
				968	else
				969	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
				970	.addReg(SrcReg1)
				971	.addReg(SrcReg2);
				972	}
				973	return true;
				974	}
				975
				976	bool ARM64FastISel::SelectCmp(const Instruction *I) {
				977	const CmpInst *CI = cast<CmpInst>(I);
				978
				979	// We may not handle every CC for now.
				980	ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
				981	if (CC == ARM64CC::AL)
				982	return false;
				983
				984	// Emit the cmp.
				985	if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
				986	return false;
				987
				988	// Now set a register based on the comparison.
				989	ARM64CC::CondCode invertedCC = getInvertedCondCode(CC);
				990	unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
				991	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr),
				992	ResultReg)
				993	.addReg(ARM64::WZR)
				994	.addReg(ARM64::WZR)
				995	.addImm(invertedCC);
				996
				997	UpdateValueMap(I, ResultReg);
				998	return true;
				999	}
				1000
				1001	bool ARM64FastISel::SelectSelect(const Instruction *I) {
				1002	const SelectInst *SI = cast<SelectInst>(I);
				1003
				1004	EVT DestEVT = TLI.getValueType(SI->getType(), true);
				1005	if (!DestEVT.isSimple())
				1006	return false;
				1007
				1008	MVT DestVT = DestEVT.getSimpleVT();
				1009	if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
				1010	DestVT != MVT::f64)
				1011	return false;
				1012
				1013	unsigned CondReg = getRegForValue(SI->getCondition());
				1014	if (CondReg == 0)
				1015	return false;
				1016	unsigned TrueReg = getRegForValue(SI->getTrueValue());
				1017	if (TrueReg == 0)
				1018	return false;
				1019	unsigned FalseReg = getRegForValue(SI->getFalseValue());
				1020	if (FalseReg == 0)
				1021	return false;
				1022
				1023	unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
				1024	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				1025	ANDReg)
				1026	.addReg(CondReg)
				1027	.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
				1028
				1029	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri))
				1030	.addReg(ANDReg)
				1031	.addReg(ANDReg)
				1032	.addImm(0)
				1033	.addImm(0);
				1034
				1035	unsigned SelectOpc;
				1036	switch (DestVT.SimpleTy) {
				1037	default:
				1038	return false;
				1039	case MVT::i32:
				1040	SelectOpc = ARM64::CSELWr;
				1041	break;
				1042	case MVT::i64:
				1043	SelectOpc = ARM64::CSELXr;
				1044	break;
				1045	case MVT::f32:
				1046	SelectOpc = ARM64::FCSELSrrr;
				1047	break;
				1048	case MVT::f64:
				1049	SelectOpc = ARM64::FCSELDrrr;
				1050	break;
				1051	}
				1052
				1053	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				1054	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
				1055	ResultReg)
				1056	.addReg(TrueReg)
				1057	.addReg(FalseReg)
				1058	.addImm(ARM64CC::NE);
				1059
				1060	UpdateValueMap(I, ResultReg);
				1061	return true;
				1062	}
				1063
				1064	bool ARM64FastISel::SelectFPExt(const Instruction *I) {
				1065	Value *V = I->getOperand(0);
				1066	if (!I->getType()->isDoubleTy() \|\| !V->getType()->isFloatTy())
				1067	return false;
				1068
				1069	unsigned Op = getRegForValue(V);
				1070	if (Op == 0)
				1071	return false;
				1072
				1073	unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass);
				1074	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr),
				1075	ResultReg).addReg(Op);
				1076	UpdateValueMap(I, ResultReg);
				1077	return true;
				1078	}
				1079
				1080	bool ARM64FastISel::SelectFPTrunc(const Instruction *I) {
				1081	Value *V = I->getOperand(0);
				1082	if (!I->getType()->isFloatTy() \|\| !V->getType()->isDoubleTy())
				1083	return false;
				1084
				1085	unsigned Op = getRegForValue(V);
				1086	if (Op == 0)
				1087	return false;
				1088
				1089	unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass);
				1090	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr),
				1091	ResultReg).addReg(Op);
				1092	UpdateValueMap(I, ResultReg);
				1093	return true;
				1094	}
				1095
				1096	// FPToUI and FPToSI
				1097	bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
				1098	MVT DestVT;
				1099	if (!isTypeLegal(I->getType(), DestVT) \|\| DestVT.isVector())
				1100	return false;
				1101
				1102	unsigned SrcReg = getRegForValue(I->getOperand(0));
				1103	if (SrcReg == 0)
				1104	return false;
				1105
				1106	EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
				1107
				1108	unsigned Opc;
				1109	if (SrcVT == MVT::f64) {
				1110	if (Signed)
				1111	Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr;
				1112	else
				1113	Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr;
				1114	} else {
				1115	if (Signed)
				1116	Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr;
				1117	else
				1118	Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr;
				1119	}
				1120	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				1121	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				1122	.addReg(SrcReg);
				1123	UpdateValueMap(I, ResultReg);
				1124	return true;
				1125	}
				1126
				1127	bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
				1128	MVT DestVT;
				1129	if (!isTypeLegal(I->getType(), DestVT) \|\| DestVT.isVector())
				1130	return false;
				1131
				1132	unsigned SrcReg = getRegForValue(I->getOperand(0));
				1133	if (SrcReg == 0)
				1134	return false;
				1135
				1136	EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
				1137
				1138	// Handle sign-extension.
				1139	if (SrcVT == MVT::i16 \|\| SrcVT == MVT::i8 \|\| SrcVT == MVT::i1) {
				1140	SrcReg =
				1141	EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /isZExt/ !Signed);
				1142	if (SrcReg == 0)
				1143	return false;
				1144	}
				1145
				1146	unsigned Opc;
				1147	if (SrcVT == MVT::i64) {
				1148	if (Signed)
				1149	Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri;
				1150	else
				1151	Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri;
				1152	} else {
				1153	if (Signed)
				1154	Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri;
				1155	else
				1156	Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri;
				1157	}
				1158
				1159	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				1160	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				1161	.addReg(SrcReg);
				1162	UpdateValueMap(I, ResultReg);
				1163	return true;
				1164	}
				1165
				1166	bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args,
				1167	SmallVectorImpl<unsigned> &ArgRegs,
				1168	SmallVectorImpl<MVT> &ArgVTs,
				1169	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
				1170	SmallVectorImpl<unsigned> &RegArgs,
				1171	CallingConv::ID CC, unsigned &NumBytes) {
				1172	SmallVector<CCValAssign, 16> ArgLocs;
				1173	CCState CCInfo(CC, false, FuncInfo.MF, TM, ArgLocs, Context);
				1174	CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
				1175
				1176	// Get a count of how many bytes are to be pushed on the stack.
				1177	NumBytes = CCInfo.getNextStackOffset();
				1178
				1179	// Issue CALLSEQ_START
				1180	unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
				1181	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
				1182	.addImm(NumBytes);
				1183
				1184	// Process the args.
				1185	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1186	CCValAssign &VA = ArgLocs[i];
				1187	unsigned Arg = ArgRegs[VA.getValNo()];
				1188	MVT ArgVT = ArgVTs[VA.getValNo()];
				1189
				1190	// Handle arg promotion: SExt, ZExt, AExt.
				1191	switch (VA.getLocInfo()) {
				1192	case CCValAssign::Full:
				1193	break;
				1194	case CCValAssign::SExt: {
				1195	MVT DestVT = VA.getLocVT();
				1196	MVT SrcVT = ArgVT;
				1197	Arg = EmitIntExt(SrcVT, Arg, DestVT, /isZExt/ false);
				1198	if (Arg == 0)
				1199	return false;
				1200	ArgVT = DestVT;
				1201	break;
				1202	}
				1203	case CCValAssign::AExt:
				1204	// Intentional fall-through.
				1205	case CCValAssign::ZExt: {
				1206	MVT DestVT = VA.getLocVT();
				1207	MVT SrcVT = ArgVT;
				1208	Arg = EmitIntExt(SrcVT, Arg, DestVT, /isZExt/ true);
				1209	if (Arg == 0)
				1210	return false;
				1211	ArgVT = DestVT;
				1212	break;
				1213	}
				1214	default:
				1215	llvm_unreachable("Unknown arg promotion!");
				1216	}
				1217
				1218	// Now copy/store arg to correct locations.
				1219	if (VA.isRegLoc() && !VA.needsCustom()) {
				1220	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				1221	TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
				1222	RegArgs.push_back(VA.getLocReg());
				1223	} else if (VA.needsCustom()) {
				1224	// FIXME: Handle custom args.
				1225	return false;
				1226	} else {
				1227	assert(VA.isMemLoc() && "Assuming store on stack.");
				1228
				1229	// Need to store on the stack.
				1230	Address Addr;
				1231	Addr.setKind(Address::RegBase);
				1232	Addr.setReg(ARM64::SP);
				1233	Addr.setOffset(VA.getLocMemOffset());
				1234
				1235	if (!EmitStore(ArgVT, Arg, Addr))
				1236	return false;
				1237	}
				1238	}
				1239	return true;
				1240	}
				1241
				1242	bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
				1243	const Instruction *I, CallingConv::ID CC,
				1244	unsigned &NumBytes) {
				1245	// Issue CALLSEQ_END
				1246	unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
				1247	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
				1248	.addImm(NumBytes)
				1249	.addImm(0);
				1250
				1251	// Now the return value.
				1252	if (RetVT != MVT::isVoid) {
				1253	SmallVector<CCValAssign, 16> RVLocs;
				1254	CCState CCInfo(CC, false, FuncInfo.MF, TM, RVLocs, Context);
				1255	CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
				1256
				1257	// Only handle a single return value.
				1258	if (RVLocs.size() != 1)
				1259	return false;
				1260
				1261	// Copy all of the result registers out of their specified physreg.
				1262	MVT CopyVT = RVLocs[0].getValVT();
				1263	unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
				1264	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				1265	TII.get(TargetOpcode::COPY),
				1266	ResultReg).addReg(RVLocs[0].getLocReg());
				1267	UsedRegs.push_back(RVLocs[0].getLocReg());
				1268
				1269	// Finally update the result.
				1270	UpdateValueMap(I, ResultReg);
				1271	}
				1272
				1273	return true;
				1274	}
				1275
				1276	bool ARM64FastISel::SelectCall(const Instruction *I,
				1277	const char *IntrMemName = 0) {
				1278	const CallInst *CI = cast<CallInst>(I);
				1279	const Value *Callee = CI->getCalledValue();
				1280
				1281	// Don't handle inline asm or intrinsics.
				1282	if (isa<InlineAsm>(Callee))
				1283	return false;
				1284
				1285	// Only handle global variable Callees.
				1286	const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
				1287	if (!GV)
				1288	return false;
				1289
				1290	// Check the calling convention.
				1291	ImmutableCallSite CS(CI);
				1292	CallingConv::ID CC = CS.getCallingConv();
				1293
				1294	// Let SDISel handle vararg functions.
				1295	PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
				1296	FunctionType *FTy = cast<FunctionType>(PT->getElementType());
				1297	if (FTy->isVarArg())
				1298	return false;
				1299
				1300	// Handle simple calls for now.
				1301	MVT RetVT;
				1302	Type *RetTy = I->getType();
				1303	if (RetTy->isVoidTy())
				1304	RetVT = MVT::isVoid;
				1305	else if (!isTypeLegal(RetTy, RetVT))
				1306	return false;
				1307
				1308	// Set up the argument vectors.
				1309	SmallVector<Value *, 8> Args;
				1310	SmallVector<unsigned, 8> ArgRegs;
				1311	SmallVector<MVT, 8> ArgVTs;
				1312	SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
				1313	Args.reserve(CS.arg_size());
				1314	ArgRegs.reserve(CS.arg_size());
				1315	ArgVTs.reserve(CS.arg_size());
				1316	ArgFlags.reserve(CS.arg_size());
				1317
				1318	for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
				1319	i != e; ++i) {
				1320	// If we're lowering a memory intrinsic instead of a regular call, skip the
				1321	// last two arguments, which shouldn't be passed to the underlying function.
				1322	if (IntrMemName && e - i <= 2)
				1323	break;
				1324
				1325	unsigned Arg = getRegForValue(*i);
				1326	if (Arg == 0)
				1327	return false;
				1328
				1329	ISD::ArgFlagsTy Flags;
				1330	unsigned AttrInd = i - CS.arg_begin() + 1;
				1331	if (CS.paramHasAttr(AttrInd, Attribute::SExt))
				1332	Flags.setSExt();
				1333	if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
				1334	Flags.setZExt();
				1335
				1336	// FIXME: Only handle easy calls for now.
				1337	if (CS.paramHasAttr(AttrInd, Attribute::InReg) \|\|
				1338	CS.paramHasAttr(AttrInd, Attribute::StructRet) \|\|
				1339	CS.paramHasAttr(AttrInd, Attribute::Nest) \|\|
				1340	CS.paramHasAttr(AttrInd, Attribute::ByVal))
				1341	return false;
				1342
				1343	MVT ArgVT;
				1344	Type ArgTy = (i)->getType();
				1345	if (!isTypeLegal(ArgTy, ArgVT) &&
				1346	!(ArgVT == MVT::i1 \|\| ArgVT == MVT::i8 \|\| ArgVT == MVT::i16))
				1347	return false;
				1348
				1349	// We don't handle vector parameters yet.
				1350	if (ArgVT.isVector() \|\| ArgVT.getSizeInBits() > 64)
				1351	return false;
				1352
				1353	unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
				1354	Flags.setOrigAlign(OriginalAlignment);
				1355
				1356	Args.push_back(*i);
				1357	ArgRegs.push_back(Arg);
				1358	ArgVTs.push_back(ArgVT);
				1359	ArgFlags.push_back(Flags);
				1360	}
				1361
				1362	// Handle the arguments now that we've gotten them.
				1363	SmallVector<unsigned, 4> RegArgs;
				1364	unsigned NumBytes;
				1365	if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
				1366	return false;
				1367
				1368	// Issue the call.
				1369	MachineInstrBuilder MIB;
				1370	MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL));
				1371	if (!IntrMemName)
				1372	MIB.addGlobalAddress(GV, 0, 0);
				1373	else
				1374	MIB.addExternalSymbol(IntrMemName, 0);
				1375
				1376	// Add implicit physical register uses to the call.
				1377	for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
				1378	MIB.addReg(RegArgs[i], RegState::Implicit);
				1379
				1380	// Add a register mask with the call-preserved registers.
				1381	// Proper defs for return values will be added by setPhysRegsDeadExcept().
				1382	MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
				1383
				1384	// Finish off the call including any return values.
				1385	SmallVector<unsigned, 4> UsedRegs;
				1386	if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
				1387	return false;
				1388
				1389	// Set all unused physreg defs as dead.
				1390	static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
				1391
				1392	return true;
				1393	}
				1394
				1395	bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
				1396	if (Alignment)
				1397	return Len / Alignment <= 4;
				1398	else
				1399	return Len < 32;
				1400	}
				1401
				1402	bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
				1403	unsigned Alignment) {
				1404	// Make sure we don't bloat code by inlining very large memcpy's.
				1405	if (!IsMemCpySmall(Len, Alignment))
				1406	return false;
				1407
				1408	int64_t UnscaledOffset = 0;
				1409	Address OrigDest = Dest;
				1410	Address OrigSrc = Src;
				1411
				1412	while (Len) {
				1413	MVT VT;
				1414	if (!Alignment \|\| Alignment >= 8) {
				1415	if (Len >= 8)
				1416	VT = MVT::i64;
				1417	else if (Len >= 4)
				1418	VT = MVT::i32;
				1419	else if (Len >= 2)
				1420	VT = MVT::i16;
				1421	else {
				1422	VT = MVT::i8;
				1423	}
				1424	} else {
				1425	// Bound based on alignment.
				1426	if (Len >= 4 && Alignment == 4)
				1427	VT = MVT::i32;
				1428	else if (Len >= 2 && Alignment == 2)
				1429	VT = MVT::i16;
				1430	else {
				1431	VT = MVT::i8;
				1432	}
				1433	}
				1434
				1435	bool RV;
				1436	unsigned ResultReg;
				1437	RV = EmitLoad(VT, ResultReg, Src);
				1438	assert(RV == true && "Should be able to handle this load.");
				1439	RV = EmitStore(VT, ResultReg, Dest);
				1440	assert(RV == true && "Should be able to handle this store.");
				1441	(void)RV;
				1442
				1443	int64_t Size = VT.getSizeInBits() / 8;
				1444	Len -= Size;
				1445	UnscaledOffset += Size;
				1446
				1447	// We need to recompute the unscaled offset for each iteration.
				1448	Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
				1449	Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
				1450	}
				1451
				1452	return true;
				1453	}
				1454
				1455	bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
				1456	// FIXME: Handle more intrinsics.
				1457	switch (I.getIntrinsicID()) {
				1458	default:
				1459	return false;
				1460	case Intrinsic::memcpy:
				1461	case Intrinsic::memmove: {
				1462	const MemTransferInst &MTI = cast<MemTransferInst>(I);
				1463	// Don't handle volatile.
				1464	if (MTI.isVolatile())
				1465	return false;
				1466
				1467	// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
				1468	// we would emit dead code because we don't currently handle memmoves.
				1469	bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
				1470	if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
				1471	// Small memcpy's are common enough that we want to do them without a call
				1472	// if possible.
				1473	uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
				1474	unsigned Alignment = MTI.getAlignment();
				1475	if (IsMemCpySmall(Len, Alignment)) {
				1476	Address Dest, Src;
				1477	if (!ComputeAddress(MTI.getRawDest(), Dest) \|\|
				1478	!ComputeAddress(MTI.getRawSource(), Src))
				1479	return false;
				1480	if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
				1481	return true;
				1482	}
				1483	}
				1484
				1485	if (!MTI.getLength()->getType()->isIntegerTy(64))
				1486	return false;
				1487
				1488	if (MTI.getSourceAddressSpace() > 255 \|\| MTI.getDestAddressSpace() > 255)
				1489	// Fast instruction selection doesn't support the special
				1490	// address spaces.
				1491	return false;
				1492
				1493	const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
				1494	return SelectCall(&I, IntrMemName);
				1495	}
				1496	case Intrinsic::memset: {
				1497	const MemSetInst &MSI = cast<MemSetInst>(I);
				1498	// Don't handle volatile.
				1499	if (MSI.isVolatile())
				1500	return false;
				1501
				1502	if (!MSI.getLength()->getType()->isIntegerTy(64))
				1503	return false;
				1504
				1505	if (MSI.getDestAddressSpace() > 255)
				1506	// Fast instruction selection doesn't support the special
				1507	// address spaces.
				1508	return false;
				1509
				1510	return SelectCall(&I, "memset");
				1511	}
				1512	case Intrinsic::trap: {
				1513	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK))
				1514	.addImm(1);
				1515	return true;
				1516	}
				1517	}
				1518	return false;
				1519	}
				1520
				1521	bool ARM64FastISel::SelectRet(const Instruction *I) {
				1522	const ReturnInst *Ret = cast<ReturnInst>(I);
				1523	const Function &F = *I->getParent()->getParent();
				1524
				1525	if (!FuncInfo.CanLowerReturn)
				1526	return false;
				1527
				1528	if (F.isVarArg())
				1529	return false;
				1530
				1531	// Build a list of return value registers.
				1532	SmallVector<unsigned, 4> RetRegs;
				1533
				1534	if (Ret->getNumOperands() > 0) {
				1535	CallingConv::ID CC = F.getCallingConv();
				1536	SmallVector<ISD::OutputArg, 4> Outs;
				1537	GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
				1538
				1539	// Analyze operands of the call, assigning locations to each operand.
				1540	SmallVector<CCValAssign, 16> ValLocs;
				1541	CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
				1542	I->getContext());
				1543	CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
				1544	: RetCC_ARM64_AAPCS;
				1545	CCInfo.AnalyzeReturn(Outs, RetCC);
				1546
				1547	// Only handle a single return value for now.
				1548	if (ValLocs.size() != 1)
				1549	return false;
				1550
				1551	CCValAssign &VA = ValLocs[0];
				1552	const Value *RV = Ret->getOperand(0);
				1553
				1554	// Don't bother handling odd stuff for now.
				1555	if (VA.getLocInfo() != CCValAssign::Full)
				1556	return false;
				1557	// Only handle register returns for now.
				1558	if (!VA.isRegLoc())
				1559	return false;
				1560	unsigned Reg = getRegForValue(RV);
				1561	if (Reg == 0)
				1562	return false;
				1563
				1564	unsigned SrcReg = Reg + VA.getValNo();
				1565	unsigned DestReg = VA.getLocReg();
				1566	// Avoid a cross-class copy. This is very unlikely.
				1567	if (!MRI.getRegClass(SrcReg)->contains(DestReg))
				1568	return false;
				1569
				1570	EVT RVEVT = TLI.getValueType(RV->getType());
				1571	if (!RVEVT.isSimple())
				1572	return false;
				1573	MVT RVVT = RVEVT.getSimpleVT();
				1574	MVT DestVT = VA.getValVT();
				1575	// Special handling for extended integers.
				1576	if (RVVT != DestVT) {
				1577	if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
				1578	return false;
				1579
				1580	if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
				1581	return false;
				1582
				1583	bool isZExt = Outs[0].Flags.isZExt();
				1584	SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
				1585	if (SrcReg == 0)
				1586	return false;
				1587	}
				1588
				1589	// Make the copy.
				1590	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				1591	TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
				1592
				1593	// Add register to return instruction.
				1594	RetRegs.push_back(VA.getLocReg());
				1595	}
				1596
				1597	MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				1598	TII.get(ARM64::RET_ReallyLR));
				1599	for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
				1600	MIB.addReg(RetRegs[i], RegState::Implicit);
				1601	return true;
				1602	}
				1603
				1604	bool ARM64FastISel::SelectTrunc(const Instruction *I) {
				1605	Type *DestTy = I->getType();
				1606	Value *Op = I->getOperand(0);
				1607	Type *SrcTy = Op->getType();
				1608
				1609	EVT SrcEVT = TLI.getValueType(SrcTy, true);
				1610	EVT DestEVT = TLI.getValueType(DestTy, true);
				1611	if (!SrcEVT.isSimple())
				1612	return false;
				1613	if (!DestEVT.isSimple())
				1614	return false;
				1615
				1616	MVT SrcVT = SrcEVT.getSimpleVT();
				1617	MVT DestVT = DestEVT.getSimpleVT();
				1618
				1619	if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
				1620	SrcVT != MVT::i8)
				1621	return false;
				1622	if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
				1623	DestVT != MVT::i1)
				1624	return false;
				1625
				1626	unsigned SrcReg = getRegForValue(Op);
				1627	if (!SrcReg)
				1628	return false;
				1629
				1630	// If we're truncating from i64 to a smaller non-legal type then generate an
				1631	// AND. Otherwise, we know the high bits are undefined and a truncate doesn't
				1632	// generate any code.
				1633	if (SrcVT == MVT::i64) {
				1634	uint64_t Mask = 0;
				1635	switch (DestVT.SimpleTy) {
				1636	default:
				1637	// Trunc i64 to i32 is handled by the target-independent fast-isel.
				1638	return false;
				1639	case MVT::i1:
				1640	Mask = 0x1;
				1641	break;
				1642	case MVT::i8:
				1643	Mask = 0xff;
				1644	break;
				1645	case MVT::i16:
				1646	Mask = 0xffff;
				1647	break;
				1648	}
				1649	// Issue an extract_subreg to get the lower 32-bits.
				1650	unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /Kill=/true,
				1651	ARM64::sub_32);
				1652	// Create the AND instruction which performs the actual truncation.
				1653	unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
				1654	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				1655	ANDReg)
				1656	.addReg(Reg32)
				1657	.addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32));
				1658	SrcReg = ANDReg;
				1659	}
				1660
				1661	UpdateValueMap(I, SrcReg);
				1662	return true;
				1663	}
				1664
				1665	unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
				1666	assert((DestVT == MVT::i8 \|\| DestVT == MVT::i16 \|\| DestVT == MVT::i32 \|\|
				1667	DestVT == MVT::i64) &&
				1668	"Unexpected value type.");
				1669	// Handle i8 and i16 as i32.
				1670	if (DestVT == MVT::i8 \|\| DestVT == MVT::i16)
				1671	DestVT = MVT::i32;
				1672
				1673	if (isZExt) {
				1674	unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
				1675	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
				1676	ResultReg)
				1677	.addReg(SrcReg)
				1678	.addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
				1679
				1680	if (DestVT == MVT::i64) {
				1681	// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
				1682	// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
				1683	unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass);
				1684	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
				1685	TII.get(ARM64::SUBREG_TO_REG), Reg64)
				1686	.addImm(0)
				1687	.addReg(ResultReg)
				1688	.addImm(ARM64::sub_32);
				1689	ResultReg = Reg64;
				1690	}
				1691	return ResultReg;
				1692	} else {
				1693	if (DestVT == MVT::i64) {
				1694	// FIXME: We're SExt i1 to i64.
				1695	return 0;
				1696	}
				1697	unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
				1698	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri),
				1699	ResultReg)
				1700	.addReg(SrcReg)
				1701	.addImm(0)
				1702	.addImm(0);
				1703	return ResultReg;
				1704	}
				1705	}
				1706
				1707	unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
				1708	bool isZExt) {
				1709	assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
				1710	unsigned Opc;
				1711	unsigned Imm = 0;
				1712
				1713	switch (SrcVT.SimpleTy) {
				1714	default:
				1715	return 0;
				1716	case MVT::i1:
				1717	return Emiti1Ext(SrcReg, DestVT, isZExt);
				1718	case MVT::i8:
				1719	if (DestVT == MVT::i64)
				1720	Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
				1721	else
				1722	Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
				1723	Imm = 7;
				1724	break;
				1725	case MVT::i16:
				1726	if (DestVT == MVT::i64)
				1727	Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
				1728	else
				1729	Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
				1730	Imm = 15;
				1731	break;
				1732	case MVT::i32:
				1733	assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
				1734	Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
				1735	Imm = 31;
				1736	break;
				1737	}
				1738
				1739	// Handle i8 and i16 as i32.
				1740	if (DestVT == MVT::i8 \|\| DestVT == MVT::i16)
				1741	DestVT = MVT::i32;
				1742
				1743	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				1744	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				1745	.addReg(SrcReg)
				1746	.addImm(0)
				1747	.addImm(Imm);
				1748
				1749	return ResultReg;
				1750	}
				1751
				1752	bool ARM64FastISel::SelectIntExt(const Instruction *I) {
				1753	// On ARM, in general, integer casts don't involve legal types; this code
				1754	// handles promotable integers. The high bits for a type smaller than
				1755	// the register size are assumed to be undefined.
				1756	Type *DestTy = I->getType();
				1757	Value *Src = I->getOperand(0);
				1758	Type *SrcTy = Src->getType();
				1759
				1760	bool isZExt = isa<ZExtInst>(I);
				1761	unsigned SrcReg = getRegForValue(Src);
				1762	if (!SrcReg)
				1763	return false;
				1764
				1765	EVT SrcEVT = TLI.getValueType(SrcTy, true);
				1766	EVT DestEVT = TLI.getValueType(DestTy, true);
				1767	if (!SrcEVT.isSimple())
				1768	return false;
				1769	if (!DestEVT.isSimple())
				1770	return false;
				1771
				1772	MVT SrcVT = SrcEVT.getSimpleVT();
				1773	MVT DestVT = DestEVT.getSimpleVT();
				1774	unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
				1775	if (ResultReg == 0)
				1776	return false;
				1777	UpdateValueMap(I, ResultReg);
				1778	return true;
				1779	}
				1780
				1781	bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
				1782	EVT DestEVT = TLI.getValueType(I->getType(), true);
				1783	if (!DestEVT.isSimple())
				1784	return false;
				1785
				1786	MVT DestVT = DestEVT.getSimpleVT();
				1787	if (DestVT != MVT::i64 && DestVT != MVT::i32)
				1788	return false;
				1789
				1790	unsigned DivOpc;
				1791	bool is64bit = (DestVT == MVT::i64);
				1792	switch (ISDOpcode) {
				1793	default:
				1794	return false;
				1795	case ISD::SREM:
				1796	DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr;
				1797	break;
				1798	case ISD::UREM:
				1799	DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr;
				1800	break;
				1801	}
				1802	unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr;
				1803	unsigned Src0Reg = getRegForValue(I->getOperand(0));
				1804	if (!Src0Reg)
				1805	return false;
				1806
				1807	unsigned Src1Reg = getRegForValue(I->getOperand(1));
				1808	if (!Src1Reg)
				1809	return false;
				1810
				1811	unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
				1812	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), ResultReg)
				1813	.addReg(Src0Reg)
				1814	.addReg(Src1Reg);
				1815	// The remainder is computed as numerator – (quotient * denominator) using the
				1816	// MSUB instruction.
				1817	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
				1818	.addReg(ResultReg)
				1819	.addReg(Src1Reg)
				1820	.addReg(Src0Reg);
				1821	UpdateValueMap(I, ResultReg);
				1822	return true;
				1823	}
				1824
				1825	bool ARM64FastISel::SelectMul(const Instruction *I) {
				1826	EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
				1827	if (!SrcEVT.isSimple())
				1828	return false;
				1829	MVT SrcVT = SrcEVT.getSimpleVT();
				1830
				1831	// Must be simple value type. Don't handle vectors.
				1832	if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
				1833	SrcVT != MVT::i8)
				1834	return false;
				1835
				1836	unsigned Opc;
				1837	unsigned ZReg;
				1838	switch (SrcVT.SimpleTy) {
				1839	default:
				1840	return false;
				1841	case MVT::i8:
				1842	case MVT::i16:
				1843	case MVT::i32:
				1844	ZReg = ARM64::WZR;
				1845	Opc = ARM64::MADDWrrr;
				1846	break;
				1847	case MVT::i64:
				1848	ZReg = ARM64::XZR;
				1849	Opc = ARM64::MADDXrrr;
				1850	break;
				1851	}
				1852
				1853	unsigned Src0Reg = getRegForValue(I->getOperand(0));
				1854	if (!Src0Reg)
				1855	return false;
				1856
				1857	unsigned Src1Reg = getRegForValue(I->getOperand(1));
				1858	if (!Src1Reg)
				1859	return false;
				1860
				1861	// Create the base instruction, then add the operands.
				1862	unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
				1863	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
				1864	.addReg(Src0Reg)
				1865	.addReg(Src1Reg)
				1866	.addReg(ZReg);
				1867	UpdateValueMap(I, ResultReg);
				1868	return true;
				1869	}
				1870
				1871	bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) {
				1872	switch (I->getOpcode()) {
				1873	default:
				1874	break;
				1875	case Instruction::Load:
				1876	return SelectLoad(I);
				1877	case Instruction::Store:
				1878	return SelectStore(I);
				1879	case Instruction::Br:
				1880	return SelectBranch(I);
				1881	case Instruction::IndirectBr:
				1882	return SelectIndirectBr(I);
				1883	case Instruction::FCmp:
				1884	case Instruction::ICmp:
				1885	return SelectCmp(I);
				1886	case Instruction::Select:
				1887	return SelectSelect(I);
				1888	case Instruction::FPExt:
				1889	return SelectFPExt(I);
				1890	case Instruction::FPTrunc:
				1891	return SelectFPTrunc(I);
				1892	case Instruction::FPToSI:
				1893	return SelectFPToInt(I, /Signed=/true);
				1894	case Instruction::FPToUI:
				1895	return SelectFPToInt(I, /Signed=/false);
				1896	case Instruction::SIToFP:
				1897	return SelectIntToFP(I, /Signed=/true);
				1898	case Instruction::UIToFP:
				1899	return SelectIntToFP(I, /Signed=/false);
				1900	case Instruction::SRem:
				1901	return SelectRem(I, ISD::SREM);
				1902	case Instruction::URem:
				1903	return SelectRem(I, ISD::UREM);
				1904	case Instruction::Call:
				1905	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
				1906	return SelectIntrinsicCall(*II);
				1907	return SelectCall(I);
				1908	case Instruction::Ret:
				1909	return SelectRet(I);
				1910	case Instruction::Trunc:
				1911	return SelectTrunc(I);
				1912	case Instruction::ZExt:
				1913	case Instruction::SExt:
				1914	return SelectIntExt(I);
				1915	case Instruction::Mul:
				1916	// FIXME: This really should be handled by the target-independent selector.
				1917	return SelectMul(I);
				1918	}
				1919	return false;
				1920	// Silence warnings.
				1921	(void)&CC_ARM64_DarwinPCS_VarArg;
				1922	}
				1923
				1924	namespace llvm {
				1925	llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo,
				1926	const TargetLibraryInfo *libInfo) {
				1927	return new ARM64FastISel(funcInfo, libInfo);
				1928	}
				1929	}