1//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the AArch64-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// AArch64GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64.h"
17#include "AArch64TargetMachine.h"
18#include "AArch64Subtarget.h"
19#include "MCTargetDesc/AArch64AddressingModes.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/FastISel.h"
22#include "llvm/CodeGen/FunctionLoweringInfo.h"
23#include "llvm/CodeGen/MachineConstantPool.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/IR/CallingConv.h"
28#include "llvm/IR/DataLayout.h"
29#include "llvm/IR/DerivedTypes.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/GetElementPtrTypeIterator.h"
32#include "llvm/IR/GlobalAlias.h"
33#include "llvm/IR/GlobalVariable.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/IR/Operator.h"
37#include "llvm/Support/CommandLine.h"
38using namespace llvm;
39
40namespace {
41
42class AArch64FastISel : public FastISel {
43
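  // Address - a small helper describing a memory operand as either a base
  // register or a frame index, plus a constant byte offset.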
44 class Address {
45 public:
46 typedef enum {
47 RegBase,
48 FrameIndexBase
49 } BaseKind;
50
51 private:
52 BaseKind Kind;
53 union {
54 unsigned Reg;
55 int FI;
56 } Base;
57 int64_t Offset;
58
59 public:
60 Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
61 void setKind(BaseKind K) { Kind = K; }
62 BaseKind getKind() const { return Kind; }
63 bool isRegBase() const { return Kind == RegBase; }
64 bool isFIBase() const { return Kind == FrameIndexBase; }
65 void setReg(unsigned Reg) {
66 assert(isRegBase() && "Invalid base register access!");
67 Base.Reg = Reg;
68 }
69 unsigned getReg() const {
70 assert(isRegBase() && "Invalid base register access!");
71 return Base.Reg;
72 }
73 void setFI(unsigned FI) {
74 assert(isFIBase() && "Invalid base frame index access!");
75 Base.FI = FI;
76 }
77 unsigned getFI() const {
78 assert(isFIBase() && "Invalid base frame index access!");
79 return Base.FI;
80 }
81 void setOffset(int64_t O) { Offset = O; }
82 int64_t getOffset() { return Offset; }
83
84 bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
85 };
86
87 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
88 /// make the right decision when generating code for different targets.
89 const AArch64Subtarget *Subtarget;
90 LLVMContext *Context;
91
92private:
93 // Selection routines.
94 bool SelectLoad(const Instruction *I);
95 bool SelectStore(const Instruction *I);
96 bool SelectBranch(const Instruction *I);
97 bool SelectIndirectBr(const Instruction *I);
98 bool SelectCmp(const Instruction *I);
99 bool SelectSelect(const Instruction *I);
100 bool SelectFPExt(const Instruction *I);
101 bool SelectFPTrunc(const Instruction *I);
102 bool SelectFPToInt(const Instruction *I, bool Signed);
103 bool SelectIntToFP(const Instruction *I, bool Signed);
104 bool SelectRem(const Instruction *I, unsigned ISDOpcode);
105 bool SelectCall(const Instruction *I, const char *IntrMemName);
106 bool SelectIntrinsicCall(const IntrinsicInst &I);
107 bool SelectRet(const Instruction *I);
108 bool SelectTrunc(const Instruction *I);
109 bool SelectIntExt(const Instruction *I);
110 bool SelectMul(const Instruction *I);
111
112 // Utility helper routines.
113 bool isTypeLegal(Type *Ty, MVT &VT);
114 bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
115 bool ComputeAddress(const Value *Obj, Address &Addr);
116 bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
117 bool UseUnscaled);
118 void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
119 unsigned Flags, bool UseUnscaled);
120 bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
121 bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
122 unsigned Alignment);
123 // Emit functions.
124 bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
125 bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
126 bool UseUnscaled = false);
127 bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
128 bool UseUnscaled = false);
129 unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
130 unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
131
132 unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
133 unsigned AArch64MaterializeGV(const GlobalValue *GV);
134
135 // Call handling routines.
136private:
137 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
138 bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
139 SmallVectorImpl<unsigned> &ArgRegs,
140 SmallVectorImpl<MVT> &ArgVTs,
141 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
142 SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
143 unsigned &NumBytes);
144 bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
145 const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
146
147public:
148 // Backend specific FastISel code.
149 unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
150 unsigned TargetMaterializeConstant(const Constant *C) override;
151
152 explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
153 const TargetLibraryInfo *libInfo)
154 : FastISel(funcInfo, libInfo) {
155 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
156 Context = &funcInfo.Fn->getContext();
157 }
158
159 bool TargetSelectInstruction(const Instruction *I) override;
160
161#include "AArch64GenFastISel.inc"
162};
163
164} // end anonymous namespace
165
166#include "AArch64GenCallingConv.inc"
167
168CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
169 if (CC == CallingConv::WebKit_JS)
170 return CC_AArch64_WebKit_JS;
171 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
172}
173
174unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
175 assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
176 "Alloca should always return a pointer.");
177
178 // Don't handle dynamic allocas.
179 if (!FuncInfo.StaticAllocaMap.count(AI))
180 return 0;
181
182 DenseMap<const AllocaInst *, int>::iterator SI =
183 FuncInfo.StaticAllocaMap.find(AI);
184
185 if (SI != FuncInfo.StaticAllocaMap.end()) {
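    // Roughly "ADD Xd, <frame-index>, #0"; the frame index is rewritten to a
    // real base register and offset during frame-index elimination.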
186 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
187 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
188 ResultReg)
189 .addFrameIndex(SI->second)
190 .addImm(0)
191 .addImm(0);
192 return ResultReg;
193 }
194
195 return 0;
196}
197
198unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
199 if (VT != MVT::f32 && VT != MVT::f64)
200 return 0;
201
202 const APFloat Val = CFP->getValueAPF();
203 bool is64bit = (VT == MVT::f64);
204
205 // This checks to see if we can use FMOV instructions to materialize
206 // a constant; otherwise we have to materialize via the constant pool.
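  // For example, +2.0 and -0.5 fit the 8-bit FMOV immediate encoding, while a
  // value such as 0.1 has no exact encoding and is loaded from the constant
  // pool below.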
207 if (TLI.isFPImmLegal(Val, VT)) {
208 int Imm;
209 unsigned Opc;
210 if (is64bit) {
211 Imm = AArch64_AM::getFP64Imm(Val);
212 Opc = AArch64::FMOVDi;
213 } else {
214 Imm = AArch64_AM::getFP32Imm(Val);
215 Opc = AArch64::FMOVSi;
216 }
217 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
218 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
219 .addImm(Imm);
220 return ResultReg;
221 }
222
223 // Materialize via constant pool. MachineConstantPool wants an explicit
224 // alignment.
225 unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
226 if (Align == 0)
227 Align = DL.getTypeAllocSize(CFP->getType());
228
229 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
230 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
232 ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
233
234 unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
235 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
236 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
237 .addReg(ADRPReg)
238 .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
239 return ResultReg;
240}
241
242unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
243 // We can't handle thread-local variables quickly yet.
244 if (GV->isThreadLocal())
245 return 0;
246
247 // MachO still uses GOT for large code-model accesses, but ELF requires
248 // movz/movk sequences, which FastISel doesn't handle yet.
249 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
250 return 0;
251
252 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
253
254 EVT DestEVT = TLI.getValueType(GV->getType(), true);
255 if (!DestEVT.isSimple())
256 return 0;
257
258 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
259 unsigned ResultReg;
260
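  // ADRP materializes the 4 KiB page address of the symbol; the low 12 bits
  // are folded into the following LDR (GOT slot) or ADD (direct reference)
  // via MO_PAGEOFF.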
261 if (OpFlags & AArch64II::MO_GOT) {
262 // ADRP + LDRX
263 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
264 ADRPReg)
265 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
266
267 ResultReg = createResultReg(&AArch64::GPR64RegClass);
268 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
269 ResultReg)
270 .addReg(ADRPReg)
271 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
272 AArch64II::MO_NC);
273 } else {
274 // ADRP + ADDX
275 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
276 ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
277
278 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
279 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
280 ResultReg)
281 .addReg(ADRPReg)
282 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 }
285 return ResultReg;
286}
287
288unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
289 EVT CEVT = TLI.getValueType(C->getType(), true);
290
291 // Only handle simple types.
292 if (!CEVT.isSimple())
293 return 0;
294 MVT VT = CEVT.getSimpleVT();
295
296 // FIXME: Handle ConstantInt.
297 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
298 return AArch64MaterializeFP(CFP, VT);
299 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
300 return AArch64MaterializeGV(GV);
301
302 return 0;
303}
304
305// Computes the address to get to an object.
306bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
307 const User *U = nullptr;
308 unsigned Opcode = Instruction::UserOp1;
309 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
310 // Don't walk into other basic blocks unless the object is an alloca from
311 // another block, otherwise it may not have a virtual register assigned.
312 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
313 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
314 Opcode = I->getOpcode();
315 U = I;
316 }
317 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
318 Opcode = C->getOpcode();
319 U = C;
320 }
321
322 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
323 if (Ty->getAddressSpace() > 255)
324 // Fast instruction selection doesn't support the special
325 // address spaces.
326 return false;
327
328 switch (Opcode) {
329 default:
330 break;
331 case Instruction::BitCast: {
332 // Look through bitcasts.
333 return ComputeAddress(U->getOperand(0), Addr);
334 }
335 case Instruction::IntToPtr: {
336 // Look past no-op inttoptrs.
337 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
338 return ComputeAddress(U->getOperand(0), Addr);
339 break;
340 }
341 case Instruction::PtrToInt: {
342 // Look past no-op ptrtoints.
343 if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
344 return ComputeAddress(U->getOperand(0), Addr);
345 break;
346 }
347 case Instruction::GetElementPtr: {
348 Address SavedAddr = Addr;
349 uint64_t TmpOffset = Addr.getOffset();
350
351 // Iterate through the GEP folding the constants into offsets where
352 // we can.
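    // For instance (illustrative), "getelementptr %S* %p, i64 1, i32 2" over a
    // 16-byte struct whose third field starts at byte 8 folds to a constant
    // offset of 16 + 8 = 24 from %p.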
353 gep_type_iterator GTI = gep_type_begin(U);
354 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
355 ++i, ++GTI) {
356 const Value *Op = *i;
357 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
358 const StructLayout *SL = DL.getStructLayout(STy);
359 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
360 TmpOffset += SL->getElementOffset(Idx);
361 } else {
362 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
363 for (;;) {
364 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
365 // Constant-offset addressing.
366 TmpOffset += CI->getSExtValue() * S;
367 break;
368 }
369 if (canFoldAddIntoGEP(U, Op)) {
370 // A compatible add with a constant operand. Fold the constant.
371 ConstantInt *CI =
372 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
373 TmpOffset += CI->getSExtValue() * S;
374 // Iterate on the other operand.
375 Op = cast<AddOperator>(Op)->getOperand(0);
376 continue;
377 }
378 // Unsupported
379 goto unsupported_gep;
380 }
381 }
382 }
383
384 // Try to grab the base operand now.
385 Addr.setOffset(TmpOffset);
386 if (ComputeAddress(U->getOperand(0), Addr))
387 return true;
388
389 // We failed, restore everything and try the other options.
390 Addr = SavedAddr;
391
392 unsupported_gep:
393 break;
394 }
395 case Instruction::Alloca: {
396 const AllocaInst *AI = cast<AllocaInst>(Obj);
397 DenseMap<const AllocaInst *, int>::iterator SI =
398 FuncInfo.StaticAllocaMap.find(AI);
399 if (SI != FuncInfo.StaticAllocaMap.end()) {
400 Addr.setKind(Address::FrameIndexBase);
401 Addr.setFI(SI->second);
402 return true;
403 }
404 break;
405 }
406 }
407
408 // Try to get this in a register if nothing else has worked.
409 if (!Addr.isValid())
410 Addr.setReg(getRegForValue(Obj));
411 return Addr.isValid();
412}
413
414bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
415 EVT evt = TLI.getValueType(Ty, true);
416
417 // Only handle simple types.
418 if (evt == MVT::Other || !evt.isSimple())
419 return false;
420 VT = evt.getSimpleVT();
421
422 // This is a legal type, but it's not something we handle in fast-isel.
423 if (VT == MVT::f128)
424 return false;
425
426 // Handle all other legal types, i.e. a register that will directly hold this
427 // value.
428 return TLI.isTypeLegal(VT);
429}
430
431bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
432 if (isTypeLegal(Ty, VT))
433 return true;
434
435 // If this is a type that can be sign or zero-extended to a basic operation,
436 // go ahead and accept it now. For stores, this reflects truncation.
437 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
438 return true;
439
440 return false;
441}
442
443bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
444 int64_t ScaleFactor, bool UseUnscaled) {
445 bool needsLowering = false;
446 int64_t Offset = Addr.getOffset();
447 switch (VT.SimpleTy) {
448 default:
449 return false;
450 case MVT::i1:
451 case MVT::i8:
452 case MVT::i16:
453 case MVT::i32:
454 case MVT::i64:
455 case MVT::f32:
456 case MVT::f64:
457 if (!UseUnscaled)
458 // Using scaled, 12-bit, unsigned immediate offsets.
459 needsLowering = ((Offset & 0xfff) != Offset);
460 else
461 // Using unscaled, 9-bit, signed immediate offsets.
462 needsLowering = (Offset > 256 || Offset < -256);
463 break;
464 }
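  // E.g. for an i64 access the scaled form reaches byte offsets 0..32760 in
  // steps of 8 (the 12-bit immediate counts elements, not bytes), whereas the
  // unscaled form covers roughly -256..255 bytes.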
465
466 // If this is a stack pointer and the offset needs to be simplified, then put
467 // the alloca address into a register, set the base type back to register and
468 // continue. This should almost never happen.
469 if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
470 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
472 ResultReg)
473 .addFrameIndex(Addr.getFI())
474 .addImm(0)
475 .addImm(0);
476 Addr.setKind(Address::RegBase);
477 Addr.setReg(ResultReg);
478 }
479
480 // Since the offset is too large for the load/store instruction, get the
481 // reg+offset into a register.
482 if (needsLowering) {
483 uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
484 unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
485 UnscaledOffset, MVT::i64);
486 if (ResultReg == 0)
487 return false;
488 Addr.setReg(ResultReg);
489 Addr.setOffset(0);
490 }
491 return true;
492}
493
494void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
495 const MachineInstrBuilder &MIB,
496 unsigned Flags, bool UseUnscaled) {
497 int64_t Offset = Addr.getOffset();
498 // Frame base works a bit differently. Handle it separately.
499 if (Addr.getKind() == Address::FrameIndexBase) {
500 int FI = Addr.getFI();
501 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
502 // and alignment should be based on the VT.
503 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
504 MachinePointerInfo::getFixedStack(FI, Offset), Flags,
505 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
506 // Now add the rest of the operands.
507 MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
508 } else {
509 // Now add the rest of the operands.
510 MIB.addReg(Addr.getReg());
511 MIB.addImm(Offset);
512 }
513}
514
515bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
516 bool UseUnscaled) {
517 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
518 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
519 if (!UseUnscaled && Addr.getOffset() < 0)
520 UseUnscaled = true;
521
522 unsigned Opc;
523 const TargetRegisterClass *RC;
524 bool VTIsi1 = false;
525 int64_t ScaleFactor = 0;
526 switch (VT.SimpleTy) {
527 default:
528 return false;
529 case MVT::i1:
530 VTIsi1 = true;
531 // Intentional fall-through.
532 case MVT::i8:
533 Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
534 RC = &AArch64::GPR32RegClass;
535 ScaleFactor = 1;
536 break;
537 case MVT::i16:
538 Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
539 RC = &AArch64::GPR32RegClass;
540 ScaleFactor = 2;
541 break;
542 case MVT::i32:
543 Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
544 RC = &AArch64::GPR32RegClass;
545 ScaleFactor = 4;
546 break;
547 case MVT::i64:
548 Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
549 RC = &AArch64::GPR64RegClass;
550 ScaleFactor = 8;
551 break;
552 case MVT::f32:
553 Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
554 RC = TLI.getRegClassFor(VT);
555 ScaleFactor = 4;
556 break;
557 case MVT::f64:
558 Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
559 RC = TLI.getRegClassFor(VT);
560 ScaleFactor = 8;
561 break;
562 }
563 // Scale the offset.
564 if (!UseUnscaled) {
565 int64_t Offset = Addr.getOffset();
566 if (Offset & (ScaleFactor - 1))
567 // Retry using an unscaled, 9-bit, signed immediate offset.
568 return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
569
570 Addr.setOffset(Offset / ScaleFactor);
571 }
572
573 // Simplify this down to something we can handle.
574 if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
575 return false;
576
577 // Create the base instruction, then add the operands.
578 ResultReg = createResultReg(RC);
579 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
580 TII.get(Opc), ResultReg);
581 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
582
583 // Loading an i1 requires special handling.
584 if (VTIsi1) {
585 MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
586 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
587 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
588 ANDReg)
589 .addReg(ResultReg)
590 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
591 ResultReg = ANDReg;
592 }
593 return true;
594}
595
596bool AArch64FastISel::SelectLoad(const Instruction *I) {
597 MVT VT;
598 // Verify we have a legal type before going any further. Currently, we handle
599 // simple types that will directly fit in a register (i32/f32/i64/f64) or
600 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
601 if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
602 return false;
603
604 // See if we can handle this address.
605 Address Addr;
606 if (!ComputeAddress(I->getOperand(0), Addr))
607 return false;
608
609 unsigned ResultReg;
610 if (!EmitLoad(VT, ResultReg, Addr))
611 return false;
612
613 UpdateValueMap(I, ResultReg);
614 return true;
615}
616
617bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
618 bool UseUnscaled) {
619 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
620 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
621 if (!UseUnscaled && Addr.getOffset() < 0)
622 UseUnscaled = true;
623
624 unsigned StrOpc;
625 bool VTIsi1 = false;
626 int64_t ScaleFactor = 0;
627 // Using scaled, 12-bit, unsigned immediate offsets.
628 switch (VT.SimpleTy) {
629 default:
630 return false;
631 case MVT::i1:
632 VTIsi1 = true;
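    // Intentional fall-through.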
633 case MVT::i8:
634 StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
635 ScaleFactor = 1;
636 break;
637 case MVT::i16:
638 StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
639 ScaleFactor = 2;
640 break;
641 case MVT::i32:
642 StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
643 ScaleFactor = 4;
644 break;
645 case MVT::i64:
646 StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
647 ScaleFactor = 8;
648 break;
649 case MVT::f32:
650 StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
651 ScaleFactor = 4;
652 break;
653 case MVT::f64:
654 StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
655 ScaleFactor = 8;
656 break;
657 }
658 // Scale the offset.
659 if (!UseUnscaled) {
660 int64_t Offset = Addr.getOffset();
661 if (Offset & (ScaleFactor - 1))
662 // Retry using an unscaled, 9-bit, signed immediate offset.
663 return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
664
665 Addr.setOffset(Offset / ScaleFactor);
666 }
667
668 // Simplify this down to something we can handle.
669 if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
670 return false;
671
672 // Storing an i1 requires special handling.
673 if (VTIsi1) {
674 MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
675 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
676 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
677 ANDReg)
678 .addReg(SrcReg)
679 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
680 SrcReg = ANDReg;
681 }
682 // Create the base instruction, then add the operands.
683 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
684 TII.get(StrOpc)).addReg(SrcReg);
685 AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
686 return true;
687}
688
689bool AArch64FastISel::SelectStore(const Instruction *I) {
690 MVT VT;
691 Value *Op0 = I->getOperand(0);
692 // Verify we have a legal type before going any further. Currently, we handle
693 // simple types that will directly fit in a register (i32/f32/i64/f64) or
694 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
695 if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
696 cast<StoreInst>(I)->isAtomic())
697 return false;
698
699 // Get the value to be stored into a register.
700 unsigned SrcReg = getRegForValue(Op0);
701 if (SrcReg == 0)
702 return false;
703
704 // See if we can handle this address.
705 Address Addr;
706 if (!ComputeAddress(I->getOperand(1), Addr))
707 return false;
708
709 if (!EmitStore(VT, SrcReg, Addr))
710 return false;
711 return true;
712}
713
714static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
715 switch (Pred) {
716 case CmpInst::FCMP_ONE:
717 case CmpInst::FCMP_UEQ:
718 default:
719 // AL is our "false" for now. The other two need more compares.
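  // (FCMP_ONE, for example, is "less than or greater than", which would take
  // two branches to test.)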
720 return AArch64CC::AL;
721 case CmpInst::ICMP_EQ:
722 case CmpInst::FCMP_OEQ:
723 return AArch64CC::EQ;
724 case CmpInst::ICMP_SGT:
725 case CmpInst::FCMP_OGT:
726 return AArch64CC::GT;
727 case CmpInst::ICMP_SGE:
728 case CmpInst::FCMP_OGE:
729 return AArch64CC::GE;
730 case CmpInst::ICMP_UGT:
731 case CmpInst::FCMP_UGT:
732 return AArch64CC::HI;
733 case CmpInst::FCMP_OLT:
734 return AArch64CC::MI;
735 case CmpInst::ICMP_ULE:
736 case CmpInst::FCMP_OLE:
737 return AArch64CC::LS;
738 case CmpInst::FCMP_ORD:
739 return AArch64CC::VC;
740 case CmpInst::FCMP_UNO:
741 return AArch64CC::VS;
742 case CmpInst::FCMP_UGE:
743 return AArch64CC::PL;
744 case CmpInst::ICMP_SLT:
745 case CmpInst::FCMP_ULT:
746 return AArch64CC::LT;
747 case CmpInst::ICMP_SLE:
748 case CmpInst::FCMP_ULE:
749 return AArch64CC::LE;
750 case CmpInst::FCMP_UNE:
751 case CmpInst::ICMP_NE:
752 return AArch64CC::NE;
753 case CmpInst::ICMP_UGE:
754 return AArch64CC::HS;
755 case CmpInst::ICMP_ULT:
756 return AArch64CC::LO;
757 }
758}
759
760bool AArch64FastISel::SelectBranch(const Instruction *I) {
761 const BranchInst *BI = cast<BranchInst>(I);
762 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
763 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
764
765 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
766 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
767 // We may not handle every CC for now.
768 AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
769 if (CC == AArch64CC::AL)
770 return false;
771
772 // Emit the cmp.
773 if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
774 return false;
775
776 // Emit the branch.
777 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
778 .addImm(CC)
779 .addMBB(TBB);
780 FuncInfo.MBB->addSuccessor(TBB);
781
782 FastEmitBranch(FBB, DbgLoc);
783 return true;
784 }
785 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
786 MVT SrcVT;
787 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
788 (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
789 unsigned CondReg = getRegForValue(TI->getOperand(0));
790 if (CondReg == 0)
791 return false;
792
793 // Issue an extract_subreg to get the lower 32-bits.
794 if (SrcVT == MVT::i64)
795 CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
796 AArch64::sub_32);
797
798 MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
799 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
800 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
801 TII.get(AArch64::ANDWri), ANDReg)
802 .addReg(CondReg)
803 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
804 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
805 TII.get(AArch64::SUBSWri))
806 .addReg(ANDReg)
807 .addReg(ANDReg)
808 .addImm(0)
809 .addImm(0);
810
811 unsigned CC = AArch64CC::NE;
812 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
813 std::swap(TBB, FBB);
814 CC = AArch64CC::EQ;
815 }
816 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
817 .addImm(CC)
818 .addMBB(TBB);
819 FuncInfo.MBB->addSuccessor(TBB);
820 FastEmitBranch(FBB, DbgLoc);
821 return true;
822 }
823 } else if (const ConstantInt *CI =
824 dyn_cast<ConstantInt>(BI->getCondition())) {
825 uint64_t Imm = CI->getZExtValue();
826 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
827 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
828 .addMBB(Target);
829 FuncInfo.MBB->addSuccessor(Target);
830 return true;
831 }
832
833 unsigned CondReg = getRegForValue(BI->getCondition());
834 if (CondReg == 0)
835 return false;
836
837 // We've been divorced from our compare! Our block was split, and
838 // now our compare lives in a predecessor block. We mustn't
839 // re-compare here, as the children of the compare aren't guaranteed
840 // live across the block boundary (we *could* check for this).
841 // Regardless, the compare has been done in the predecessor block,
842 // and it left a value for us in a virtual register. Ergo, we test
843 // the one-bit value left in the virtual register.
844 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
845 AArch64::WZR)
846 .addReg(CondReg)
847 .addImm(0)
848 .addImm(0);
849
850 unsigned CC = AArch64CC::NE;
851 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
852 std::swap(TBB, FBB);
853 CC = AArch64CC::EQ;
854 }
855
856 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
857 .addImm(CC)
858 .addMBB(TBB);
859 FuncInfo.MBB->addSuccessor(TBB);
860 FastEmitBranch(FBB, DbgLoc);
861 return true;
862}
863
864bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
865 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
866 unsigned AddrReg = getRegForValue(BI->getOperand(0));
867 if (AddrReg == 0)
868 return false;
869
870 // Emit the indirect branch.
871 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
872 .addReg(AddrReg);
873
874 // Make sure the CFG is up-to-date.
875 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
876 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
877
878 return true;
879}
880
881bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
882 Type *Ty = Src1Value->getType();
883 EVT SrcEVT = TLI.getValueType(Ty, true);
884 if (!SrcEVT.isSimple())
885 return false;
886 MVT SrcVT = SrcEVT.getSimpleVT();
887
888 // Check to see if the 2nd operand is a constant that we can encode directly
889 // in the compare.
890 uint64_t Imm;
891 bool UseImm = false;
892 bool isNegativeImm = false;
893 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
894 if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
895 SrcVT == MVT::i8 || SrcVT == MVT::i1) {
896 const APInt &CIVal = ConstInt->getValue();
897
898 Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
899 if (CIVal.isNegative()) {
900 isNegativeImm = true;
901 Imm = -Imm;
902 }
903 // FIXME: We can handle more immediates using shifts.
904 UseImm = ((Imm & 0xfff) == Imm);
905 }
906 } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
907 if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
908 if (ConstFP->isZero() && !ConstFP->isNegative())
909 UseImm = true;
910 }
911
912 unsigned ZReg;
913 unsigned CmpOpc;
914 bool isICmp = true;
915 bool needsExt = false;
916 switch (SrcVT.SimpleTy) {
917 default:
918 return false;
919 case MVT::i1:
920 case MVT::i8:
921 case MVT::i16:
922 needsExt = true;
923 // Intentional fall-through.
924 case MVT::i32:
925 ZReg = AArch64::WZR;
926 if (UseImm)
927 CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
928 else
929 CmpOpc = AArch64::SUBSWrr;
930 break;
931 case MVT::i64:
932 ZReg = AArch64::XZR;
933 if (UseImm)
934 CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
935 else
936 CmpOpc = AArch64::SUBSXrr;
937 break;
938 case MVT::f32:
939 isICmp = false;
940 CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
941 break;
942 case MVT::f64:
943 isICmp = false;
944 CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
945 break;
946 }
947
948 unsigned SrcReg1 = getRegForValue(Src1Value);
949 if (SrcReg1 == 0)
950 return false;
951
952 unsigned SrcReg2;
953 if (!UseImm) {
954 SrcReg2 = getRegForValue(Src2Value);
955 if (SrcReg2 == 0)
956 return false;
957 }
958
959 // We have i1, i8, or i16; we need to either zero-extend or sign-extend.
960 if (needsExt) {
961 SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
962 if (SrcReg1 == 0)
963 return false;
964 if (!UseImm) {
965 SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
966 if (SrcReg2 == 0)
967 return false;
968 }
969 }
970
971 if (isICmp) {
972 if (UseImm)
973 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
974 .addReg(ZReg)
975 .addReg(SrcReg1)
976 .addImm(Imm)
977 .addImm(0);
978 else
979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
980 .addReg(ZReg)
981 .addReg(SrcReg1)
982 .addReg(SrcReg2);
983 } else {
984 if (UseImm)
985 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
986 .addReg(SrcReg1);
987 else
988 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
989 .addReg(SrcReg1)
990 .addReg(SrcReg2);
991 }
992 return true;
993}
994
995bool AArch64FastISel::SelectCmp(const Instruction *I) {
996 const CmpInst *CI = cast<CmpInst>(I);
997
998 // We may not handle every CC for now.
999 AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1000 if (CC == AArch64CC::AL)
1001 return false;
1002
1003 // Emit the cmp.
1004 if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1005 return false;
1006
1007 // Now set a register based on the comparison.
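  // CSINC Wd, WZR, WZR, invert(CC) is the "cset" idiom: it produces 1 when CC
  // holds and 0 otherwise.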
1008 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1009 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1010 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1011 ResultReg)
1012 .addReg(AArch64::WZR)
1013 .addReg(AArch64::WZR)
1014 .addImm(invertedCC);
1015
1016 UpdateValueMap(I, ResultReg);
1017 return true;
1018}
1019
1020bool AArch64FastISel::SelectSelect(const Instruction *I) {
1021 const SelectInst *SI = cast<SelectInst>(I);
1022
1023 EVT DestEVT = TLI.getValueType(SI->getType(), true);
1024 if (!DestEVT.isSimple())
1025 return false;
1026
1027 MVT DestVT = DestEVT.getSimpleVT();
1028 if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1029 DestVT != MVT::f64)
1030 return false;
1031
1032 unsigned CondReg = getRegForValue(SI->getCondition());
1033 if (CondReg == 0)
1034 return false;
1035 unsigned TrueReg = getRegForValue(SI->getTrueValue());
1036 if (TrueReg == 0)
1037 return false;
1038 unsigned FalseReg = getRegForValue(SI->getFalseValue());
1039 if (FalseReg == 0)
1040 return false;
1041
1042
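  // Turn the i1 condition into NZCV flags: mask it down to bit 0, then
  // compare against zero so the CSEL/FCSEL below can select on NE.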
1043 MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
1044 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1045 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1046 ANDReg)
1047 .addReg(CondReg)
1048 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1049
1050 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
1051 .addReg(ANDReg)
1052 .addReg(ANDReg)
1053 .addImm(0)
1054 .addImm(0);
1055
1056 unsigned SelectOpc;
1057 switch (DestVT.SimpleTy) {
1058 default:
1059 return false;
1060 case MVT::i32:
1061 SelectOpc = AArch64::CSELWr;
1062 break;
1063 case MVT::i64:
1064 SelectOpc = AArch64::CSELXr;
1065 break;
1066 case MVT::f32:
1067 SelectOpc = AArch64::FCSELSrrr;
1068 break;
1069 case MVT::f64:
1070 SelectOpc = AArch64::FCSELDrrr;
1071 break;
1072 }
1073
1074 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
1076 ResultReg)
1077 .addReg(TrueReg)
1078 .addReg(FalseReg)
1079 .addImm(AArch64CC::NE);
1080
1081 UpdateValueMap(I, ResultReg);
1082 return true;
1083}
1084
1085bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1086 Value *V = I->getOperand(0);
1087 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1088 return false;
1089
1090 unsigned Op = getRegForValue(V);
1091 if (Op == 0)
1092 return false;
1093
1094 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1095 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1096 ResultReg).addReg(Op);
1097 UpdateValueMap(I, ResultReg);
1098 return true;
1099}
1100
1101bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1102 Value *V = I->getOperand(0);
1103 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1104 return false;
1105
1106 unsigned Op = getRegForValue(V);
1107 if (Op == 0)
1108 return false;
1109
1110 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1112 ResultReg).addReg(Op);
1113 UpdateValueMap(I, ResultReg);
1114 return true;
1115}
1116
1117// FPToUI and FPToSI
1118bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1119 MVT DestVT;
1120 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1121 return false;
1122
1123 unsigned SrcReg = getRegForValue(I->getOperand(0));
1124 if (SrcReg == 0)
1125 return false;
1126
1127 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1128 if (SrcVT == MVT::f128)
1129 return false;
1130
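  // Pick the FCVTZS (signed) or FCVTZU (unsigned) variant matching the source
  // FP size (S or D) and the destination GPR width (W or X).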
1131 unsigned Opc;
1132 if (SrcVT == MVT::f64) {
1133 if (Signed)
1134 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1135 else
1136 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1137 } else {
1138 if (Signed)
1139 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1140 else
1141 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1142 }
1143 unsigned ResultReg = createResultReg(
1144 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1145 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1146 .addReg(SrcReg);
1147 UpdateValueMap(I, ResultReg);
1148 return true;
1149}
1150
1151bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1152 MVT DestVT;
1153 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1154 return false;
1155 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1156 "Unexpected value type.");
1157
1158 unsigned SrcReg = getRegForValue(I->getOperand(0));
1159 if (SrcReg == 0)
1160 return false;
1161
1162 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1163
1164 // Widen narrow integer sources to i32, sign-extending for SIToFP and
1165 // zero-extending for UIToFP.
1165 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1166 SrcReg =
1167 EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1168 if (SrcReg == 0)
1169 return false;
1170 }
1171
1172 MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
1173 : &AArch64::GPR32RegClass);
1174
1175 unsigned Opc;
1176 if (SrcVT == MVT::i64) {
1177 if (Signed)
1178 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1179 else
1180 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1181 } else {
1182 if (Signed)
1183 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1184 else
1185 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1186 }
1187
1188 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1189 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1190 .addReg(SrcReg);
1191 UpdateValueMap(I, ResultReg);
1192 return true;
1193}
1194
1195bool AArch64FastISel::ProcessCallArgs(
1196 SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
1197 SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1198 SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
1199 unsigned &NumBytes) {
1200 SmallVector<CCValAssign, 16> ArgLocs;
1201 CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1202 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
1203
1204 // Get a count of how many bytes are to be pushed on the stack.
1205 NumBytes = CCInfo.getNextStackOffset();
1206
1207 // Issue CALLSEQ_START
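  // (ADJCALLSTACKDOWN), which reserves NumBytes of stack for the outgoing
  // arguments.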
1208 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1209 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1210 .addImm(NumBytes);
1211
1212 // Process the args.
1213 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1214 CCValAssign &VA = ArgLocs[i];
1215 unsigned Arg = ArgRegs[VA.getValNo()];
1216 MVT ArgVT = ArgVTs[VA.getValNo()];
1217
1218 // Handle arg promotion: SExt, ZExt, AExt.
1219 switch (VA.getLocInfo()) {
1220 case CCValAssign::Full:
1221 break;
1222 case CCValAssign::SExt: {
1223 MVT DestVT = VA.getLocVT();
1224 MVT SrcVT = ArgVT;
1225 Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
1226 if (Arg == 0)
1227 return false;
1228 break;
1229 }
1230 case CCValAssign::AExt:
1231 // Intentional fall-through.
1232 case CCValAssign::ZExt: {
1233 MVT DestVT = VA.getLocVT();
1234 MVT SrcVT = ArgVT;
1235 Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
1236 if (Arg == 0)
1237 return false;
1238 break;
1239 }
1240 default:
1241 llvm_unreachable("Unknown arg promotion!");
1242 }
1243
1244 // Now copy/store arg to correct locations.
1245 if (VA.isRegLoc() && !VA.needsCustom()) {
1246 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1247 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
1248 RegArgs.push_back(VA.getLocReg());
1249 } else if (VA.needsCustom()) {
1250 // FIXME: Handle custom args.
1251 return false;
1252 } else {
1253 assert(VA.isMemLoc() && "Assuming store on stack.");
1254
1255 // Need to store on the stack.
1256 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1257
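      // On big-endian targets an argument smaller than 8 bytes is passed in
      // the high part of its 8-byte stack slot, so nudge the store offset
      // forward.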
1258 unsigned BEAlign = 0;
1259 if (ArgSize < 8 && !Subtarget->isLittleEndian())
1260 BEAlign = 8 - ArgSize;
1261
1262 Address Addr;
1263 Addr.setKind(Address::RegBase);
1264 Addr.setReg(AArch64::SP);
1265 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1266
1267 if (!EmitStore(ArgVT, Arg, Addr))
1268 return false;
1269 }
1270 }
1271 return true;
1272}
1273
1274bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
1275 const Instruction *I, CallingConv::ID CC,
1276 unsigned &NumBytes) {
1277 // Issue CALLSEQ_END
1278 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1279 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1280 .addImm(NumBytes)
1281 .addImm(0);
1282
1283 // Now the return value.
1284 if (RetVT != MVT::isVoid) {
1285 SmallVector<CCValAssign, 16> RVLocs;
1286 CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
1287 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1288
1289 // Only handle a single return value.
1290 if (RVLocs.size() != 1)
1291 return false;
1292
1293 // Copy all of the result registers out of their specified physreg.
1294 MVT CopyVT = RVLocs[0].getValVT();
1295 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1296 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1297 TII.get(TargetOpcode::COPY),
1298 ResultReg).addReg(RVLocs[0].getLocReg());
1299 UsedRegs.push_back(RVLocs[0].getLocReg());
1300
1301 // Finally update the result.
1302 UpdateValueMap(I, ResultReg);
1303 }
1304
1305 return true;
1306}
1307
1308bool AArch64FastISel::SelectCall(const Instruction *I,
1309 const char *IntrMemName = nullptr) {
1310 const CallInst *CI = cast<CallInst>(I);
1311 const Value *Callee = CI->getCalledValue();
1312
1313 // Don't handle inline asm or intrinsics.
1314 if (isa<InlineAsm>(Callee))
1315 return false;
1316
1317 // Only handle global variable Callees.
1318 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1319 if (!GV)
1320 return false;
1321
1322 // Check the calling convention.
1323 ImmutableCallSite CS(CI);
1324 CallingConv::ID CC = CS.getCallingConv();
1325
1326 // Let SDISel handle vararg functions.
1327 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1328 FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1329 if (FTy->isVarArg())
1330 return false;
1331
1332 // Handle *simple* calls for now.
1333 MVT RetVT;
1334 Type *RetTy = I->getType();
1335 if (RetTy->isVoidTy())
1336 RetVT = MVT::isVoid;
1337 else if (!isTypeLegal(RetTy, RetVT))
1338 return false;
1339
1340 // Set up the argument vectors.
1341 SmallVector<Value *, 8> Args;
1342 SmallVector<unsigned, 8> ArgRegs;
1343 SmallVector<MVT, 8> ArgVTs;
1344 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1345 Args.reserve(CS.arg_size());
1346 ArgRegs.reserve(CS.arg_size());
1347 ArgVTs.reserve(CS.arg_size());
1348 ArgFlags.reserve(CS.arg_size());
1349
1350 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1351 i != e; ++i) {
1352 // If we're lowering a memory intrinsic instead of a regular call, skip the
1353 // last two arguments, which shouldn't be passed to the underlying function.
1354 if (IntrMemName && e - i <= 2)
1355 break;
1356
1357 unsigned Arg = getRegForValue(*i);
1358 if (Arg == 0)
1359 return false;
1360
1361 ISD::ArgFlagsTy Flags;
1362 unsigned AttrInd = i - CS.arg_begin() + 1;
1363 if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1364 Flags.setSExt();
1365 if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1366 Flags.setZExt();
1367
1368 // FIXME: Only handle *easy* calls for now.
1369 if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
1370 CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
1371 CS.paramHasAttr(AttrInd, Attribute::Nest) ||
1372 CS.paramHasAttr(AttrInd, Attribute::ByVal))
1373 return false;
1374
1375 MVT ArgVT;
1376 Type *ArgTy = (*i)->getType();
1377 if (!isTypeLegal(ArgTy, ArgVT) &&
1378 !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
1379 return false;
1380
1381 // We don't handle vector parameters yet.
1382 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1383 return false;
1384
1385 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
1386 Flags.setOrigAlign(OriginalAlignment);
1387
1388 Args.push_back(*i);
1389 ArgRegs.push_back(Arg);
1390 ArgVTs.push_back(ArgVT);
1391 ArgFlags.push_back(Flags);
1392 }
1393
1394 // Handle the arguments now that we've gotten them.
1395 SmallVector<unsigned, 4> RegArgs;
1396 unsigned NumBytes;
1397 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
1398 return false;
1399
1400 // Issue the call.
1401 MachineInstrBuilder MIB;
1402 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
1403 if (!IntrMemName)
1404 MIB.addGlobalAddress(GV, 0, 0);
1405 else
1406 MIB.addExternalSymbol(IntrMemName, 0);
1407
1408 // Add implicit physical register uses to the call.
1409 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1410 MIB.addReg(RegArgs[i], RegState::Implicit);
1411
1412 // Add a register mask with the call-preserved registers.
1413 // Proper defs for return values will be added by setPhysRegsDeadExcept().
1414 MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
1415
1416 // Finish off the call including any return values.
1417 SmallVector<unsigned, 4> UsedRegs;
1418 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
1419 return false;
1420
1421 // Set all unused physreg defs as dead.
1422 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1423
1424 return true;
1425}
1426
1427bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
1428 if (Alignment)
1429 return Len / Alignment <= 4;
1430 else
1431 return Len < 32;
1432}
1433
1434bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
1435 uint64_t Len, unsigned Alignment) {
1436 // Make sure we don't bloat code by inlining very large memcpy's.
1437 if (!IsMemCpySmall(Len, Alignment))
1438 return false;
1439
1440 int64_t UnscaledOffset = 0;
1441 Address OrigDest = Dest;
1442 Address OrigSrc = Src;
1443
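  // For example, a 15-byte copy with 8-byte (or unknown) alignment is emitted
  // as an i64, an i32, an i16 and an i8 load/store pair.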
1444 while (Len) {
1445 MVT VT;
1446 if (!Alignment || Alignment >= 8) {
1447 if (Len >= 8)
1448 VT = MVT::i64;
1449 else if (Len >= 4)
1450 VT = MVT::i32;
1451 else if (Len >= 2)
1452 VT = MVT::i16;
1453 else {
1454 VT = MVT::i8;
1455 }
1456 } else {
1457 // Bound based on alignment.
1458 if (Len >= 4 && Alignment == 4)
1459 VT = MVT::i32;
1460 else if (Len >= 2 && Alignment == 2)
1461 VT = MVT::i16;
1462 else {
1463 VT = MVT::i8;
1464 }
1465 }
1466
1467 bool RV;
1468 unsigned ResultReg;
1469 RV = EmitLoad(VT, ResultReg, Src);
1470 if (!RV)
1471 return false;
1472
1473 RV = EmitStore(VT, ResultReg, Dest);
1474 if (!RV)
1475 return false;
1476
1477 int64_t Size = VT.getSizeInBits() / 8;
1478 Len -= Size;
1479 UnscaledOffset += Size;
1480
1481 // We need to recompute the unscaled offset for each iteration.
1482 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
1483 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
1484 }
1485
1486 return true;
1487}
1488
1489bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
1490 // FIXME: Handle more intrinsics.
1491 switch (I.getIntrinsicID()) {
1492 default:
1493 return false;
1494 case Intrinsic::memcpy:
1495 case Intrinsic::memmove: {
1496 const MemTransferInst &MTI = cast<MemTransferInst>(I);
1497 // Don't handle volatile.
1498 if (MTI.isVolatile())
1499 return false;
1500
1501 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
1502 // we would emit dead code because we don't currently handle memmoves.
1503 bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
1504 if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
1505 // Small memcpy's are common enough that we want to do them without a call
1506 // if possible.
1507 uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
1508 unsigned Alignment = MTI.getAlignment();
1509 if (IsMemCpySmall(Len, Alignment)) {
1510 Address Dest, Src;
1511 if (!ComputeAddress(MTI.getRawDest(), Dest) ||
1512 !ComputeAddress(MTI.getRawSource(), Src))
1513 return false;
1514 if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
1515 return true;
1516 }
1517 }
1518
1519 if (!MTI.getLength()->getType()->isIntegerTy(64))
1520 return false;
1521
1522 if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
1523 // Fast instruction selection doesn't support the special
1524 // address spaces.
1525 return false;
1526
1527 const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
1528 return SelectCall(&I, IntrMemName);
1529 }
1530 case Intrinsic::memset: {
1531 const MemSetInst &MSI = cast<MemSetInst>(I);
1532 // Don't handle volatile.
1533 if (MSI.isVolatile())
1534 return false;
1535
1536 if (!MSI.getLength()->getType()->isIntegerTy(64))
1537 return false;
1538
1539 if (MSI.getDestAddressSpace() > 255)
1540 // Fast instruction selection doesn't support the special
1541 // address spaces.
1542 return false;
1543
1544 return SelectCall(&I, "memset");
1545 }
1546 case Intrinsic::trap: {
1547 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
1548 .addImm(1);
1549 return true;
1550 }
1551 }
1552 return false;
1553}
1554
1555bool AArch64FastISel::SelectRet(const Instruction *I) {
1556 const ReturnInst *Ret = cast<ReturnInst>(I);
1557 const Function &F = *I->getParent()->getParent();
1558
1559 if (!FuncInfo.CanLowerReturn)
1560 return false;
1561
1562 if (F.isVarArg())
1563 return false;
1564
1565 // Build a list of return value registers.
1566 SmallVector<unsigned, 4> RetRegs;
1567
1568 if (Ret->getNumOperands() > 0) {
1569 CallingConv::ID CC = F.getCallingConv();
1570 SmallVector<ISD::OutputArg, 4> Outs;
1571 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
1572
1573 // Analyze operands of the call, assigning locations to each operand.
1574 SmallVector<CCValAssign, 16> ValLocs;
1575 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
1576 I->getContext());
1577 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
1578 : RetCC_AArch64_AAPCS;
1579 CCInfo.AnalyzeReturn(Outs, RetCC);
1580
1581 // Only handle a single return value for now.
1582 if (ValLocs.size() != 1)
1583 return false;
1584
1585 CCValAssign &VA = ValLocs[0];
1586 const Value *RV = Ret->getOperand(0);
1587
1588 // Don't bother handling odd stuff for now.
1589 if (VA.getLocInfo() != CCValAssign::Full)
1590 return false;
1591 // Only handle register returns for now.
1592 if (!VA.isRegLoc())
1593 return false;
1594 unsigned Reg = getRegForValue(RV);
1595 if (Reg == 0)
1596 return false;
1597
1598 unsigned SrcReg = Reg + VA.getValNo();
1599 unsigned DestReg = VA.getLocReg();
1600 // Avoid a cross-class copy. This is very unlikely.
1601 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
1602 return false;
1603
1604 EVT RVEVT = TLI.getValueType(RV->getType());
1605 if (!RVEVT.isSimple())
1606 return false;
1607
1608 // Vectors (of > 1 lane) in big endian need tricky handling.
1609 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
1610 return false;
1611
1612 MVT RVVT = RVEVT.getSimpleVT();
1613 if (RVVT == MVT::f128)
1614 return false;
1615 MVT DestVT = VA.getValVT();
1616 // Special handling for extended integers.
1617 if (RVVT != DestVT) {
1618 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
1619 return false;
1620
1621 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1622 return false;
1623
1624 bool isZExt = Outs[0].Flags.isZExt();
1625 SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
1626 if (SrcReg == 0)
1627 return false;
1628 }
1629
1630 // Make the copy.
1631 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1632 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
1633
1634 // Add register to return instruction.
1635 RetRegs.push_back(VA.getLocReg());
1636 }
1637
1638 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1639 TII.get(AArch64::RET_ReallyLR));
1640 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1641 MIB.addReg(RetRegs[i], RegState::Implicit);
1642 return true;
1643}
1644
1645bool AArch64FastISel::SelectTrunc(const Instruction *I) {
1646 Type *DestTy = I->getType();
1647 Value *Op = I->getOperand(0);
1648 Type *SrcTy = Op->getType();
1649
1650 EVT SrcEVT = TLI.getValueType(SrcTy, true);
1651 EVT DestEVT = TLI.getValueType(DestTy, true);
1652 if (!SrcEVT.isSimple())
1653 return false;
1654 if (!DestEVT.isSimple())
1655 return false;
1656
1657 MVT SrcVT = SrcEVT.getSimpleVT();
1658 MVT DestVT = DestEVT.getSimpleVT();
1659
1660 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
1661 SrcVT != MVT::i8)
1662 return false;
1663 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
1664 DestVT != MVT::i1)
1665 return false;
1666
1667 unsigned SrcReg = getRegForValue(Op);
1668 if (!SrcReg)
1669 return false;
1670
1671 // If we're truncating from i64 to a smaller non-legal type then generate an
1672 // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
1673 // generate any code.
1674 if (SrcVT == MVT::i64) {
1675 uint64_t Mask = 0;
1676 switch (DestVT.SimpleTy) {
1677 default:
1678 // Trunc i64 to i32 is handled by the target-independent fast-isel.
1679 return false;
1680 case MVT::i1:
1681 Mask = 0x1;
1682 break;
1683 case MVT::i8:
1684 Mask = 0xff;
1685 break;
1686 case MVT::i16:
1687 Mask = 0xffff;
1688 break;
1689 }
1690 // Issue an extract_subreg to get the lower 32-bits.
1691 unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
1692 AArch64::sub_32);
1693 MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
1694 // Create the AND instruction which performs the actual truncation.
1695 unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
1696 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1697 ANDReg)
1698 .addReg(Reg32)
1699 .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
1700 SrcReg = ANDReg;
1701 }
1702
1703 UpdateValueMap(I, SrcReg);
1704 return true;
1705}
1706
1707unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
1708 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
1709 DestVT == MVT::i64) &&
1710 "Unexpected value type.");
1711 // Handle i8 and i16 as i32.
1712 if (DestVT == MVT::i8 || DestVT == MVT::i16)
1713 DestVT = MVT::i32;
1714
1715 if (isZExt) {
1716 MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
1717 unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
1718 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
1719 ResultReg)
1720 .addReg(SrcReg)
1721 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1722
1723 if (DestVT == MVT::i64) {
1724 // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
1725 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
1726 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1727 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1728 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1729 .addImm(0)
1730 .addReg(ResultReg)
1731 .addImm(AArch64::sub_32);
1732 ResultReg = Reg64;
1733 }
1734 return ResultReg;
1735 } else {
1736 if (DestVT == MVT::i64) {
1737 // FIXME: We're SExt i1 to i64.
1738 return 0;
1739 }
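    // Sign-extend an i1 by replicating bit 0 across the register: SBFM with
    // immr = 0 and imms = 0.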
1740 unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1741 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
1742 ResultReg)
1743 .addReg(SrcReg)
1744 .addImm(0)
1745 .addImm(0);
1746 return ResultReg;
1747 }
1748}
1749
1750unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1751 bool isZExt) {
1752 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
1753
1754 // FastISel does not have plumbing to deal with extensions where the SrcVT or
1755 // DestVT are odd things, so test to make sure that they are both types we can
1756 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
1757 // bail out to SelectionDAG.
1758 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
1759 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
1760 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
1761 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
1762    return 0;
1763
1764  unsigned Opc;
1765 unsigned Imm = 0;
1766
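  // Imm is the 'imms' operand of the UBFM/SBFM emitted below: the index of the
  // top bit of the source field (7 for i8, 15 for i16, 31 for i32).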
1767 switch (SrcVT.SimpleTy) {
1768 default:
1769 return 0;
1770 case MVT::i1:
1771 return Emiti1Ext(SrcReg, DestVT, isZExt);
1772 case MVT::i8:
1773 if (DestVT == MVT::i64)
1774 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
1775 else
1776 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
1777 Imm = 7;
1778 break;
1779 case MVT::i16:
1780 if (DestVT == MVT::i64)
1781 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
1782 else
1783 Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
1784 Imm = 15;
1785 break;
1786 case MVT::i32:
1787 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
1788 Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
1789 Imm = 31;
1790 break;
1791 }
1792
1793 // Handle i8 and i16 as i32.
1794 if (DestVT == MVT::i8 || DestVT == MVT::i16)
1795 DestVT = MVT::i32;
1796 else if (DestVT == MVT::i64) {
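    // Extending to i64 uses the X-register form of UBFM/SBFM, so first place
    // the 32-bit source value into the low half of a 64-bit register.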
1797 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1799 TII.get(AArch64::SUBREG_TO_REG), Src64)
1800 .addImm(0)
1801 .addReg(SrcReg)
1802 .addImm(AArch64::sub_32);
1803 SrcReg = Src64;
1804 }
1805
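  // UBFM/SBFM with immr = 0 and imms = Imm copies bits [Imm:0] of the source
  // and zero- or sign-extends the result (e.g. the UXTB/SXTB aliases when
  // Imm == 7).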
1806 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1807 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1808 .addReg(SrcReg)
1809 .addImm(0)
1810 .addImm(Imm);
1811
1812 return ResultReg;
1813}
1814
1815bool AArch64FastISel::SelectIntExt(const Instruction *I) {
1816  // On AArch64, in general, integer casts don't involve legal types; this code
1817 // handles promotable integers. The high bits for a type smaller than
1818 // the register size are assumed to be undefined.
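  // For example, 'zext i8 %x to i32' is lowered below to a single UBFM (UXTB)
  // instead of being punted back to SelectionDAG.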
1819 Type *DestTy = I->getType();
1820 Value *Src = I->getOperand(0);
1821 Type *SrcTy = Src->getType();
1822
1823 bool isZExt = isa<ZExtInst>(I);
1824 unsigned SrcReg = getRegForValue(Src);
1825 if (!SrcReg)
1826 return false;
1827
1828 EVT SrcEVT = TLI.getValueType(SrcTy, true);
1829 EVT DestEVT = TLI.getValueType(DestTy, true);
1830 if (!SrcEVT.isSimple())
1831 return false;
1832 if (!DestEVT.isSimple())
1833 return false;
1834
1835 MVT SrcVT = SrcEVT.getSimpleVT();
1836 MVT DestVT = DestEVT.getSimpleVT();
1837 unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
1838 if (ResultReg == 0)
1839 return false;
1840 UpdateValueMap(I, ResultReg);
1841 return true;
1842}
1843
1844bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
1845 EVT DestEVT = TLI.getValueType(I->getType(), true);
1846 if (!DestEVT.isSimple())
1847 return false;
1848
1849 MVT DestVT = DestEVT.getSimpleVT();
1850 if (DestVT != MVT::i64 && DestVT != MVT::i32)
1851 return false;
1852
1853 unsigned DivOpc;
1854 bool is64bit = (DestVT == MVT::i64);
1855 switch (ISDOpcode) {
1856 default:
1857 return false;
1858 case ISD::SREM:
1859 DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
1860 break;
1861 case ISD::UREM:
1862 DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
1863 break;
1864 }
1865 unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
1866 unsigned Src0Reg = getRegForValue(I->getOperand(0));
1867 if (!Src0Reg)
1868 return false;
1869
1870 unsigned Src1Reg = getRegForValue(I->getOperand(1));
1871 if (!Src1Reg)
1872 return false;
1873
1874 unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
1876 .addReg(Src0Reg)
1877 .addReg(Src1Reg);
1878 // The remainder is computed as numerator - (quotient * denominator) using the
1879 // MSUB instruction.
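  // MSUB Rd, Rn, Rm, Ra computes Ra - Rn * Rm, so the operands below are
  // (quotient, denominator, numerator).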
1880 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
1881 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
1882 .addReg(QuotReg)
1883 .addReg(Src1Reg)
1884 .addReg(Src0Reg);
1885 UpdateValueMap(I, ResultReg);
1886 return true;
1887}
1888
1889bool AArch64FastISel::SelectMul(const Instruction *I) {
1890 EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1891 if (!SrcEVT.isSimple())
1892 return false;
1893 MVT SrcVT = SrcEVT.getSimpleVT();
1894
1895 // Must be simple value type. Don't handle vectors.
1896 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
1897 SrcVT != MVT::i8)
1898 return false;
1899
1900 unsigned Opc;
1901 unsigned ZReg;
1902 switch (SrcVT.SimpleTy) {
1903 default:
1904 return false;
1905 case MVT::i8:
1906 case MVT::i16:
1907 case MVT::i32:
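    // i8, i16 and i32 all use the 32-bit MADD; for the narrow types only the
    // low bits of the result are ever used.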
1908 ZReg = AArch64::WZR;
1909 Opc = AArch64::MADDWrrr;
1910    SrcVT = MVT::i32;
1911    break;
1912 case MVT::i64:
1913 ZReg = AArch64::XZR;
1914 Opc = AArch64::MADDXrrr;
1915 break;
1916 }
1917
1918 unsigned Src0Reg = getRegForValue(I->getOperand(0));
1919 if (!Src0Reg)
1920 return false;
1921
1922 unsigned Src1Reg = getRegForValue(I->getOperand(1));
1923 if (!Src1Reg)
1924 return false;
1925
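  // MUL is just an alias for MADD with the zero register as the addend, so
  // emit Rd = Rn * Rm + ZR explicitly.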
1926 // Create the base instruction, then add the operands.
1927 unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
1928 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1929 .addReg(Src0Reg)
1930 .addReg(Src1Reg)
1931 .addReg(ZReg);
1932 UpdateValueMap(I, ResultReg);
1933 return true;
1934}
1935
1936bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
1937 switch (I->getOpcode()) {
1938 default:
1939 break;
1940 case Instruction::Load:
1941 return SelectLoad(I);
1942 case Instruction::Store:
1943 return SelectStore(I);
1944 case Instruction::Br:
1945 return SelectBranch(I);
1946 case Instruction::IndirectBr:
1947 return SelectIndirectBr(I);
1948 case Instruction::FCmp:
1949 case Instruction::ICmp:
1950 return SelectCmp(I);
1951 case Instruction::Select:
1952 return SelectSelect(I);
1953 case Instruction::FPExt:
1954 return SelectFPExt(I);
1955 case Instruction::FPTrunc:
1956 return SelectFPTrunc(I);
1957 case Instruction::FPToSI:
1958 return SelectFPToInt(I, /*Signed=*/true);
1959 case Instruction::FPToUI:
1960 return SelectFPToInt(I, /*Signed=*/false);
1961 case Instruction::SIToFP:
1962 return SelectIntToFP(I, /*Signed=*/true);
1963 case Instruction::UIToFP:
1964 return SelectIntToFP(I, /*Signed=*/false);
1965 case Instruction::SRem:
1966 return SelectRem(I, ISD::SREM);
1967 case Instruction::URem:
1968 return SelectRem(I, ISD::UREM);
1969 case Instruction::Call:
1970 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1971 return SelectIntrinsicCall(*II);
1972 return SelectCall(I);
1973 case Instruction::Ret:
1974 return SelectRet(I);
1975 case Instruction::Trunc:
1976 return SelectTrunc(I);
1977 case Instruction::ZExt:
1978 case Instruction::SExt:
1979 return SelectIntExt(I);
1980 case Instruction::Mul:
1981 // FIXME: This really should be handled by the target-independent selector.
1982 return SelectMul(I);
1983 }
1984 return false;
1985 // Silence warnings.
1986 (void)&CC_AArch64_DarwinPCS_VarArg;
1987}
1988
1989namespace llvm {
1990llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
1991 const TargetLibraryInfo *libInfo) {
1992 return new AArch64FastISel(funcInfo, libInfo);
1993}
1994}