Blame - llvm/lib/Target/AArch64/AArch64ISelLowering.cpp - toolchain/llvm-project

blob: b68c43a63cbe83a7e92d60118457bbcdc1fafd4d [file] [log] [blame]

Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1	//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that AArch64 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#define DEBUG_TYPE "aarch64-isel"
				16	#include "AArch64.h"
				17	#include "AArch64ISelLowering.h"
				18	#include "AArch64MachineFunctionInfo.h"
				19	#include "AArch64TargetMachine.h"
				20	#include "AArch64TargetObjectFile.h"
Tim Northover	969afbe	2013-02-05 13:24:47 +0000	[diff] [blame]	21	#include "Utils/AArch64BaseInfo.h"
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	22	#include "llvm/CodeGen/Analysis.h"
				23	#include "llvm/CodeGen/CallingConvLower.h"
				24	#include "llvm/CodeGen/MachineFrameInfo.h"
				25	#include "llvm/CodeGen/MachineInstrBuilder.h"
				26	#include "llvm/CodeGen/MachineRegisterInfo.h"
				27	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
				28	#include "llvm/IR/CallingConv.h"
				29
				30	using namespace llvm;
				31
				32	static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
				33	const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
				34
				35	if (Subtarget->isTargetLinux())
				36	return new AArch64LinuxTargetObjectFile();
				37	if (Subtarget->isTargetELF())
				38	return new TargetLoweringObjectFileELF();
				39	llvm_unreachable("unknown subtarget type");
				40	}
				41
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	42	AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
Bill Wendling	496dc33	2013-06-07 05:00:11 +0000	[diff] [blame]	43	: TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	44
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	45	const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
				46
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	47	// SIMD compares set the entire lane's bits to 1
				48	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
				49
				50	// Scalar register <-> type mapping
				51	addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
				52	addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
				53	addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
				54	addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
				55	addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
				56	addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
				57
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	58	if (Subtarget->hasNEON()) {
				59	// And the vectors
				60	addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
				61	addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
				62	addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
				63	addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
				64	addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
				65	addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
				66	addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
				67	addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
				68	addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
				69	addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
				70	addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
				71	}
				72
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	73	computeRegisterProperties();
				74
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	75	// We combine OR nodes for bitfield and NEON BSL operations.
				76	setTargetDAGCombine(ISD::OR);
				77
				78	setTargetDAGCombine(ISD::AND);
				79	setTargetDAGCombine(ISD::SRA);
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	80	setTargetDAGCombine(ISD::SRL);
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	81	setTargetDAGCombine(ISD::SHL);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	82
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	83	setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
				84
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	85	// AArch64 does not have i1 loads, or much of anything for i1 really.
				86	setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
				87	setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
				88	setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
				89
				90	setStackPointerRegisterToSaveRestore(AArch64::XSP);
				91	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				92	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				93	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				94
				95	// We'll lower globals to wrappers for selection.
				96	setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
				97	setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
				98
				99	// A64 instructions have the comparison predicate attached to the user of the
				100	// result, but having a separate comparison is valuable for matching.
				101	setOperationAction(ISD::BR_CC, MVT::i32, Custom);
				102	setOperationAction(ISD::BR_CC, MVT::i64, Custom);
				103	setOperationAction(ISD::BR_CC, MVT::f32, Custom);
				104	setOperationAction(ISD::BR_CC, MVT::f64, Custom);
				105
				106	setOperationAction(ISD::SELECT, MVT::i32, Custom);
				107	setOperationAction(ISD::SELECT, MVT::i64, Custom);
				108	setOperationAction(ISD::SELECT, MVT::f32, Custom);
				109	setOperationAction(ISD::SELECT, MVT::f64, Custom);
				110
				111	setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
				112	setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
				113	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				114	setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
				115
				116	setOperationAction(ISD::BRCOND, MVT::Other, Custom);
				117
				118	setOperationAction(ISD::SETCC, MVT::i32, Custom);
				119	setOperationAction(ISD::SETCC, MVT::i64, Custom);
				120	setOperationAction(ISD::SETCC, MVT::f32, Custom);
				121	setOperationAction(ISD::SETCC, MVT::f64, Custom);
				122
				123	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
				124	setOperationAction(ISD::JumpTable, MVT::i32, Custom);
				125	setOperationAction(ISD::JumpTable, MVT::i64, Custom);
				126
				127	setOperationAction(ISD::VASTART, MVT::Other, Custom);
				128	setOperationAction(ISD::VACOPY, MVT::Other, Custom);
				129	setOperationAction(ISD::VAEND, MVT::Other, Expand);
				130	setOperationAction(ISD::VAARG, MVT::Other, Expand);
				131
				132	setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
				133
				134	setOperationAction(ISD::ROTL, MVT::i32, Expand);
				135	setOperationAction(ISD::ROTL, MVT::i64, Expand);
				136
				137	setOperationAction(ISD::UREM, MVT::i32, Expand);
				138	setOperationAction(ISD::UREM, MVT::i64, Expand);
				139	setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
				140	setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
				141
				142	setOperationAction(ISD::SREM, MVT::i32, Expand);
				143	setOperationAction(ISD::SREM, MVT::i64, Expand);
				144	setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
				145	setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
				146
				147	setOperationAction(ISD::CTPOP, MVT::i32, Expand);
				148	setOperationAction(ISD::CTPOP, MVT::i64, Expand);
				149
				150	// Legal floating-point operations.
				151	setOperationAction(ISD::FABS, MVT::f32, Legal);
				152	setOperationAction(ISD::FABS, MVT::f64, Legal);
				153
				154	setOperationAction(ISD::FCEIL, MVT::f32, Legal);
				155	setOperationAction(ISD::FCEIL, MVT::f64, Legal);
				156
				157	setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
				158	setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
				159
				160	setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
				161	setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
				162
				163	setOperationAction(ISD::FNEG, MVT::f32, Legal);
				164	setOperationAction(ISD::FNEG, MVT::f64, Legal);
				165
				166	setOperationAction(ISD::FRINT, MVT::f32, Legal);
				167	setOperationAction(ISD::FRINT, MVT::f64, Legal);
				168
				169	setOperationAction(ISD::FSQRT, MVT::f32, Legal);
				170	setOperationAction(ISD::FSQRT, MVT::f64, Legal);
				171
				172	setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
				173	setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
				174
				175	setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
				176	setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
				177	setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
				178
				179	// Illegal floating-point operations.
				180	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				181	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				182
				183	setOperationAction(ISD::FCOS, MVT::f32, Expand);
				184	setOperationAction(ISD::FCOS, MVT::f64, Expand);
				185
				186	setOperationAction(ISD::FEXP, MVT::f32, Expand);
				187	setOperationAction(ISD::FEXP, MVT::f64, Expand);
				188
				189	setOperationAction(ISD::FEXP2, MVT::f32, Expand);
				190	setOperationAction(ISD::FEXP2, MVT::f64, Expand);
				191
				192	setOperationAction(ISD::FLOG, MVT::f32, Expand);
				193	setOperationAction(ISD::FLOG, MVT::f64, Expand);
				194
				195	setOperationAction(ISD::FLOG2, MVT::f32, Expand);
				196	setOperationAction(ISD::FLOG2, MVT::f64, Expand);
				197
				198	setOperationAction(ISD::FLOG10, MVT::f32, Expand);
				199	setOperationAction(ISD::FLOG10, MVT::f64, Expand);
				200
				201	setOperationAction(ISD::FPOW, MVT::f32, Expand);
				202	setOperationAction(ISD::FPOW, MVT::f64, Expand);
				203
				204	setOperationAction(ISD::FPOWI, MVT::f32, Expand);
				205	setOperationAction(ISD::FPOWI, MVT::f64, Expand);
				206
				207	setOperationAction(ISD::FREM, MVT::f32, Expand);
				208	setOperationAction(ISD::FREM, MVT::f64, Expand);
				209
				210	setOperationAction(ISD::FSIN, MVT::f32, Expand);
				211	setOperationAction(ISD::FSIN, MVT::f64, Expand);
				212
Tim Northover	95f4892	2013-03-08 13:55:07 +0000	[diff] [blame]	213	setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
				214	setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	215
				216	// Virtually no operation on f128 is legal, but LLVM can't expand them when
				217	// there's a valid register class, so we need custom operations in most cases.
				218	setOperationAction(ISD::FABS, MVT::f128, Expand);
				219	setOperationAction(ISD::FADD, MVT::f128, Custom);
				220	setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
				221	setOperationAction(ISD::FCOS, MVT::f128, Expand);
				222	setOperationAction(ISD::FDIV, MVT::f128, Custom);
				223	setOperationAction(ISD::FMA, MVT::f128, Expand);
				224	setOperationAction(ISD::FMUL, MVT::f128, Custom);
				225	setOperationAction(ISD::FNEG, MVT::f128, Expand);
				226	setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
				227	setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
				228	setOperationAction(ISD::FPOW, MVT::f128, Expand);
				229	setOperationAction(ISD::FREM, MVT::f128, Expand);
				230	setOperationAction(ISD::FRINT, MVT::f128, Expand);
				231	setOperationAction(ISD::FSIN, MVT::f128, Expand);
Tim Northover	95f4892	2013-03-08 13:55:07 +0000	[diff] [blame]	232	setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	233	setOperationAction(ISD::FSQRT, MVT::f128, Expand);
				234	setOperationAction(ISD::FSUB, MVT::f128, Custom);
				235	setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
				236	setOperationAction(ISD::SETCC, MVT::f128, Custom);
				237	setOperationAction(ISD::BR_CC, MVT::f128, Custom);
				238	setOperationAction(ISD::SELECT, MVT::f128, Expand);
				239	setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
				240	setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
				241
				242	// Lowering for many of the conversions is actually specified by the non-f128
				243	// type. The LowerXXX function will be trivial when f128 isn't involved.
				244	setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
				245	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				246	setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
				247	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
				248	setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
				249	setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
				250	setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
				251	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				252	setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
				253	setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
				254	setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
				255	setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
				256	setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
				257	setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
				258
				259	// This prevents LLVM trying to compress double constants into a floating
				260	// constant-pool entry and trying to load from there. It's of doubtful benefit
				261	// for A64: we'd need LDR followed by FCVT, I believe.
				262	setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
				263	setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
				264	setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
				265
				266	setTruncStoreAction(MVT::f128, MVT::f64, Expand);
				267	setTruncStoreAction(MVT::f128, MVT::f32, Expand);
				268	setTruncStoreAction(MVT::f128, MVT::f16, Expand);
				269	setTruncStoreAction(MVT::f64, MVT::f32, Expand);
				270	setTruncStoreAction(MVT::f64, MVT::f16, Expand);
				271	setTruncStoreAction(MVT::f32, MVT::f16, Expand);
				272
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	273	setExceptionPointerRegister(AArch64::X0);
				274	setExceptionSelectorRegister(AArch64::X1);
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	275
				276	if (Subtarget->hasNEON()) {
				277	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				278	setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
				279	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				280	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
				281	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				282	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
				283	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				284	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				285	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
				286	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				287	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				288
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	289	setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
				290
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	291	setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
				292	setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
				293	setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
				294	setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
				295	setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
				296	setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
				297	setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
				298	setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
				299	setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
				300	setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
				301	}
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	302	}
				303
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	304	EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	305	// It's reasonably important that this value matches the "natural" legal
				306	// promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
				307	// in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
				308	if (!VT.isVector()) return MVT::i32;
				309	return VT.changeVectorElementTypeToInteger();
				310	}
				311
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	312	static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
				313	unsigned &LdrOpc,
				314	unsigned &StrOpc) {
Craig Topper	e952ad0	2013-07-15 07:22:00 +0000	[diff] [blame]	315	static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
				316	AArch64::LDXR_word, AArch64::LDXR_dword};
				317	static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
				318	AArch64::LDAXR_word, AArch64::LDAXR_dword};
				319	static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
				320	AArch64::STXR_word, AArch64::STXR_dword};
				321	static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
				322	AArch64::STLXR_word, AArch64::STLXR_dword};
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	323
Craig Topper	e952ad0	2013-07-15 07:22:00 +0000	[diff] [blame]	324	const unsigned LoadOps, StoreOps;
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	325	if (Ord == Acquire \|\| Ord == AcquireRelease \|\| Ord == SequentiallyConsistent)
				326	LoadOps = LoadAcqs;
				327	else
				328	LoadOps = LoadBares;
				329
				330	if (Ord == Release \|\| Ord == AcquireRelease \|\| Ord == SequentiallyConsistent)
				331	StoreOps = StoreRels;
				332	else
				333	StoreOps = StoreBares;
				334
				335	assert(isPowerOf2_32(Size) && Size <= 8 &&
				336	"unsupported size for atomic binary op!");
				337
				338	LdrOpc = LoadOps[Log2_32(Size)];
				339	StrOpc = StoreOps[Log2_32(Size)];
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	340	}
				341
				342	MachineBasicBlock *
				343	AArch64TargetLowering::emitAtomicBinary(MachineInstr MI, MachineBasicBlock BB,
				344	unsigned Size,
				345	unsigned BinOpcode) const {
				346	// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
				347	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				348
				349	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				350	MachineFunction *MF = BB->getParent();
				351	MachineFunction::iterator It = BB;
				352	++It;
				353
				354	unsigned dest = MI->getOperand(0).getReg();
				355	unsigned ptr = MI->getOperand(1).getReg();
				356	unsigned incr = MI->getOperand(2).getReg();
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	357	AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	358	DebugLoc dl = MI->getDebugLoc();
				359
				360	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
				361
				362	unsigned ldrOpc, strOpc;
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	363	getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	364
				365	MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
				366	MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
				367	MF->insert(It, loopMBB);
				368	MF->insert(It, exitMBB);
				369
				370	// Transfer the remainder of BB and its successor edges to exitMBB.
				371	exitMBB->splice(exitMBB->begin(), BB,
				372	llvm::next(MachineBasicBlock::iterator(MI)),
				373	BB->end());
				374	exitMBB->transferSuccessorsAndUpdatePHIs(BB);
				375
				376	const TargetRegisterClass *TRC
				377	= Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
				378	unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
				379
				380	// thisMBB:
				381	// ...
				382	// fallthrough --> loopMBB
				383	BB->addSuccessor(loopMBB);
				384
				385	// loopMBB:
				386	// ldxr dest, ptr
				387	// <binop> scratch, dest, incr
				388	// stxr stxr_status, scratch, ptr
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	389	// cbnz stxr_status, loopMBB
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	390	// fallthrough --> exitMBB
				391	BB = loopMBB;
				392	BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
				393	if (BinOpcode) {
				394	// All arithmetic operations we'll be creating are designed to take an extra
				395	// shift or extend operand, which we can conveniently set to zero.
				396
				397	// Operand order needs to go the other way for NAND.
				398	if (BinOpcode == AArch64::BICwww_lsl \|\| BinOpcode == AArch64::BICxxx_lsl)
				399	BuildMI(BB, dl, TII->get(BinOpcode), scratch)
				400	.addReg(incr).addReg(dest).addImm(0);
				401	else
				402	BuildMI(BB, dl, TII->get(BinOpcode), scratch)
				403	.addReg(dest).addReg(incr).addImm(0);
				404	}
				405
				406	// From the stxr, the register is GPR32; from the cmp it's GPR32wsp
				407	unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
				408	MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
				409
				410	BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	411	BuildMI(BB, dl, TII->get(AArch64::CBNZw))
				412	.addReg(stxr_status).addMBB(loopMBB);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	413
				414	BB->addSuccessor(loopMBB);
				415	BB->addSuccessor(exitMBB);
				416
				417	// exitMBB:
				418	// ...
				419	BB = exitMBB;
				420
				421	MI->eraseFromParent(); // The instruction is gone now.
				422
				423	return BB;
				424	}
				425
				426	MachineBasicBlock *
				427	AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
				428	MachineBasicBlock *BB,
				429	unsigned Size,
				430	unsigned CmpOp,
				431	A64CC::CondCodes Cond) const {
				432	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				433
				434	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				435	MachineFunction *MF = BB->getParent();
				436	MachineFunction::iterator It = BB;
				437	++It;
				438
				439	unsigned dest = MI->getOperand(0).getReg();
				440	unsigned ptr = MI->getOperand(1).getReg();
				441	unsigned incr = MI->getOperand(2).getReg();
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	442	AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
				443
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	444	unsigned oldval = dest;
				445	DebugLoc dl = MI->getDebugLoc();
				446
				447	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
				448	const TargetRegisterClass TRC, TRCsp;
				449	if (Size == 8) {
				450	TRC = &AArch64::GPR64RegClass;
				451	TRCsp = &AArch64::GPR64xspRegClass;
				452	} else {
				453	TRC = &AArch64::GPR32RegClass;
				454	TRCsp = &AArch64::GPR32wspRegClass;
				455	}
				456
				457	unsigned ldrOpc, strOpc;
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	458	getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	459
				460	MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
				461	MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
				462	MF->insert(It, loopMBB);
				463	MF->insert(It, exitMBB);
				464
				465	// Transfer the remainder of BB and its successor edges to exitMBB.
				466	exitMBB->splice(exitMBB->begin(), BB,
				467	llvm::next(MachineBasicBlock::iterator(MI)),
				468	BB->end());
				469	exitMBB->transferSuccessorsAndUpdatePHIs(BB);
				470
				471	unsigned scratch = MRI.createVirtualRegister(TRC);
				472	MRI.constrainRegClass(scratch, TRCsp);
				473
				474	// thisMBB:
				475	// ...
				476	// fallthrough --> loopMBB
				477	BB->addSuccessor(loopMBB);
				478
				479	// loopMBB:
				480	// ldxr dest, ptr
				481	// cmp incr, dest (, sign extend if necessary)
				482	// csel scratch, dest, incr, cond
				483	// stxr stxr_status, scratch, ptr
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	484	// cbnz stxr_status, loopMBB
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	485	// fallthrough --> exitMBB
				486	BB = loopMBB;
				487	BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
				488
				489	// Build compare and cmov instructions.
				490	MRI.constrainRegClass(incr, TRCsp);
				491	BuildMI(BB, dl, TII->get(CmpOp))
				492	.addReg(incr).addReg(oldval).addImm(0);
				493
				494	BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
				495	scratch)
				496	.addReg(oldval).addReg(incr).addImm(Cond);
				497
				498	unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
				499	MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
				500
				501	BuildMI(BB, dl, TII->get(strOpc), stxr_status)
				502	.addReg(scratch).addReg(ptr);
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	503	BuildMI(BB, dl, TII->get(AArch64::CBNZw))
				504	.addReg(stxr_status).addMBB(loopMBB);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	505
				506	BB->addSuccessor(loopMBB);
				507	BB->addSuccessor(exitMBB);
				508
				509	// exitMBB:
				510	// ...
				511	BB = exitMBB;
				512
				513	MI->eraseFromParent(); // The instruction is gone now.
				514
				515	return BB;
				516	}
				517
				518	MachineBasicBlock *
				519	AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
				520	MachineBasicBlock *BB,
				521	unsigned Size) const {
				522	unsigned dest = MI->getOperand(0).getReg();
				523	unsigned ptr = MI->getOperand(1).getReg();
				524	unsigned oldval = MI->getOperand(2).getReg();
				525	unsigned newval = MI->getOperand(3).getReg();
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	526	AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	527	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				528	DebugLoc dl = MI->getDebugLoc();
				529
				530	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
				531	const TargetRegisterClass *TRCsp;
				532	TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
				533
				534	unsigned ldrOpc, strOpc;
Tim Northover	15410e9	2013-04-08 08:40:41 +0000	[diff] [blame]	535	getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	536
				537	MachineFunction *MF = BB->getParent();
				538	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				539	MachineFunction::iterator It = BB;
				540	++It; // insert the new blocks after the current block
				541
				542	MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
				543	MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
				544	MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
				545	MF->insert(It, loop1MBB);
				546	MF->insert(It, loop2MBB);
				547	MF->insert(It, exitMBB);
				548
				549	// Transfer the remainder of BB and its successor edges to exitMBB.
				550	exitMBB->splice(exitMBB->begin(), BB,
				551	llvm::next(MachineBasicBlock::iterator(MI)),
				552	BB->end());
				553	exitMBB->transferSuccessorsAndUpdatePHIs(BB);
				554
				555	// thisMBB:
				556	// ...
				557	// fallthrough --> loop1MBB
				558	BB->addSuccessor(loop1MBB);
				559
				560	// loop1MBB:
				561	// ldxr dest, [ptr]
				562	// cmp dest, oldval
				563	// b.ne exitMBB
				564	BB = loop1MBB;
				565	BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
				566
				567	unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
				568	MRI.constrainRegClass(dest, TRCsp);
				569	BuildMI(BB, dl, TII->get(CmpOp))
				570	.addReg(dest).addReg(oldval).addImm(0);
				571	BuildMI(BB, dl, TII->get(AArch64::Bcc))
				572	.addImm(A64CC::NE).addMBB(exitMBB);
				573	BB->addSuccessor(loop2MBB);
				574	BB->addSuccessor(exitMBB);
				575
				576	// loop2MBB:
				577	// strex stxr_status, newval, [ptr]
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	578	// cbnz stxr_status, loop1MBB
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	579	BB = loop2MBB;
				580	unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
				581	MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
				582
				583	BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
Tim Northover	9fafdf6	2013-02-28 13:52:07 +0000	[diff] [blame]	584	BuildMI(BB, dl, TII->get(AArch64::CBNZw))
				585	.addReg(stxr_status).addMBB(loop1MBB);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	586	BB->addSuccessor(loop1MBB);
				587	BB->addSuccessor(exitMBB);
				588
				589	// exitMBB:
				590	// ...
				591	BB = exitMBB;
				592
				593	MI->eraseFromParent(); // The instruction is gone now.
				594
				595	return BB;
				596	}
				597
				598	MachineBasicBlock *
				599	AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
				600	MachineBasicBlock *MBB) const {
				601	// We materialise the F128CSEL pseudo-instruction using conditional branches
				602	// and loads, giving an instruciton sequence like:
				603	// str q0, [sp]
				604	// b.ne IfTrue
				605	// b Finish
				606	// IfTrue:
				607	// str q1, [sp]
				608	// Finish:
				609	// ldr q0, [sp]
				610	//
				611	// Using virtual registers would probably not be beneficial since COPY
				612	// instructions are expensive for f128 (there's no actual instruction to
				613	// implement them).
				614	//
				615	// An alternative would be to do an integer-CSEL on some address. E.g.:
				616	// mov x0, sp
				617	// add x1, sp, #16
				618	// str q0, [x0]
				619	// str q1, [x1]
				620	// csel x0, x0, x1, ne
				621	// ldr q0, [x0]
				622	//
				623	// It's unclear which approach is actually optimal.
				624	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				625	MachineFunction *MF = MBB->getParent();
				626	const BasicBlock *LLVM_BB = MBB->getBasicBlock();
				627	DebugLoc DL = MI->getDebugLoc();
				628	MachineFunction::iterator It = MBB;
				629	++It;
				630
				631	unsigned DestReg = MI->getOperand(0).getReg();
				632	unsigned IfTrueReg = MI->getOperand(1).getReg();
				633	unsigned IfFalseReg = MI->getOperand(2).getReg();
				634	unsigned CondCode = MI->getOperand(3).getImm();
				635	bool NZCVKilled = MI->getOperand(4).isKill();
				636
				637	MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
				638	MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
				639	MF->insert(It, TrueBB);
				640	MF->insert(It, EndBB);
				641
				642	// Transfer rest of current basic-block to EndBB
				643	EndBB->splice(EndBB->begin(), MBB,
				644	llvm::next(MachineBasicBlock::iterator(MI)),
				645	MBB->end());
				646	EndBB->transferSuccessorsAndUpdatePHIs(MBB);
				647
				648	// We need somewhere to store the f128 value needed.
				649	int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
				650
				651	// [... start of incoming MBB ...]
				652	// str qIFFALSE, [sp]
				653	// b.cc IfTrue
				654	// b Done
				655	BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
				656	.addReg(IfFalseReg)
				657	.addFrameIndex(ScratchFI)
				658	.addImm(0);
				659	BuildMI(MBB, DL, TII->get(AArch64::Bcc))
				660	.addImm(CondCode)
				661	.addMBB(TrueBB);
				662	BuildMI(MBB, DL, TII->get(AArch64::Bimm))
				663	.addMBB(EndBB);
				664	MBB->addSuccessor(TrueBB);
				665	MBB->addSuccessor(EndBB);
				666
				667	// IfTrue:
				668	// str qIFTRUE, [sp]
				669	BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
				670	.addReg(IfTrueReg)
				671	.addFrameIndex(ScratchFI)
				672	.addImm(0);
				673
				674	// Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
				675	// blocks.
				676	TrueBB->addSuccessor(EndBB);
				677
				678	// Done:
				679	// ldr qDEST, [sp]
				680	// [... rest of incoming MBB ...]
				681	if (!NZCVKilled)
				682	EndBB->addLiveIn(AArch64::NZCV);
				683	MachineInstr *StartOfEnd = EndBB->begin();
				684	BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
				685	.addFrameIndex(ScratchFI)
				686	.addImm(0);
				687
				688	MI->eraseFromParent();
				689	return EndBB;
				690	}
				691
				692	MachineBasicBlock *
				693	AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
				694	MachineBasicBlock *MBB) const {
				695	switch (MI->getOpcode()) {
				696	default: llvm_unreachable("Unhandled instruction with custom inserter");
				697	case AArch64::F128CSEL:
				698	return EmitF128CSEL(MI, MBB);
				699	case AArch64::ATOMIC_LOAD_ADD_I8:
				700	return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
				701	case AArch64::ATOMIC_LOAD_ADD_I16:
				702	return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
				703	case AArch64::ATOMIC_LOAD_ADD_I32:
				704	return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
				705	case AArch64::ATOMIC_LOAD_ADD_I64:
				706	return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
				707
				708	case AArch64::ATOMIC_LOAD_SUB_I8:
				709	return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
				710	case AArch64::ATOMIC_LOAD_SUB_I16:
				711	return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
				712	case AArch64::ATOMIC_LOAD_SUB_I32:
				713	return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
				714	case AArch64::ATOMIC_LOAD_SUB_I64:
				715	return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
				716
				717	case AArch64::ATOMIC_LOAD_AND_I8:
				718	return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
				719	case AArch64::ATOMIC_LOAD_AND_I16:
				720	return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
				721	case AArch64::ATOMIC_LOAD_AND_I32:
				722	return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
				723	case AArch64::ATOMIC_LOAD_AND_I64:
				724	return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
				725
				726	case AArch64::ATOMIC_LOAD_OR_I8:
				727	return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
				728	case AArch64::ATOMIC_LOAD_OR_I16:
				729	return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
				730	case AArch64::ATOMIC_LOAD_OR_I32:
				731	return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
				732	case AArch64::ATOMIC_LOAD_OR_I64:
				733	return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
				734
				735	case AArch64::ATOMIC_LOAD_XOR_I8:
				736	return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
				737	case AArch64::ATOMIC_LOAD_XOR_I16:
				738	return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
				739	case AArch64::ATOMIC_LOAD_XOR_I32:
				740	return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
				741	case AArch64::ATOMIC_LOAD_XOR_I64:
				742	return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
				743
				744	case AArch64::ATOMIC_LOAD_NAND_I8:
				745	return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
				746	case AArch64::ATOMIC_LOAD_NAND_I16:
				747	return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
				748	case AArch64::ATOMIC_LOAD_NAND_I32:
				749	return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
				750	case AArch64::ATOMIC_LOAD_NAND_I64:
				751	return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
				752
				753	case AArch64::ATOMIC_LOAD_MIN_I8:
				754	return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
				755	case AArch64::ATOMIC_LOAD_MIN_I16:
				756	return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
				757	case AArch64::ATOMIC_LOAD_MIN_I32:
				758	return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
				759	case AArch64::ATOMIC_LOAD_MIN_I64:
				760	return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
				761
				762	case AArch64::ATOMIC_LOAD_MAX_I8:
				763	return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
				764	case AArch64::ATOMIC_LOAD_MAX_I16:
				765	return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
				766	case AArch64::ATOMIC_LOAD_MAX_I32:
				767	return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
				768	case AArch64::ATOMIC_LOAD_MAX_I64:
				769	return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
				770
				771	case AArch64::ATOMIC_LOAD_UMIN_I8:
				772	return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
				773	case AArch64::ATOMIC_LOAD_UMIN_I16:
				774	return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
				775	case AArch64::ATOMIC_LOAD_UMIN_I32:
				776	return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
				777	case AArch64::ATOMIC_LOAD_UMIN_I64:
				778	return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
				779
				780	case AArch64::ATOMIC_LOAD_UMAX_I8:
				781	return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
				782	case AArch64::ATOMIC_LOAD_UMAX_I16:
				783	return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
				784	case AArch64::ATOMIC_LOAD_UMAX_I32:
				785	return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
				786	case AArch64::ATOMIC_LOAD_UMAX_I64:
				787	return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
				788
				789	case AArch64::ATOMIC_SWAP_I8:
				790	return emitAtomicBinary(MI, MBB, 1, 0);
				791	case AArch64::ATOMIC_SWAP_I16:
				792	return emitAtomicBinary(MI, MBB, 2, 0);
				793	case AArch64::ATOMIC_SWAP_I32:
				794	return emitAtomicBinary(MI, MBB, 4, 0);
				795	case AArch64::ATOMIC_SWAP_I64:
				796	return emitAtomicBinary(MI, MBB, 8, 0);
				797
				798	case AArch64::ATOMIC_CMP_SWAP_I8:
				799	return emitAtomicCmpSwap(MI, MBB, 1);
				800	case AArch64::ATOMIC_CMP_SWAP_I16:
				801	return emitAtomicCmpSwap(MI, MBB, 2);
				802	case AArch64::ATOMIC_CMP_SWAP_I32:
				803	return emitAtomicCmpSwap(MI, MBB, 4);
				804	case AArch64::ATOMIC_CMP_SWAP_I64:
				805	return emitAtomicCmpSwap(MI, MBB, 8);
				806	}
				807	}
				808
				809
				810	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
				811	switch (Opcode) {
				812	case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
				813	case AArch64ISD::Call: return "AArch64ISD::Call";
				814	case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
				815	case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
				816	case AArch64ISD::BFI: return "AArch64ISD::BFI";
				817	case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
				818	case AArch64ISD::Ret: return "AArch64ISD::Ret";
				819	case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
				820	case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
				821	case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
				822	case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
				823	case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
				824	case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
Tim Northover	2dbef34	2013-05-04 16:53:46 +0000	[diff] [blame]	825	case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	826	case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
				827
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	828	case AArch64ISD::NEON_BSL:
				829	return "AArch64ISD::NEON_BSL";
				830	case AArch64ISD::NEON_MOVIMM:
				831	return "AArch64ISD::NEON_MOVIMM";
				832	case AArch64ISD::NEON_MVNIMM:
				833	return "AArch64ISD::NEON_MVNIMM";
				834	case AArch64ISD::NEON_FMOVIMM:
				835	return "AArch64ISD::NEON_FMOVIMM";
				836	case AArch64ISD::NEON_CMP:
				837	return "AArch64ISD::NEON_CMP";
				838	case AArch64ISD::NEON_CMPZ:
				839	return "AArch64ISD::NEON_CMPZ";
				840	case AArch64ISD::NEON_TST:
				841	return "AArch64ISD::NEON_TST";
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	842	case AArch64ISD::NEON_DUPIMM:
				843	return "AArch64ISD::NEON_DUPIMM";
				844	case AArch64ISD::NEON_QSHLs:
				845	return "AArch64ISD::NEON_QSHLs";
				846	case AArch64ISD::NEON_QSHLu:
				847	return "AArch64ISD::NEON_QSHLu";
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	848	default:
				849	return NULL;
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	850	}
				851	}
				852
				853	static const uint16_t AArch64FPRArgRegs[] = {
				854	AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
				855	AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
				856	};
				857	static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
				858
				859	static const uint16_t AArch64ArgRegs[] = {
				860	AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
				861	AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
				862	};
				863	static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
				864
				865	static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
				866	CCValAssign::LocInfo LocInfo,
				867	ISD::ArgFlagsTy ArgFlags, CCState &State) {
				868	// Mark all remaining general purpose registers as allocated. We don't
				869	// backtrack: if (for example) an i128 gets put on the stack, no subsequent
				870	// i64 will go in registers (C.11).
				871	for (unsigned i = 0; i < NumArgRegs; ++i)
				872	State.AllocateReg(AArch64ArgRegs[i]);
				873
				874	return false;
				875	}
				876
				877	#include "AArch64GenCallingConv.inc"
				878
				879	CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
				880
				881	switch(CC) {
				882	default: llvm_unreachable("Unsupported calling convention");
				883	case CallingConv::Fast:
				884	case CallingConv::C:
				885	return CC_A64_APCS;
				886	}
				887	}
				888
				889	void
				890	AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	891	SDLoc DL, SDValue &Chain) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	892	MachineFunction &MF = DAG.getMachineFunction();
				893	MachineFrameInfo *MFI = MF.getFrameInfo();
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	894	AArch64MachineFunctionInfo *FuncInfo
				895	= MF.getInfo<AArch64MachineFunctionInfo>();
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	896
				897	SmallVector<SDValue, 8> MemOps;
				898
				899	unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
				900	NumArgRegs);
				901	unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
				902	NumFPRArgRegs);
				903
				904	unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
				905	int GPRIdx = 0;
				906	if (GPRSaveSize != 0) {
				907	GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
				908
				909	SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
				910
				911	for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
				912	unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
				913	SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
				914	SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
				915	MachinePointerInfo::getStack(i * 8),
				916	false, false, 0);
				917	MemOps.push_back(Store);
				918	FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
				919	DAG.getConstant(8, getPointerTy()));
				920	}
				921	}
				922
				923	unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
				924	int FPRIdx = 0;
				925	if (FPRSaveSize != 0) {
				926	FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
				927
				928	SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
				929
				930	for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
				931	unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
				932	&AArch64::FPR128RegClass);
				933	SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
				934	SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
				935	MachinePointerInfo::getStack(i * 16),
				936	false, false, 0);
				937	MemOps.push_back(Store);
				938	FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
				939	DAG.getConstant(16, getPointerTy()));
				940	}
				941	}
				942
				943	int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
				944
				945	FuncInfo->setVariadicStackIdx(StackIdx);
				946	FuncInfo->setVariadicGPRIdx(GPRIdx);
				947	FuncInfo->setVariadicGPRSize(GPRSaveSize);
				948	FuncInfo->setVariadicFPRIdx(FPRIdx);
				949	FuncInfo->setVariadicFPRSize(FPRSaveSize);
				950
				951	if (!MemOps.empty()) {
				952	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
				953	MemOps.size());
				954	}
				955	}
				956
				957
				958	SDValue
				959	AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
				960	CallingConv::ID CallConv, bool isVarArg,
				961	const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	962	SDLoc dl, SelectionDAG &DAG,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	963	SmallVectorImpl<SDValue> &InVals) const {
				964	MachineFunction &MF = DAG.getMachineFunction();
				965	AArch64MachineFunctionInfo *FuncInfo
				966	= MF.getInfo<AArch64MachineFunctionInfo>();
				967	MachineFrameInfo *MFI = MF.getFrameInfo();
				968	bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
				969
				970	SmallVector<CCValAssign, 16> ArgLocs;
				971	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
				972	getTargetMachine(), ArgLocs, *DAG.getContext());
				973	CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
				974
				975	SmallVector<SDValue, 16> ArgValues;
				976
				977	SDValue ArgValue;
				978	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				979	CCValAssign &VA = ArgLocs[i];
				980	ISD::ArgFlagsTy Flags = Ins[i].Flags;
				981
				982	if (Flags.isByVal()) {
				983	// Byval is used for small structs and HFAs in the PCS, but the system
				984	// should work in a non-compliant manner for larger structs.
				985	EVT PtrTy = getPointerTy();
				986	int Size = Flags.getByValSize();
				987	unsigned NumRegs = (Size + 7) / 8;
				988
				989	unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
				990	VA.getLocMemOffset(),
				991	false);
				992	SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
				993	InVals.push_back(FrameIdxN);
				994
				995	continue;
				996	} else if (VA.isRegLoc()) {
				997	MVT RegVT = VA.getLocVT();
				998	const TargetRegisterClass *RC = getRegClassFor(RegVT);
				999	unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
				1000
				1001	ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
				1002	} else { // VA.isRegLoc()
				1003	assert(VA.isMemLoc());
				1004
				1005	int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
				1006	VA.getLocMemOffset(), true);
				1007
				1008	SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
				1009	ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
				1010	MachinePointerInfo::getFixedStack(FI),
				1011	false, false, false, 0);
				1012
				1013
				1014	}
				1015
				1016	switch (VA.getLocInfo()) {
				1017	default: llvm_unreachable("Unknown loc info!");
				1018	case CCValAssign::Full: break;
				1019	case CCValAssign::BCvt:
				1020	ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
				1021	break;
				1022	case CCValAssign::SExt:
				1023	case CCValAssign::ZExt:
				1024	case CCValAssign::AExt: {
				1025	unsigned DestSize = VA.getValVT().getSizeInBits();
				1026	unsigned DestSubReg;
				1027
				1028	switch (DestSize) {
				1029	case 8: DestSubReg = AArch64::sub_8; break;
				1030	case 16: DestSubReg = AArch64::sub_16; break;
				1031	case 32: DestSubReg = AArch64::sub_32; break;
				1032	case 64: DestSubReg = AArch64::sub_64; break;
				1033	default: llvm_unreachable("Unexpected argument promotion");
				1034	}
				1035
				1036	ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
				1037	VA.getValVT(), ArgValue,
				1038	DAG.getTargetConstant(DestSubReg, MVT::i32)),
				1039	0);
				1040	break;
				1041	}
				1042	}
				1043
				1044	InVals.push_back(ArgValue);
				1045	}
				1046
				1047	if (isVarArg)
				1048	SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
				1049
				1050	unsigned StackArgSize = CCInfo.getNextStackOffset();
				1051	if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
				1052	// This is a non-standard ABI so by fiat I say we're allowed to make full
				1053	// use of the stack area to be popped, which must be aligned to 16 bytes in
				1054	// any case:
				1055	StackArgSize = RoundUpToAlignment(StackArgSize, 16);
				1056
				1057	// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
				1058	// a multiple of 16.
				1059	FuncInfo->setArgumentStackToRestore(StackArgSize);
				1060
				1061	// This realignment carries over to the available bytes below. Our own
				1062	// callers will guarantee the space is free by giving an aligned value to
				1063	// CALLSEQ_START.
				1064	}
				1065	// Even if we're not expected to free up the space, it's useful to know how
				1066	// much is there while considering tail calls (because we can reuse it).
				1067	FuncInfo->setBytesInStackArgArea(StackArgSize);
				1068
				1069	return Chain;
				1070	}
				1071
				1072	SDValue
				1073	AArch64TargetLowering::LowerReturn(SDValue Chain,
				1074	CallingConv::ID CallConv, bool isVarArg,
				1075	const SmallVectorImpl<ISD::OutputArg> &Outs,
				1076	const SmallVectorImpl<SDValue> &OutVals,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1077	SDLoc dl, SelectionDAG &DAG) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1078	// CCValAssign - represent the assignment of the return value to a location.
				1079	SmallVector<CCValAssign, 16> RVLocs;
				1080
				1081	// CCState - Info about the registers and stack slots.
				1082	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
				1083	getTargetMachine(), RVLocs, *DAG.getContext());
				1084
				1085	// Analyze outgoing return values.
				1086	CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
				1087
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1088	SDValue Flag;
Jakob Stoklund Olesen	dbc8c51	2013-02-05 18:21:49 +0000	[diff] [blame]	1089	SmallVector<SDValue, 4> RetOps(1, Chain);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1090
				1091	for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	1092	// PCS: "If the type, T, of the result of a function is such that
				1093	// void func(T arg) would require that arg be passed as a value in a
				1094	// register (or set of registers) according to the rules in 5.4, then the
				1095	// result is returned in the same registers as would be used for such an
				1096	// argument.
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1097	//
				1098	// Otherwise, the caller shall reserve a block of memory of sufficient
				1099	// size and alignment to hold the result. The address of the memory block
				1100	// shall be passed as an additional argument to the function in x8."
				1101	//
				1102	// This is implemented in two places. The register-return values are dealt
				1103	// with here, more complex returns are passed as an sret parameter, which
				1104	// means we don't have to worry about it during actual return.
				1105	CCValAssign &VA = RVLocs[i];
				1106	assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
				1107
				1108
				1109	SDValue Arg = OutVals[i];
				1110
				1111	// There's no convenient note in the ABI about this as there is for normal
				1112	// arguments, but it says return values are passed in the same registers as
				1113	// an argument would be. I believe that includes the comments about
				1114	// unspecified higher bits, putting the burden of widening on the caller
				1115	// for return values.
				1116	switch (VA.getLocInfo()) {
				1117	default: llvm_unreachable("Unknown loc info");
				1118	case CCValAssign::Full: break;
				1119	case CCValAssign::SExt:
				1120	case CCValAssign::ZExt:
				1121	case CCValAssign::AExt:
				1122	// Floating-point values should only be extended when they're going into
				1123	// memory, which can't happen here so an integer extend is acceptable.
				1124	Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
				1125	break;
				1126	case CCValAssign::BCvt:
				1127	Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
				1128	break;
				1129	}
				1130
				1131	Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
				1132	Flag = Chain.getValue(1);
Jakob Stoklund Olesen	dbc8c51	2013-02-05 18:21:49 +0000	[diff] [blame]	1133	RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1134	}
				1135
Jakob Stoklund Olesen	dbc8c51	2013-02-05 18:21:49 +0000	[diff] [blame]	1136	RetOps[0] = Chain; // Update chain.
				1137
				1138	// Add the flag if we have it.
				1139	if (Flag.getNode())
				1140	RetOps.push_back(Flag);
				1141
				1142	return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
				1143	&RetOps[0], RetOps.size());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1144	}
				1145
				1146	SDValue
				1147	AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
				1148	SmallVectorImpl<SDValue> &InVals) const {
				1149	SelectionDAG &DAG = CLI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1150	SDLoc &dl = CLI.DL;
Craig Topper	b94011f	2013-07-14 04:42:23 +0000	[diff] [blame]	1151	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
				1152	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
				1153	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1154	SDValue Chain = CLI.Chain;
				1155	SDValue Callee = CLI.Callee;
				1156	bool &IsTailCall = CLI.IsTailCall;
				1157	CallingConv::ID CallConv = CLI.CallConv;
				1158	bool IsVarArg = CLI.IsVarArg;
				1159
				1160	MachineFunction &MF = DAG.getMachineFunction();
				1161	AArch64MachineFunctionInfo *FuncInfo
				1162	= MF.getInfo<AArch64MachineFunctionInfo>();
				1163	bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
				1164	bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
				1165	bool IsSibCall = false;
				1166
				1167	if (IsTailCall) {
				1168	IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
				1169	IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
				1170	Outs, OutVals, Ins, DAG);
				1171
				1172	// A sibling call is one where we're under the usual C ABI and not planning
				1173	// to change that but can still do a tail call:
				1174	if (!TailCallOpt && IsTailCall)
				1175	IsSibCall = true;
				1176	}
				1177
				1178	SmallVector<CCValAssign, 16> ArgLocs;
				1179	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
				1180	getTargetMachine(), ArgLocs, *DAG.getContext());
				1181	CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
				1182
				1183	// On AArch64 (and all other architectures I'm aware of) the most this has to
				1184	// do is adjust the stack pointer.
				1185	unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
				1186	if (IsSibCall) {
				1187	// Since we're not changing the ABI to make this a tail call, the memory
				1188	// operands are already available in the caller's incoming argument space.
				1189	NumBytes = 0;
				1190	}
				1191
				1192	// FPDiff is the byte offset of the call's argument area from the callee's.
				1193	// Stores to callee stack arguments will be placed in FixedStackSlots offset
				1194	// by this amount for a tail call. In a sibling call it must be 0 because the
				1195	// caller will deallocate the entire stack and the callee still expects its
				1196	// arguments to begin at SP+0. Completely unused for non-tail calls.
				1197	int FPDiff = 0;
				1198
				1199	if (IsTailCall && !IsSibCall) {
				1200	unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
				1201
				1202	// FPDiff will be negative if this tail call requires more space than we
				1203	// would automatically have in our incoming argument space. Positive if we
				1204	// can actually shrink the stack.
				1205	FPDiff = NumReusableBytes - NumBytes;
				1206
				1207	// The stack pointer must be 16-byte aligned at all times it's used for a
				1208	// memory operation, which in practice means at all times and in
				1209	// particular across call boundaries. Therefore our own arguments started at
				1210	// a 16-byte aligned SP and the delta applied for the tail call should
				1211	// satisfy the same constraint.
				1212	assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
				1213	}
				1214
				1215	if (!IsSibCall)
Andrew Trick	ad6d08a	2013-05-29 22:03:55 +0000	[diff] [blame]	1216	Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
				1217	dl);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1218
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	1219	SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
				1220	getPointerTy());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1221
				1222	SmallVector<SDValue, 8> MemOpChains;
				1223	SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
				1224
				1225	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1226	CCValAssign &VA = ArgLocs[i];
				1227	ISD::ArgFlagsTy Flags = Outs[i].Flags;
				1228	SDValue Arg = OutVals[i];
				1229
				1230	// Callee does the actual widening, so all extensions just use an implicit
				1231	// definition of the rest of the Loc. Aesthetically, this would be nicer as
				1232	// an ANY_EXTEND, but that isn't valid for floating-point types and this
				1233	// alternative works on integer types too.
				1234	switch (VA.getLocInfo()) {
				1235	default: llvm_unreachable("Unknown loc info!");
				1236	case CCValAssign::Full: break;
				1237	case CCValAssign::SExt:
				1238	case CCValAssign::ZExt:
				1239	case CCValAssign::AExt: {
				1240	unsigned SrcSize = VA.getValVT().getSizeInBits();
				1241	unsigned SrcSubReg;
				1242
				1243	switch (SrcSize) {
				1244	case 8: SrcSubReg = AArch64::sub_8; break;
				1245	case 16: SrcSubReg = AArch64::sub_16; break;
				1246	case 32: SrcSubReg = AArch64::sub_32; break;
				1247	case 64: SrcSubReg = AArch64::sub_64; break;
				1248	default: llvm_unreachable("Unexpected argument promotion");
				1249	}
				1250
				1251	Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
				1252	VA.getLocVT(),
				1253	DAG.getUNDEF(VA.getLocVT()),
				1254	Arg,
				1255	DAG.getTargetConstant(SrcSubReg, MVT::i32)),
				1256	0);
				1257
				1258	break;
				1259	}
				1260	case CCValAssign::BCvt:
				1261	Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
				1262	break;
				1263	}
				1264
				1265	if (VA.isRegLoc()) {
				1266	// A normal register (sub-) argument. For now we just note it down because
				1267	// we want to copy things into registers as late as possible to avoid
				1268	// register-pressure (and possibly worse).
				1269	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1270	continue;
				1271	}
				1272
				1273	assert(VA.isMemLoc() && "unexpected argument location");
				1274
				1275	SDValue DstAddr;
				1276	MachinePointerInfo DstInfo;
				1277	if (IsTailCall) {
				1278	uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
				1279	VA.getLocVT().getSizeInBits();
				1280	OpSize = (OpSize + 7) / 8;
				1281	int32_t Offset = VA.getLocMemOffset() + FPDiff;
				1282	int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
				1283
				1284	DstAddr = DAG.getFrameIndex(FI, getPointerTy());
				1285	DstInfo = MachinePointerInfo::getFixedStack(FI);
				1286
				1287	// Make sure any stack arguments overlapping with where we're storing are
				1288	// loaded before this eventual operation. Otherwise they'll be clobbered.
				1289	Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
				1290	} else {
				1291	SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
				1292
				1293	DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
				1294	DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
				1295	}
				1296
				1297	if (Flags.isByVal()) {
				1298	SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
				1299	SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
				1300	Flags.getByValAlign(),
				1301	/*isVolatile = */ false,
				1302	/*alwaysInline = */ false,
				1303	DstInfo, MachinePointerInfo(0));
				1304	MemOpChains.push_back(Cpy);
				1305	} else {
				1306	// Normal stack argument, put it where it's needed.
				1307	SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
				1308	false, false, 0);
				1309	MemOpChains.push_back(Store);
				1310	}
				1311	}
				1312
				1313	// The loads and stores generated above shouldn't clash with each
				1314	// other. Combining them with this TokenFactor notes that fact for the rest of
				1315	// the backend.
				1316	if (!MemOpChains.empty())
				1317	Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
				1318	&MemOpChains[0], MemOpChains.size());
				1319
				1320	// Most of the rest of the instructions need to be glued together; we don't
				1321	// want assignments to actual registers used by a call to be rearranged by a
				1322	// well-meaning scheduler.
				1323	SDValue InFlag;
				1324
				1325	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1326	Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
				1327	RegsToPass[i].second, InFlag);
				1328	InFlag = Chain.getValue(1);
				1329	}
				1330
				1331	// The linker is responsible for inserting veneers when necessary to put a
				1332	// function call destination in range, so we don't need to bother with a
				1333	// wrapper here.
				1334	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1335	const GlobalValue *GV = G->getGlobal();
				1336	Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
				1337	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
				1338	const char *Sym = S->getSymbol();
				1339	Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				1340	}
				1341
				1342	// We don't usually want to end the call-sequence here because we would tidy
				1343	// the frame up after the call, however in the ABI-changing tail-call case
				1344	// we've carefully laid out the parameters so that when sp is reset they'll be
				1345	// in the correct location.
				1346	if (IsTailCall && !IsSibCall) {
				1347	Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
Andrew Trick	ad6d08a	2013-05-29 22:03:55 +0000	[diff] [blame]	1348	DAG.getIntPtrConstant(0, true), InFlag, dl);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1349	InFlag = Chain.getValue(1);
				1350	}
				1351
				1352	// We produce the following DAG scheme for the actual call instruction:
				1353	// (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?
				1354	//
				1355	// Most arguments aren't going to be used and just keep the values live as
				1356	// far as LLVM is concerned. It's expected to be selected as simply "bl
				1357	// callee" (for a direct, non-tail call).
				1358	std::vector<SDValue> Ops;
				1359	Ops.push_back(Chain);
				1360	Ops.push_back(Callee);
				1361
				1362	if (IsTailCall) {
				1363	// Each tail call may have to adjust the stack by a different amount, so
				1364	// this information must travel along with the operation for eventual
				1365	// consumption by emitEpilogue.
				1366	Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
				1367	}
				1368
				1369	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1370	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1371	RegsToPass[i].second.getValueType()));
				1372
				1373
				1374	// Add a register mask operand representing the call-preserved registers. This
				1375	// is used later in codegen to constrain register-allocation.
				1376	const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
				1377	const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
				1378	assert(Mask && "Missing call preserved mask for calling convention");
				1379	Ops.push_back(DAG.getRegisterMask(Mask));
				1380
				1381	// If we needed glue, put it in as the last argument.
				1382	if (InFlag.getNode())
				1383	Ops.push_back(InFlag);
				1384
				1385	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
				1386
				1387	if (IsTailCall) {
				1388	return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
				1389	}
				1390
				1391	Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
				1392	InFlag = Chain.getValue(1);
				1393
				1394	// Now we can reclaim the stack, just as well do it before working out where
				1395	// our return value is.
				1396	if (!IsSibCall) {
				1397	uint64_t CalleePopBytes
				1398	= DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
				1399
				1400	Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
				1401	DAG.getIntPtrConstant(CalleePopBytes, true),
Andrew Trick	ad6d08a	2013-05-29 22:03:55 +0000	[diff] [blame]	1402	InFlag, dl);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1403	InFlag = Chain.getValue(1);
				1404	}
				1405
				1406	return LowerCallResult(Chain, InFlag, CallConv,
				1407	IsVarArg, Ins, dl, DAG, InVals);
				1408	}
				1409
				1410	SDValue
				1411	AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
				1412	CallingConv::ID CallConv, bool IsVarArg,
				1413	const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1414	SDLoc dl, SelectionDAG &DAG,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1415	SmallVectorImpl<SDValue> &InVals) const {
				1416	// Assign locations to each value returned by this call.
				1417	SmallVector<CCValAssign, 16> RVLocs;
				1418	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
				1419	getTargetMachine(), RVLocs, *DAG.getContext());
				1420	CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
				1421
				1422	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				1423	CCValAssign VA = RVLocs[i];
				1424
				1425	// Return values that are too big to fit into registers should use an sret
				1426	// pointer, so this can be a lot simpler than the main argument code.
				1427	assert(VA.isRegLoc() && "Memory locations not expected for call return");
				1428
				1429	SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
				1430	InFlag);
				1431	Chain = Val.getValue(1);
				1432	InFlag = Val.getValue(2);
				1433
				1434	switch (VA.getLocInfo()) {
				1435	default: llvm_unreachable("Unknown loc info!");
				1436	case CCValAssign::Full: break;
				1437	case CCValAssign::BCvt:
				1438	Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
				1439	break;
				1440	case CCValAssign::ZExt:
				1441	case CCValAssign::SExt:
				1442	case CCValAssign::AExt:
				1443	// Floating-point arguments only get extended/truncated if they're going
				1444	// in memory, so using the integer operation is acceptable here.
				1445	Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
				1446	break;
				1447	}
				1448
				1449	InVals.push_back(Val);
				1450	}
				1451
				1452	return Chain;
				1453	}
				1454
				1455	bool
				1456	AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
				1457	CallingConv::ID CalleeCC,
				1458	bool IsVarArg,
				1459	bool IsCalleeStructRet,
				1460	bool IsCallerStructRet,
				1461	const SmallVectorImpl<ISD::OutputArg> &Outs,
				1462	const SmallVectorImpl<SDValue> &OutVals,
				1463	const SmallVectorImpl<ISD::InputArg> &Ins,
				1464	SelectionDAG& DAG) const {
				1465
				1466	// For CallingConv::C this function knows whether the ABI needs
				1467	// changing. That's not true for other conventions so they will have to opt in
				1468	// manually.
				1469	if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
				1470	return false;
				1471
				1472	const MachineFunction &MF = DAG.getMachineFunction();
				1473	const Function *CallerF = MF.getFunction();
				1474	CallingConv::ID CallerCC = CallerF->getCallingConv();
				1475	bool CCMatch = CallerCC == CalleeCC;
				1476
				1477	// Byval parameters hand the function a pointer directly into the stack area
				1478	// we want to reuse during a tail call. Working around this is possible (see
				1479	// X86) but less efficient and uglier in LowerCall.
				1480	for (Function::const_arg_iterator i = CallerF->arg_begin(),
				1481	e = CallerF->arg_end(); i != e; ++i)
				1482	if (i->hasByValAttr())
				1483	return false;
				1484
				1485	if (getTargetMachine().Options.GuaranteedTailCallOpt) {
				1486	if (IsTailCallConvention(CalleeCC) && CCMatch)
				1487	return true;
				1488	return false;
				1489	}
				1490
				1491	// Now we search for cases where we can use a tail call without changing the
				1492	// ABI. Sibcall is used in some places (particularly gcc) to refer to this
				1493	// concept.
				1494
				1495	// I want anyone implementing a new calling convention to think long and hard
				1496	// about this assert.
				1497	assert((!IsVarArg \|\| CalleeCC == CallingConv::C)
				1498	&& "Unexpected variadic calling convention");
				1499
				1500	if (IsVarArg && !Outs.empty()) {
				1501	// At least two cases here: if caller is fastcc then we can't have any
				1502	// memory arguments (we'd be expected to clean up the stack afterwards). If
				1503	// caller is C then we could potentially use its argument area.
				1504
				1505	// FIXME: for now we take the most conservative of these in both cases:
				1506	// disallow all variadic memory operands.
				1507	SmallVector<CCValAssign, 16> ArgLocs;
				1508	CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
				1509	getTargetMachine(), ArgLocs, *DAG.getContext());
				1510
				1511	CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
				1512	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
				1513	if (!ArgLocs[i].isRegLoc())
				1514	return false;
				1515	}
				1516
				1517	// If the calling conventions do not match, then we'd better make sure the
				1518	// results are returned in the same way as what the caller expects.
				1519	if (!CCMatch) {
				1520	SmallVector<CCValAssign, 16> RVLocs1;
				1521	CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
				1522	getTargetMachine(), RVLocs1, *DAG.getContext());
				1523	CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
				1524
				1525	SmallVector<CCValAssign, 16> RVLocs2;
				1526	CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
				1527	getTargetMachine(), RVLocs2, *DAG.getContext());
				1528	CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
				1529
				1530	if (RVLocs1.size() != RVLocs2.size())
				1531	return false;
				1532	for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
				1533	if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
				1534	return false;
				1535	if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
				1536	return false;
				1537	if (RVLocs1[i].isRegLoc()) {
				1538	if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
				1539	return false;
				1540	} else {
				1541	if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
				1542	return false;
				1543	}
				1544	}
				1545	}
				1546
				1547	// Nothing more to check if the callee is taking no arguments
				1548	if (Outs.empty())
				1549	return true;
				1550
				1551	SmallVector<CCValAssign, 16> ArgLocs;
				1552	CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
				1553	getTargetMachine(), ArgLocs, *DAG.getContext());
				1554
				1555	CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
				1556
				1557	const AArch64MachineFunctionInfo *FuncInfo
				1558	= MF.getInfo<AArch64MachineFunctionInfo>();
				1559
				1560	// If the stack arguments for this call would fit into our own save area then
				1561	// the call can be made tail.
				1562	return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
				1563	}
				1564
				1565	bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
				1566	bool TailCallOpt) const {
				1567	return CallCC == CallingConv::Fast && TailCallOpt;
				1568	}
				1569
				1570	bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
				1571	return CallCC == CallingConv::Fast;
				1572	}
				1573
				1574	SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
				1575	SelectionDAG &DAG,
				1576	MachineFrameInfo *MFI,
				1577	int ClobberedFI) const {
				1578	SmallVector<SDValue, 8> ArgChains;
				1579	int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
				1580	int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
				1581
				1582	// Include the original chain at the beginning of the list. When this is
				1583	// used by target LowerCall hooks, this helps legalize find the
				1584	// CALLSEQ_BEGIN node.
				1585	ArgChains.push_back(Chain);
				1586
				1587	// Add a chain value for each stack argument corresponding
				1588	for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
				1589	UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
				1590	if (LoadSDNode L = dyn_cast<LoadSDNode>(U))
				1591	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
				1592	if (FI->getIndex() < 0) {
				1593	int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
				1594	int64_t InLastByte = InFirstByte;
				1595	InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
				1596
				1597	if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) \|\|
				1598	(FirstByte <= InFirstByte && InFirstByte <= LastByte))
				1599	ArgChains.push_back(SDValue(L, 1));
				1600	}
				1601
				1602	// Build a tokenfactor for all the chains.
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1603	return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1604	&ArgChains[0], ArgChains.size());
				1605	}
				1606
				1607	static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
				1608	switch (CC) {
				1609	case ISD::SETEQ: return A64CC::EQ;
				1610	case ISD::SETGT: return A64CC::GT;
				1611	case ISD::SETGE: return A64CC::GE;
				1612	case ISD::SETLT: return A64CC::LT;
				1613	case ISD::SETLE: return A64CC::LE;
				1614	case ISD::SETNE: return A64CC::NE;
				1615	case ISD::SETUGT: return A64CC::HI;
				1616	case ISD::SETUGE: return A64CC::HS;
				1617	case ISD::SETULT: return A64CC::LO;
				1618	case ISD::SETULE: return A64CC::LS;
				1619	default: llvm_unreachable("Unexpected condition code");
				1620	}
				1621	}
				1622
				1623	bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
				1624	// icmp is implemented using adds/subs immediate, which take an unsigned
				1625	// 12-bit immediate, optionally shifted left by 12 bits.
				1626
				1627	// Symmetric by using adds/subs
				1628	if (Val < 0)
				1629	Val = -Val;
				1630
				1631	return (Val & ~0xfff) == 0 \|\| (Val & ~0xfff000) == 0;
				1632	}
				1633
				1634	SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
				1635	ISD::CondCode CC, SDValue &A64cc,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1636	SelectionDAG &DAG, SDLoc &dl) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1637	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
				1638	int64_t C = 0;
				1639	EVT VT = RHSC->getValueType(0);
				1640	bool knownInvalid = false;
				1641
				1642	// I'm not convinced the rest of LLVM handles these edge cases properly, but
				1643	// we can at least get it right.
				1644	if (isSignedIntSetCC(CC)) {
				1645	C = RHSC->getSExtValue();
				1646	} else if (RHSC->getZExtValue() > INT64_MAX) {
				1647	// A 64-bit constant not representable by a signed 64-bit integer is far
				1648	// too big to fit into a SUBS immediate anyway.
				1649	knownInvalid = true;
				1650	} else {
				1651	C = RHSC->getZExtValue();
				1652	}
				1653
				1654	if (!knownInvalid && !isLegalICmpImmediate(C)) {
				1655	// Constant does not fit, try adjusting it by one?
				1656	switch (CC) {
				1657	default: break;
				1658	case ISD::SETLT:
				1659	case ISD::SETGE:
				1660	if (isLegalICmpImmediate(C-1)) {
				1661	CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
				1662	RHS = DAG.getConstant(C-1, VT);
				1663	}
				1664	break;
				1665	case ISD::SETULT:
				1666	case ISD::SETUGE:
				1667	if (isLegalICmpImmediate(C-1)) {
				1668	CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
				1669	RHS = DAG.getConstant(C-1, VT);
				1670	}
				1671	break;
				1672	case ISD::SETLE:
				1673	case ISD::SETGT:
				1674	if (isLegalICmpImmediate(C+1)) {
				1675	CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
				1676	RHS = DAG.getConstant(C+1, VT);
				1677	}
				1678	break;
				1679	case ISD::SETULE:
				1680	case ISD::SETUGT:
				1681	if (isLegalICmpImmediate(C+1)) {
				1682	CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
				1683	RHS = DAG.getConstant(C+1, VT);
				1684	}
				1685	break;
				1686	}
				1687	}
				1688	}
				1689
				1690	A64CC::CondCodes CondCode = IntCCToA64CC(CC);
				1691	A64cc = DAG.getConstant(CondCode, MVT::i32);
				1692	return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
				1693	DAG.getCondCode(CC));
				1694	}
				1695
				1696	static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
				1697	A64CC::CondCodes &Alternative) {
				1698	A64CC::CondCodes CondCode = A64CC::Invalid;
				1699	Alternative = A64CC::Invalid;
				1700
				1701	switch (CC) {
				1702	default: llvm_unreachable("Unknown FP condition!");
				1703	case ISD::SETEQ:
				1704	case ISD::SETOEQ: CondCode = A64CC::EQ; break;
				1705	case ISD::SETGT:
				1706	case ISD::SETOGT: CondCode = A64CC::GT; break;
				1707	case ISD::SETGE:
				1708	case ISD::SETOGE: CondCode = A64CC::GE; break;
				1709	case ISD::SETOLT: CondCode = A64CC::MI; break;
				1710	case ISD::SETOLE: CondCode = A64CC::LS; break;
				1711	case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
				1712	case ISD::SETO: CondCode = A64CC::VC; break;
				1713	case ISD::SETUO: CondCode = A64CC::VS; break;
				1714	case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
				1715	case ISD::SETUGT: CondCode = A64CC::HI; break;
				1716	case ISD::SETUGE: CondCode = A64CC::PL; break;
				1717	case ISD::SETLT:
				1718	case ISD::SETULT: CondCode = A64CC::LT; break;
				1719	case ISD::SETLE:
				1720	case ISD::SETULE: CondCode = A64CC::LE; break;
				1721	case ISD::SETNE:
				1722	case ISD::SETUNE: CondCode = A64CC::NE; break;
				1723	}
				1724	return CondCode;
				1725	}
				1726
				1727	SDValue
				1728	AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1729	SDLoc DL(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1730	EVT PtrVT = getPointerTy();
				1731	const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
				1732
Tim Northover	9fc1cdd	2013-05-04 16:53:53 +0000	[diff] [blame]	1733	switch(getTargetMachine().getCodeModel()) {
				1734	case CodeModel::Small:
				1735	// The most efficient code is PC-relative anyway for the small memory model,
				1736	// so we don't need to worry about relocation model.
				1737	return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
				1738	DAG.getTargetBlockAddress(BA, PtrVT, 0,
				1739	AArch64II::MO_NO_FLAG),
				1740	DAG.getTargetBlockAddress(BA, PtrVT, 0,
				1741	AArch64II::MO_LO12),
				1742	DAG.getConstant(/Alignment=/ 4, MVT::i32));
				1743	case CodeModel::Large:
				1744	return DAG.getNode(
				1745	AArch64ISD::WrapperLarge, DL, PtrVT,
				1746	DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
				1747	DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
				1748	DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
				1749	DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
				1750	default:
				1751	llvm_unreachable("Only small and large code models supported now");
				1752	}
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1753	}
				1754
				1755
				1756	// (BRCOND chain, val, dest)
				1757	SDValue
				1758	AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1759	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1760	SDValue Chain = Op.getOperand(0);
				1761	SDValue TheBit = Op.getOperand(1);
				1762	SDValue DestBB = Op.getOperand(2);
				1763
				1764	// AArch64 BooleanContents is the default UndefinedBooleanContent, which means
				1765	// that as the consumer we are responsible for ignoring rubbish in higher
				1766	// bits.
				1767	TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
				1768	DAG.getConstant(1, MVT::i32));
				1769
				1770	SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
				1771	DAG.getConstant(0, TheBit.getValueType()),
				1772	DAG.getCondCode(ISD::SETNE));
				1773
				1774	return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
				1775	A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
				1776	DestBB);
				1777	}
				1778
				1779	// (BR_CC chain, condcode, lhs, rhs, dest)
				1780	SDValue
				1781	AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1782	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1783	SDValue Chain = Op.getOperand(0);
				1784	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
				1785	SDValue LHS = Op.getOperand(2);
				1786	SDValue RHS = Op.getOperand(3);
				1787	SDValue DestBB = Op.getOperand(4);
				1788
				1789	if (LHS.getValueType() == MVT::f128) {
				1790	// f128 comparisons are lowered to runtime calls by a routine which sets
				1791	// LHS, RHS and CC appropriately for the rest of this function to continue.
				1792	softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
				1793
				1794	// If softenSetCCOperands returned a scalar, we need to compare the result
				1795	// against zero to select between true and false values.
				1796	if (RHS.getNode() == 0) {
				1797	RHS = DAG.getConstant(0, LHS.getValueType());
				1798	CC = ISD::SETNE;
				1799	}
				1800	}
				1801
				1802	if (LHS.getValueType().isInteger()) {
				1803	SDValue A64cc;
				1804
				1805	// Integers are handled in a separate function because the combinations of
				1806	// immediates and tests can get hairy and we may want to fiddle things.
				1807	SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
				1808
				1809	return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
				1810	Chain, CmpOp, A64cc, DestBB);
				1811	}
				1812
				1813	// Note that some LLVM floating-point CondCodes can't be lowered to a single
				1814	// conditional branch, hence FPCCToA64CC can set a second test, where either
				1815	// passing is sufficient.
				1816	A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
				1817	CondCode = FPCCToA64CC(CC, Alternative);
				1818	SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
				1819	SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
				1820	DAG.getCondCode(CC));
				1821	SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
				1822	Chain, SetCC, A64cc, DestBB);
				1823
				1824	if (Alternative != A64CC::Invalid) {
				1825	A64cc = DAG.getConstant(Alternative, MVT::i32);
				1826	A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
				1827	A64BR_CC, SetCC, A64cc, DestBB);
				1828
				1829	}
				1830
				1831	return A64BR_CC;
				1832	}
				1833
				1834	SDValue
				1835	AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
				1836	RTLIB::Libcall Call) const {
				1837	ArgListTy Args;
				1838	ArgListEntry Entry;
				1839	for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
				1840	EVT ArgVT = Op.getOperand(i).getValueType();
				1841	Type ArgTy = ArgVT.getTypeForEVT(DAG.getContext());
				1842	Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
				1843	Entry.isSExt = false;
				1844	Entry.isZExt = false;
				1845	Args.push_back(Entry);
				1846	}
				1847	SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
				1848
				1849	Type RetTy = Op.getValueType().getTypeForEVT(DAG.getContext());
				1850
				1851	// By default, the input chain to this libcall is the entry node of the
				1852	// function. If the libcall is going to be emitted as a tail call then
				1853	// isUsedByReturnOnly will change it to the right chain if the return
				1854	// node which is being folded has a non-entry input chain.
				1855	SDValue InChain = DAG.getEntryNode();
				1856
				1857	// isTailCall may be true since the callee does not reference caller stack
				1858	// frame. Check if it's in the right position.
				1859	SDValue TCChain = InChain;
				1860	bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
				1861	if (isTailCall)
				1862	InChain = TCChain;
				1863
				1864	TargetLowering::
				1865	CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
				1866	0, getLibcallCallingConv(Call), isTailCall,
				1867	/doesNotReturn=/false, /isReturnValueUsed=/true,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1868	Callee, Args, DAG, SDLoc(Op));
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1869	std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
				1870
				1871	if (!CallInfo.second.getNode())
				1872	// It's a tailcall, return the chain (which is the DAG root).
				1873	return DAG.getRoot();
				1874
				1875	return CallInfo.first;
				1876	}
				1877
				1878	SDValue
				1879	AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
				1880	if (Op.getOperand(0).getValueType() != MVT::f128) {
				1881	// It's legal except when f128 is involved
				1882	return Op;
				1883	}
				1884
				1885	RTLIB::Libcall LC;
				1886	LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
				1887
				1888	SDValue SrcVal = Op.getOperand(0);
				1889	return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
Michael Gottesman	7a80172	2013-08-13 17:54:56 +0000	[diff] [blame]	1890	/isSigned/ false, SDLoc(Op)).first;
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1891	}
				1892
				1893	SDValue
				1894	AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
				1895	assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
				1896
				1897	RTLIB::Libcall LC;
				1898	LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
				1899
				1900	return LowerF128ToCall(Op, DAG, LC);
				1901	}
				1902
				1903	SDValue
				1904	AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
				1905	bool IsSigned) const {
				1906	if (Op.getOperand(0).getValueType() != MVT::f128) {
				1907	// It's legal except when f128 is involved
				1908	return Op;
				1909	}
				1910
				1911	RTLIB::Libcall LC;
				1912	if (IsSigned)
				1913	LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
				1914	else
				1915	LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
				1916
				1917	return LowerF128ToCall(Op, DAG, LC);
				1918	}
				1919
				1920	SDValue
Tim Northover	2dbef34	2013-05-04 16:53:46 +0000	[diff] [blame]	1921	AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
				1922	SelectionDAG &DAG) const {
				1923	assert(getTargetMachine().getCodeModel() == CodeModel::Large);
				1924	assert(getTargetMachine().getRelocationModel() == Reloc::Static);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1925
Tim Northover	2dbef34	2013-05-04 16:53:46 +0000	[diff] [blame]	1926	EVT PtrVT = getPointerTy();
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1927	SDLoc dl(Op);
Tim Northover	2dbef34	2013-05-04 16:53:46 +0000	[diff] [blame]	1928	const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
				1929	const GlobalValue *GV = GN->getGlobal();
				1930
				1931	SDValue GlobalAddr = DAG.getNode(
				1932	AArch64ISD::WrapperLarge, dl, PtrVT,
				1933	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
				1934	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
				1935	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
				1936	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
				1937
				1938	if (GN->getOffset() != 0)
				1939	return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
				1940	DAG.getConstant(GN->getOffset(), PtrVT));
				1941
				1942	return GlobalAddr;
				1943	}
				1944
				1945	SDValue
				1946	AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
				1947	SelectionDAG &DAG) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1948	assert(getTargetMachine().getCodeModel() == CodeModel::Small);
				1949
				1950	EVT PtrVT = getPointerTy();
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	1951	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1952	const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
				1953	const GlobalValue *GV = GN->getGlobal();
				1954	unsigned Alignment = GV->getAlignment();
Tim Northover	228d9d3	2013-02-06 16:43:33 +0000	[diff] [blame]	1955	Reloc::Model RelocM = getTargetMachine().getRelocationModel();
Tim Northover	c3c5c09	2013-02-28 14:36:31 +0000	[diff] [blame]	1956	if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
				1957	// Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
				1958	// to zero when they remain undefined. In PIC mode the GOT can take care of
				1959	// this, but in absolute mode we use a constant pool load.
Tim Northover	3533ad6b	2013-02-15 09:33:43 +0000	[diff] [blame]	1960	SDValue PoolAddr;
				1961	PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
				1962	DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
				1963	AArch64II::MO_NO_FLAG),
				1964	DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
				1965	AArch64II::MO_LO12),
				1966	DAG.getConstant(8, MVT::i32));
Tim Northover	b9d4fd2	2013-02-28 14:36:24 +0000	[diff] [blame]	1967	SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
				1968	MachinePointerInfo::getConstantPool(),
				1969	/isVolatile=/ false,
				1970	/isNonTemporal=/ true,
				1971	/isInvariant=/ true, 8);
				1972	if (GN->getOffset() != 0)
				1973	return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
				1974	DAG.getConstant(GN->getOffset(), PtrVT));
				1975
				1976	return GlobalAddr;
Tim Northover	228d9d3	2013-02-06 16:43:33 +0000	[diff] [blame]	1977	}
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1978
				1979	if (Alignment == 0) {
				1980	const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	1981	if (GVPtrTy->getElementType()->isSized()) {
				1982	Alignment
				1983	= getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
				1984	} else {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1985	// Be conservative if we can't guess, not that it really matters:
				1986	// functions and labels aren't valid for loads, and the methods used to
				1987	// actually calculate an address work with any alignment.
				1988	Alignment = 1;
				1989	}
				1990	}
				1991
				1992	unsigned char HiFixup, LoFixup;
Bill Wendling	496dc33	2013-06-07 05:00:11 +0000	[diff] [blame]	1993	bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	1994
				1995	if (UseGOT) {
				1996	HiFixup = AArch64II::MO_GOT;
				1997	LoFixup = AArch64II::MO_GOT_LO12;
				1998	Alignment = 8;
				1999	} else {
				2000	HiFixup = AArch64II::MO_NO_FLAG;
				2001	LoFixup = AArch64II::MO_LO12;
				2002	}
				2003
				2004	// AArch64's small model demands the following sequence:
				2005	// ADRP x0, somewhere
				2006	// ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
				2007	SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
				2008	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
				2009	HiFixup),
				2010	DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
				2011	LoFixup),
				2012	DAG.getConstant(Alignment, MVT::i32));
				2013
				2014	if (UseGOT) {
				2015	GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
				2016	GlobalRef);
				2017	}
				2018
				2019	if (GN->getOffset() != 0)
				2020	return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
				2021	DAG.getConstant(GN->getOffset(), PtrVT));
				2022
				2023	return GlobalRef;
				2024	}
				2025
Tim Northover	2dbef34	2013-05-04 16:53:46 +0000	[diff] [blame]	2026	SDValue
				2027	AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
				2028	SelectionDAG &DAG) const {
				2029	// TableGen doesn't have easy access to the CodeModel or RelocationModel, so
				2030	// we make those distinctions here.
				2031
				2032	switch (getTargetMachine().getCodeModel()) {
				2033	case CodeModel::Small:
				2034	return LowerGlobalAddressELFSmall(Op, DAG);
				2035	case CodeModel::Large:
				2036	return LowerGlobalAddressELFLarge(Op, DAG);
				2037	default:
				2038	llvm_unreachable("Only small and large code models supported now");
				2039	}
				2040	}
				2041
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2042	SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
				2043	SDValue DescAddr,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2044	SDLoc DL,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2045	SelectionDAG &DAG) const {
				2046	EVT PtrVT = getPointerTy();
				2047
				2048	// The function we need to call is simply the first entry in the GOT for this
				2049	// descriptor, load it in preparation.
				2050	SDValue Func, Chain;
				2051	Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
				2052	DescAddr);
				2053
				2054	// The function takes only one argument: the address of the descriptor itself
				2055	// in X0.
				2056	SDValue Glue;
				2057	Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
				2058	Glue = Chain.getValue(1);
				2059
				2060	// Finally, there's a special calling-convention which means that the lookup
				2061	// must preserve all registers (except X0, obviously).
				2062	const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
				2063	const AArch64RegisterInfo *A64RI
				2064	= static_cast<const AArch64RegisterInfo *>(TRI);
				2065	const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
				2066
				2067	// We're now ready to populate the argument list, as with a normal call:
				2068	std::vector<SDValue> Ops;
				2069	Ops.push_back(Chain);
				2070	Ops.push_back(Func);
				2071	Ops.push_back(SymAddr);
				2072	Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
				2073	Ops.push_back(DAG.getRegisterMask(Mask));
				2074	Ops.push_back(Glue);
				2075
				2076	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2077	Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
				2078	Ops.size());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2079	Glue = Chain.getValue(1);
				2080
				2081	// After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
				2082	// back to the generic handling code.
				2083	return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
				2084	}
				2085
				2086	SDValue
				2087	AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
				2088	SelectionDAG &DAG) const {
Bill Wendling	496dc33	2013-06-07 05:00:11 +0000	[diff] [blame]	2089	assert(getSubtarget()->isTargetELF() &&
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2090	"TLS not implemented for non-ELF targets");
Tim Northover	85dcbde	2013-05-04 16:54:11 +0000	[diff] [blame]	2091	assert(getTargetMachine().getCodeModel() == CodeModel::Small
				2092	&& "TLS only supported in small memory model");
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2093	const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				2094
				2095	TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
				2096
				2097	SDValue TPOff;
				2098	EVT PtrVT = getPointerTy();
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2099	SDLoc DL(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2100	const GlobalValue *GV = GA->getGlobal();
				2101
				2102	SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
				2103
				2104	if (Model == TLSModel::InitialExec) {
				2105	TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
				2106	DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
				2107	AArch64II::MO_GOTTPREL),
				2108	DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
				2109	AArch64II::MO_GOTTPREL_LO12),
				2110	DAG.getConstant(8, MVT::i32));
				2111	TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
				2112	TPOff);
				2113	} else if (Model == TLSModel::LocalExec) {
				2114	SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
				2115	AArch64II::MO_TPREL_G1);
				2116	SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
				2117	AArch64II::MO_TPREL_G0_NC);
				2118
				2119	TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
Tim Northover	caaf238	2013-07-25 16:03:54 +0000	[diff] [blame]	2120	DAG.getTargetConstant(1, MVT::i32)), 0);
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2121	TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
				2122	TPOff, LoVar,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2123	DAG.getTargetConstant(0, MVT::i32)), 0);
				2124	} else if (Model == TLSModel::GeneralDynamic) {
				2125	// Accesses used in this sequence go via the TLS descriptor which lives in
				2126	// the GOT. Prepare an address we can use to handle this.
				2127	SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
				2128	AArch64II::MO_TLSDESC);
				2129	SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
				2130	AArch64II::MO_TLSDESC_LO12);
				2131	SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2132	HiDesc, LoDesc,
				2133	DAG.getConstant(8, MVT::i32));
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2134	SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
				2135
				2136	TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
				2137	} else if (Model == TLSModel::LocalDynamic) {
				2138	// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
				2139	// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
				2140	// the beginning of the module's TLS region, followed by a DTPREL offset
				2141	// calculation.
				2142
				2143	// These accesses will need deduplicating if there's more than one.
				2144	AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
				2145	.getInfo<AArch64MachineFunctionInfo>();
				2146	MFI->incNumLocalDynamicTLSAccesses();
				2147
				2148
				2149	// Get the location of _TLS_MODULE_BASE_:
				2150	SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
				2151	AArch64II::MO_TLSDESC);
				2152	SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
				2153	AArch64II::MO_TLSDESC_LO12);
				2154	SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2155	HiDesc, LoDesc,
				2156	DAG.getConstant(8, MVT::i32));
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2157	SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
				2158
				2159	ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
				2160
				2161	// Get the variable's offset from _TLS_MODULE_BASE_
				2162	SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
				2163	AArch64II::MO_DTPREL_G1);
				2164	SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
				2165	AArch64II::MO_DTPREL_G0_NC);
				2166
				2167	TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
				2168	DAG.getTargetConstant(0, MVT::i32)), 0);
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2169	TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
				2170	TPOff, LoVar,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2171	DAG.getTargetConstant(0, MVT::i32)), 0);
				2172	} else
				2173	llvm_unreachable("Unsupported TLS access model");
				2174
				2175
				2176	return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
				2177	}
				2178
				2179	SDValue
				2180	AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
				2181	bool IsSigned) const {
				2182	if (Op.getValueType() != MVT::f128) {
				2183	// Legal for everything except f128.
				2184	return Op;
				2185	}
				2186
				2187	RTLIB::Libcall LC;
				2188	if (IsSigned)
				2189	LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
				2190	else
				2191	LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
				2192
				2193	return LowerF128ToCall(Op, DAG, LC);
				2194	}
				2195
				2196
				2197	SDValue
				2198	AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
				2199	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2200	SDLoc dl(JT);
Tim Northover	8ff187d	2013-05-04 16:54:00 +0000	[diff] [blame]	2201	EVT PtrVT = getPointerTy();
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2202
				2203	// When compiling PIC, jump tables get put in the code section so a static
				2204	// relocation-style is acceptable for both cases.
Tim Northover	8ff187d	2013-05-04 16:54:00 +0000	[diff] [blame]	2205	switch (getTargetMachine().getCodeModel()) {
				2206	case CodeModel::Small:
				2207	return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
				2208	DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
				2209	DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
				2210	AArch64II::MO_LO12),
				2211	DAG.getConstant(1, MVT::i32));
				2212	case CodeModel::Large:
				2213	return DAG.getNode(
				2214	AArch64ISD::WrapperLarge, dl, PtrVT,
				2215	DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
				2216	DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
				2217	DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
				2218	DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
				2219	default:
				2220	llvm_unreachable("Only small and large code models supported now");
				2221	}
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2222	}
				2223
				2224	// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
				2225	SDValue
				2226	AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2227	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2228	SDValue LHS = Op.getOperand(0);
				2229	SDValue RHS = Op.getOperand(1);
				2230	SDValue IfTrue = Op.getOperand(2);
				2231	SDValue IfFalse = Op.getOperand(3);
				2232	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
				2233
				2234	if (LHS.getValueType() == MVT::f128) {
				2235	// f128 comparisons are lowered to libcalls, but slot in nicely here
				2236	// afterwards.
				2237	softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
				2238
				2239	// If softenSetCCOperands returned a scalar, we need to compare the result
				2240	// against zero to select between true and false values.
				2241	if (RHS.getNode() == 0) {
				2242	RHS = DAG.getConstant(0, LHS.getValueType());
				2243	CC = ISD::SETNE;
				2244	}
				2245	}
				2246
				2247	if (LHS.getValueType().isInteger()) {
				2248	SDValue A64cc;
				2249
				2250	// Integers are handled in a separate function because the combinations of
				2251	// immediates and tests can get hairy and we may want to fiddle things.
				2252	SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
				2253
				2254	return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
				2255	CmpOp, IfTrue, IfFalse, A64cc);
				2256	}
				2257
				2258	// Note that some LLVM floating-point CondCodes can't be lowered to a single
				2259	// conditional branch, hence FPCCToA64CC can set a second test, where either
				2260	// passing is sufficient.
				2261	A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
				2262	CondCode = FPCCToA64CC(CC, Alternative);
				2263	SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
				2264	SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
				2265	DAG.getCondCode(CC));
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2266	SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
				2267	Op.getValueType(),
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2268	SetCC, IfTrue, IfFalse, A64cc);
				2269
				2270	if (Alternative != A64CC::Invalid) {
				2271	A64cc = DAG.getConstant(Alternative, MVT::i32);
				2272	A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
				2273	SetCC, IfTrue, A64SELECT_CC, A64cc);
				2274
				2275	}
				2276
				2277	return A64SELECT_CC;
				2278	}
				2279
				2280	// (SELECT testbit, iftrue, iffalse)
				2281	SDValue
				2282	AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2283	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2284	SDValue TheBit = Op.getOperand(0);
				2285	SDValue IfTrue = Op.getOperand(1);
				2286	SDValue IfFalse = Op.getOperand(2);
				2287
				2288	// AArch64 BooleanContents is the default UndefinedBooleanContent, which means
				2289	// that as the consumer we are responsible for ignoring rubbish in higher
				2290	// bits.
				2291	TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
				2292	DAG.getConstant(1, MVT::i32));
				2293	SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
				2294	DAG.getConstant(0, TheBit.getValueType()),
				2295	DAG.getCondCode(ISD::SETNE));
				2296
				2297	return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
				2298	A64CMP, IfTrue, IfFalse,
				2299	DAG.getConstant(A64CC::NE, MVT::i32));
				2300	}
				2301
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	2302	static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
				2303	SDLoc DL(Op);
				2304	SDValue LHS = Op.getOperand(0);
				2305	SDValue RHS = Op.getOperand(1);
				2306	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
				2307	EVT VT = Op.getValueType();
				2308	bool Invert = false;
				2309	SDValue Op0, Op1;
				2310	unsigned Opcode;
				2311
				2312	if (LHS.getValueType().isInteger()) {
				2313
				2314	// Attempt to use Vector Integer Compare Mask Test instruction.
				2315	// TST = icmp ne (and (op0, op1), zero).
				2316	if (CC == ISD::SETNE) {
				2317	if (((LHS.getOpcode() == ISD::AND) &&
				2318	ISD::isBuildVectorAllZeros(RHS.getNode())) \|\|
				2319	((RHS.getOpcode() == ISD::AND) &&
				2320	ISD::isBuildVectorAllZeros(LHS.getNode()))) {
				2321
				2322	SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
				2323	SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
				2324	SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
				2325	return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
				2326	}
				2327	}
				2328
				2329	// Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
				2330	// Note: Compare against Zero does not support unsigned predicates.
				2331	if ((ISD::isBuildVectorAllZeros(RHS.getNode()) \|\|
				2332	ISD::isBuildVectorAllZeros(LHS.getNode())) &&
				2333	!isUnsignedIntSetCC(CC)) {
				2334
				2335	// If LHS is the zero value, swap operands and CondCode.
				2336	if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
				2337	CC = getSetCCSwappedOperands(CC);
				2338	Op0 = RHS;
				2339	} else
				2340	Op0 = LHS;
				2341
				2342	// Ensure valid CondCode for Compare Mask against Zero instruction:
				2343	// EQ, GE, GT, LE, LT.
				2344	if (ISD::SETNE == CC) {
				2345	Invert = true;
				2346	CC = ISD::SETEQ;
				2347	}
				2348
				2349	// Using constant type to differentiate integer and FP compares with zero.
				2350	Op1 = DAG.getConstant(0, MVT::i32);
				2351	Opcode = AArch64ISD::NEON_CMPZ;
				2352
				2353	} else {
				2354	// Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
				2355	// Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
				2356	bool Swap = false;
				2357	switch (CC) {
				2358	default:
				2359	llvm_unreachable("Illegal integer comparison.");
				2360	case ISD::SETEQ:
				2361	case ISD::SETGT:
				2362	case ISD::SETGE:
				2363	case ISD::SETUGT:
				2364	case ISD::SETUGE:
				2365	break;
				2366	case ISD::SETNE:
				2367	Invert = true;
				2368	CC = ISD::SETEQ;
				2369	break;
				2370	case ISD::SETULT:
				2371	case ISD::SETULE:
				2372	case ISD::SETLT:
				2373	case ISD::SETLE:
				2374	Swap = true;
				2375	CC = getSetCCSwappedOperands(CC);
				2376	}
				2377
				2378	if (Swap)
				2379	std::swap(LHS, RHS);
				2380
				2381	Opcode = AArch64ISD::NEON_CMP;
				2382	Op0 = LHS;
				2383	Op1 = RHS;
				2384	}
				2385
				2386	// Generate Compare Mask instr or Compare Mask against Zero instr.
				2387	SDValue NeonCmp =
				2388	DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
				2389
				2390	if (Invert)
				2391	NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
				2392
				2393	return NeonCmp;
				2394	}
				2395
				2396	// Now handle Floating Point cases.
				2397	// Attempt to use Vector Floating Point Compare Mask against Zero instruction.
				2398	if (ISD::isBuildVectorAllZeros(RHS.getNode()) \|\|
				2399	ISD::isBuildVectorAllZeros(LHS.getNode())) {
				2400
				2401	// If LHS is the zero value, swap operands and CondCode.
				2402	if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
				2403	CC = getSetCCSwappedOperands(CC);
				2404	Op0 = RHS;
				2405	} else
				2406	Op0 = LHS;
				2407
				2408	// Using constant type to differentiate integer and FP compares with zero.
				2409	Op1 = DAG.getConstantFP(0, MVT::f32);
				2410	Opcode = AArch64ISD::NEON_CMPZ;
				2411	} else {
				2412	// Attempt to use Vector Floating Point Compare Mask instruction.
				2413	Op0 = LHS;
				2414	Op1 = RHS;
				2415	Opcode = AArch64ISD::NEON_CMP;
				2416	}
				2417
				2418	SDValue NeonCmpAlt;
				2419	// Some register compares have to be implemented with swapped CC and operands,
				2420	// e.g.: OLT implemented as OGT with swapped operands.
				2421	bool SwapIfRegArgs = false;
				2422
				2423	// Ensure valid CondCode for FP Compare Mask against Zero instruction:
				2424	// EQ, GE, GT, LE, LT.
				2425	// And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
				2426	switch (CC) {
				2427	default:
				2428	llvm_unreachable("Illegal FP comparison");
				2429	case ISD::SETUNE:
				2430	case ISD::SETNE:
				2431	Invert = true; // Fallthrough
				2432	case ISD::SETOEQ:
				2433	case ISD::SETEQ:
				2434	CC = ISD::SETEQ;
				2435	break;
				2436	case ISD::SETOLT:
				2437	case ISD::SETLT:
				2438	CC = ISD::SETLT;
				2439	SwapIfRegArgs = true;
				2440	break;
				2441	case ISD::SETOGT:
				2442	case ISD::SETGT:
				2443	CC = ISD::SETGT;
				2444	break;
				2445	case ISD::SETOLE:
				2446	case ISD::SETLE:
				2447	CC = ISD::SETLE;
				2448	SwapIfRegArgs = true;
				2449	break;
				2450	case ISD::SETOGE:
				2451	case ISD::SETGE:
				2452	CC = ISD::SETGE;
				2453	break;
				2454	case ISD::SETUGE:
				2455	Invert = true;
				2456	CC = ISD::SETLT;
				2457	SwapIfRegArgs = true;
				2458	break;
				2459	case ISD::SETULE:
				2460	Invert = true;
				2461	CC = ISD::SETGT;
				2462	break;
				2463	case ISD::SETUGT:
				2464	Invert = true;
				2465	CC = ISD::SETLE;
				2466	SwapIfRegArgs = true;
				2467	break;
				2468	case ISD::SETULT:
				2469	Invert = true;
				2470	CC = ISD::SETGE;
				2471	break;
				2472	case ISD::SETUEQ:
				2473	Invert = true; // Fallthrough
				2474	case ISD::SETONE:
				2475	// Expand this to (OGT \|OLT).
				2476	NeonCmpAlt =
				2477	DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
				2478	CC = ISD::SETLT;
				2479	SwapIfRegArgs = true;
				2480	break;
				2481	case ISD::SETUO:
				2482	Invert = true; // Fallthrough
				2483	case ISD::SETO:
				2484	// Expand this to (OGE \| OLT).
				2485	NeonCmpAlt =
				2486	DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
				2487	CC = ISD::SETLT;
				2488	SwapIfRegArgs = true;
				2489	break;
				2490	}
				2491
				2492	if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
				2493	CC = getSetCCSwappedOperands(CC);
				2494	std::swap(Op0, Op1);
				2495	}
				2496
				2497	// Generate FP Compare Mask instr or FP Compare Mask against Zero instr
				2498	SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
				2499
				2500	if (NeonCmpAlt.getNode())
				2501	NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
				2502
				2503	if (Invert)
				2504	NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
				2505
				2506	return NeonCmp;
				2507	}
				2508
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2509	// (SETCC lhs, rhs, condcode)
				2510	SDValue
				2511	AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2512	SDLoc dl(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2513	SDValue LHS = Op.getOperand(0);
				2514	SDValue RHS = Op.getOperand(1);
				2515	ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
				2516	EVT VT = Op.getValueType();
				2517
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	2518	if (VT.isVector())
				2519	return LowerVectorSETCC(Op, DAG);
				2520
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2521	if (LHS.getValueType() == MVT::f128) {
				2522	// f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
				2523	// for the rest of the function (some i32 or i64 values).
				2524	softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
				2525
				2526	// If softenSetCCOperands returned a scalar, use it.
				2527	if (RHS.getNode() == 0) {
				2528	assert(LHS.getValueType() == Op.getValueType() &&
				2529	"Unexpected setcc expansion!");
				2530	return LHS;
				2531	}
				2532	}
				2533
				2534	if (LHS.getValueType().isInteger()) {
				2535	SDValue A64cc;
				2536
				2537	// Integers are handled in a separate function because the combinations of
				2538	// immediates and tests can get hairy and we may want to fiddle things.
				2539	SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
				2540
				2541	return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
				2542	CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
				2543	A64cc);
				2544	}
				2545
				2546	// Note that some LLVM floating-point CondCodes can't be lowered to a single
				2547	// conditional branch, hence FPCCToA64CC can set a second test, where either
				2548	// passing is sufficient.
				2549	A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
				2550	CondCode = FPCCToA64CC(CC, Alternative);
				2551	SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
				2552	SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
				2553	DAG.getCondCode(CC));
				2554	SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
				2555	CmpOp, DAG.getConstant(1, VT),
				2556	DAG.getConstant(0, VT), A64cc);
				2557
				2558	if (Alternative != A64CC::Invalid) {
				2559	A64cc = DAG.getConstant(Alternative, MVT::i32);
				2560	A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
				2561	DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
				2562	}
				2563
				2564	return A64SELECT_CC;
				2565	}
				2566
				2567	SDValue
				2568	AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
				2569	const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
				2570	const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
				2571
				2572	// We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
				2573	// rather than just 8.
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2574	return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2575	Op.getOperand(1), Op.getOperand(2),
				2576	DAG.getConstant(32, MVT::i32), 8, false, false,
				2577	MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
				2578	}
				2579
				2580	SDValue
				2581	AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
				2582	// The layout of the va_list struct is specified in the AArch64 Procedure Call
				2583	// Standard, section B.3.
				2584	MachineFunction &MF = DAG.getMachineFunction();
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	2585	AArch64MachineFunctionInfo *FuncInfo
				2586	= MF.getInfo<AArch64MachineFunctionInfo>();
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2587	SDLoc DL(Op);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2588
				2589	SDValue Chain = Op.getOperand(0);
				2590	SDValue VAList = Op.getOperand(1);
				2591	const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
				2592	SmallVector<SDValue, 4> MemOps;
				2593
				2594	// void *__stack at offset 0
				2595	SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
				2596	getPointerTy());
				2597	MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
				2598	MachinePointerInfo(SV), false, false, 0));
				2599
				2600	// void *__gr_top at offset 8
				2601	int GPRSize = FuncInfo->getVariadicGPRSize();
				2602	if (GPRSize > 0) {
				2603	SDValue GRTop, GRTopAddr;
				2604
				2605	GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
				2606	DAG.getConstant(8, getPointerTy()));
				2607
				2608	GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
				2609	GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
				2610	DAG.getConstant(GPRSize, getPointerTy()));
				2611
				2612	MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
				2613	MachinePointerInfo(SV, 8),
				2614	false, false, 0));
				2615	}
				2616
				2617	// void *__vr_top at offset 16
				2618	int FPRSize = FuncInfo->getVariadicFPRSize();
				2619	if (FPRSize > 0) {
				2620	SDValue VRTop, VRTopAddr;
				2621	VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
				2622	DAG.getConstant(16, getPointerTy()));
				2623
				2624	VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
				2625	VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
				2626	DAG.getConstant(FPRSize, getPointerTy()));
				2627
				2628	MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
				2629	MachinePointerInfo(SV, 16),
				2630	false, false, 0));
				2631	}
				2632
				2633	// int __gr_offs at offset 24
				2634	SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
				2635	DAG.getConstant(24, getPointerTy()));
				2636	MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
				2637	GROffsAddr, MachinePointerInfo(SV, 24),
				2638	false, false, 0));
				2639
				2640	// int __vr_offs at offset 28
				2641	SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
				2642	DAG.getConstant(28, getPointerTy()));
				2643	MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
				2644	VROffsAddr, MachinePointerInfo(SV, 28),
				2645	false, false, 0));
				2646
				2647	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
				2648	MemOps.size());
				2649	}
				2650
				2651	SDValue
				2652	AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
				2653	switch (Op.getOpcode()) {
				2654	default: llvm_unreachable("Don't know how to custom lower this!");
				2655	case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
				2656	case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
				2657	case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
				2658	case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
				2659	case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
				2660	case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
				2661	case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
				2662	case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
				2663	case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
				2664	case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
				2665
				2666	case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
				2667	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				2668	case ISD::BR_CC: return LowerBR_CC(Op, DAG);
				2669	case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
				2670	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				2671	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				2672	case ISD::SELECT: return LowerSELECT(Op, DAG);
				2673	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
				2674	case ISD::SETCC: return LowerSETCC(Op, DAG);
				2675	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				2676	case ISD::VASTART: return LowerVASTART(Op, DAG);
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	2677	case ISD::BUILD_VECTOR:
				2678	return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2679	}
				2680
				2681	return SDValue();
				2682	}
				2683
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	2684	/// Check if the specified splat value corresponds to a valid vector constant
				2685	/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
				2686	/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
				2687	/// values.
				2688	static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
				2689	unsigned SplatBitSize, SelectionDAG &DAG,
				2690	bool is128Bits, NeonModImmType type, EVT &VT,
				2691	unsigned &Imm, unsigned &OpCmode) {
				2692	switch (SplatBitSize) {
				2693	default:
				2694	llvm_unreachable("unexpected size for isNeonModifiedImm");
				2695	case 8: {
				2696	if (type != Neon_Mov_Imm)
				2697	return false;
				2698	assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
				2699	// Neon movi per byte: Op=0, Cmode=1110.
				2700	OpCmode = 0xe;
				2701	Imm = SplatBits;
				2702	VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
				2703	break;
				2704	}
				2705	case 16: {
				2706	// Neon move inst per halfword
				2707	VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
				2708	if ((SplatBits & ~0xff) == 0) {
				2709	// Value = 0x00nn is 0x00nn LSL 0
				2710	// movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
				2711	// bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
				2712	// Op=x, Cmode=100y
				2713	Imm = SplatBits;
				2714	OpCmode = 0x8;
				2715	break;
				2716	}
				2717	if ((SplatBits & ~0xff00) == 0) {
				2718	// Value = 0xnn00 is 0x00nn LSL 8
				2719	// movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
				2720	// bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
				2721	// Op=x, Cmode=101x
				2722	Imm = SplatBits >> 8;
				2723	OpCmode = 0xa;
				2724	break;
				2725	}
				2726	// can't handle any other
				2727	return false;
				2728	}
				2729
				2730	case 32: {
				2731	// First the LSL variants (MSL is unusable by some interested instructions).
				2732
				2733	// Neon move instr per word, shift zeros
				2734	VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
				2735	if ((SplatBits & ~0xff) == 0) {
				2736	// Value = 0x000000nn is 0x000000nn LSL 0
				2737	// movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
				2738	// bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
				2739	// Op=x, Cmode=000x
				2740	Imm = SplatBits;
				2741	OpCmode = 0;
				2742	break;
				2743	}
				2744	if ((SplatBits & ~0xff00) == 0) {
				2745	// Value = 0x0000nn00 is 0x000000nn LSL 8
				2746	// movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
				2747	// bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
				2748	// Op=x, Cmode=001x
				2749	Imm = SplatBits >> 8;
				2750	OpCmode = 0x2;
				2751	break;
				2752	}
				2753	if ((SplatBits & ~0xff0000) == 0) {
				2754	// Value = 0x00nn0000 is 0x000000nn LSL 16
				2755	// movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
				2756	// bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
				2757	// Op=x, Cmode=010x
				2758	Imm = SplatBits >> 16;
				2759	OpCmode = 0x4;
				2760	break;
				2761	}
				2762	if ((SplatBits & ~0xff000000) == 0) {
				2763	// Value = 0xnn000000 is 0x000000nn LSL 24
				2764	// movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
				2765	// bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
				2766	// Op=x, Cmode=011x
				2767	Imm = SplatBits >> 24;
				2768	OpCmode = 0x6;
				2769	break;
				2770	}
				2771
				2772	// Now the MSL immediates.
				2773
				2774	// Neon move instr per word, shift ones
				2775	if ((SplatBits & ~0xffff) == 0 &&
				2776	((SplatBits \| SplatUndef) & 0xff) == 0xff) {
				2777	// Value = 0x0000nnff is 0x000000nn MSL 8
				2778	// movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
				2779	// Op=x, Cmode=1100
				2780	Imm = SplatBits >> 8;
				2781	OpCmode = 0xc;
				2782	break;
				2783	}
				2784	if ((SplatBits & ~0xffffff) == 0 &&
				2785	((SplatBits \| SplatUndef) & 0xffff) == 0xffff) {
				2786	// Value = 0x00nnffff is 0x000000nn MSL 16
				2787	// movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101
				2788	// Op=x, Cmode=1101
				2789	Imm = SplatBits >> 16;
				2790	OpCmode = 0xd;
				2791	break;
				2792	}
				2793	// can't handle any other
				2794	return false;
				2795	}
				2796
				2797	case 64: {
				2798	if (type != Neon_Mov_Imm)
				2799	return false;
				2800	// Neon move instr bytemask, where each byte is either 0x00 or 0xff.
				2801	// movi Op=1, Cmode=1110.
				2802	OpCmode = 0x1e;
				2803	uint64_t BitMask = 0xff;
				2804	uint64_t Val = 0;
				2805	unsigned ImmMask = 1;
				2806	Imm = 0;
				2807	for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
				2808	if (((SplatBits \| SplatUndef) & BitMask) == BitMask) {
				2809	Val \|= BitMask;
				2810	Imm \|= ImmMask;
				2811	} else if ((SplatBits & BitMask) != 0) {
				2812	return false;
				2813	}
				2814	BitMask <<= 8;
				2815	ImmMask <<= 1;
				2816	}
				2817	SplatBits = Val;
				2818	VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
				2819	break;
				2820	}
				2821	}
				2822
				2823	return true;
				2824	}
				2825
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2826	static SDValue PerformANDCombine(SDNode *N,
				2827	TargetLowering::DAGCombinerInfo &DCI) {
				2828
				2829	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2830	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2831	EVT VT = N->getValueType(0);
				2832
				2833	// We're looking for an SRA/SHL pair which form an SBFX.
				2834
				2835	if (VT != MVT::i32 && VT != MVT::i64)
				2836	return SDValue();
				2837
				2838	if (!isa<ConstantSDNode>(N->getOperand(1)))
				2839	return SDValue();
				2840
				2841	uint64_t TruncMask = N->getConstantOperandVal(1);
				2842	if (!isMask_64(TruncMask))
				2843	return SDValue();
				2844
				2845	uint64_t Width = CountPopulation_64(TruncMask);
				2846	SDValue Shift = N->getOperand(0);
				2847
				2848	if (Shift.getOpcode() != ISD::SRL)
				2849	return SDValue();
				2850
				2851	if (!isa<ConstantSDNode>(Shift->getOperand(1)))
				2852	return SDValue();
				2853	uint64_t LSB = Shift->getConstantOperandVal(1);
				2854
				2855	if (LSB > VT.getSizeInBits() \|\| Width > VT.getSizeInBits())
				2856	return SDValue();
				2857
				2858	return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
				2859	DAG.getConstant(LSB, MVT::i64),
				2860	DAG.getConstant(LSB + Width - 1, MVT::i64));
				2861	}
				2862
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2863	/// For a true bitfield insert, the bits getting into that contiguous mask
				2864	/// should come from the low part of an existing value: they must be formed from
				2865	/// a compatible SHL operation (unless they're already low). This function
				2866	/// checks that condition and returns the least-significant bit that's
				2867	/// intended. If the operation not a field preparation, -1 is returned.
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2868	static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2869	SDValue &MaskedVal, uint64_t Mask) {
				2870	if (!isShiftedMask_64(Mask))
				2871	return -1;
				2872
				2873	// Now we need to alter MaskedVal so that it is an appropriate input for a BFI
				2874	// instruction. BFI will do a left-shift by LSB before applying the mask we've
				2875	// spotted, so in general we should pre-emptively "undo" that by making sure
				2876	// the incoming bits have had a right-shift applied to them.
				2877	//
				2878	// This right shift, however, will combine with existing left/right shifts. In
				2879	// the simplest case of a completely straight bitfield operation, it will be
				2880	// expected to completely cancel out with an existing SHL. More complicated
				2881	// cases (e.g. bitfield to bitfield copy) may still need a real shift before
				2882	// the BFI.
				2883
Michael J. Spencer	df1ecbd7	2013-05-24 22:23:49 +0000	[diff] [blame]	2884	uint64_t LSB = countTrailingZeros(Mask);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2885	int64_t ShiftRightRequired = LSB;
				2886	if (MaskedVal.getOpcode() == ISD::SHL &&
				2887	isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
				2888	ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
				2889	MaskedVal = MaskedVal.getOperand(0);
				2890	} else if (MaskedVal.getOpcode() == ISD::SRL &&
				2891	isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
				2892	ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
				2893	MaskedVal = MaskedVal.getOperand(0);
				2894	}
				2895
				2896	if (ShiftRightRequired > 0)
				2897	MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
				2898	DAG.getConstant(ShiftRightRequired, MVT::i64));
				2899	else if (ShiftRightRequired < 0) {
				2900	// We could actually end up with a residual left shift, for example with
				2901	// "struc.bitfield = val << 1".
				2902	MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
				2903	DAG.getConstant(-ShiftRightRequired, MVT::i64));
				2904	}
				2905
				2906	return LSB;
				2907	}
				2908
				2909	/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
				2910	/// a mask and an extension. Returns true if a BFI was found and provides
				2911	/// information on its surroundings.
				2912	static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
				2913	bool &Extended) {
				2914	Extended = false;
				2915	if (N.getOpcode() == ISD::ZERO_EXTEND) {
				2916	Extended = true;
				2917	N = N.getOperand(0);
				2918	}
				2919
				2920	if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
				2921	Mask = N->getConstantOperandVal(1);
				2922	N = N.getOperand(0);
				2923	} else {
				2924	// Mask is the whole width.
Benjamin Kramer	a5dce35	2013-02-17 17:55:32 +0000	[diff] [blame]	2925	Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2926	}
				2927
				2928	if (N.getOpcode() == AArch64ISD::BFI) {
				2929	BFI = N;
				2930	return true;
				2931	}
				2932
				2933	return false;
				2934	}
				2935
				2936	/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
				2937	/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
				2938	/// can often be further combined with a larger mask. Ultimately, we want mask
				2939	/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
				2940	static SDValue tryCombineToBFI(SDNode *N,
				2941	TargetLowering::DAGCombinerInfo &DCI,
				2942	const AArch64Subtarget *Subtarget) {
				2943	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	2944	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	2945	EVT VT = N->getValueType(0);
				2946
				2947	assert(N->getOpcode() == ISD::OR && "Unexpected root");
				2948
				2949	// We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
				2950	// abandon the effort.
				2951	SDValue LHS = N->getOperand(0);
				2952	if (LHS.getOpcode() != ISD::AND)
				2953	return SDValue();
				2954
				2955	uint64_t LHSMask;
				2956	if (isa<ConstantSDNode>(LHS.getOperand(1)))
				2957	LHSMask = LHS->getConstantOperandVal(1);
				2958	else
				2959	return SDValue();
				2960
				2961	// We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
				2962	// is or abandon the effort.
				2963	SDValue RHS = N->getOperand(1);
				2964	if (RHS.getOpcode() != ISD::AND)
				2965	return SDValue();
				2966
				2967	uint64_t RHSMask;
				2968	if (isa<ConstantSDNode>(RHS.getOperand(1)))
				2969	RHSMask = RHS->getConstantOperandVal(1);
				2970	else
				2971	return SDValue();
				2972
				2973	// Can't do anything if the masks are incompatible.
				2974	if (LHSMask & RHSMask)
				2975	return SDValue();
				2976
				2977	// Now we need one of the masks to be a contiguous field. Without loss of
				2978	// generality that should be the RHS one.
				2979	SDValue Bitfield = LHS.getOperand(0);
				2980	if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
				2981	// We know that LHS is a candidate new value, and RHS isn't already a better
				2982	// one.
				2983	std::swap(LHS, RHS);
				2984	std::swap(LHSMask, RHSMask);
				2985	}
				2986
				2987	// We've done our best to put the right operands in the right places, all we
				2988	// can do now is check whether a BFI exists.
				2989	Bitfield = RHS.getOperand(0);
				2990	int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
				2991	if (LSB == -1)
				2992	return SDValue();
				2993
				2994	uint32_t Width = CountPopulation_64(RHSMask);
				2995	assert(Width && "Expected non-zero bitfield width");
				2996
				2997	SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
				2998	LHS.getOperand(0), Bitfield,
				2999	DAG.getConstant(LSB, MVT::i64),
				3000	DAG.getConstant(Width, MVT::i64));
				3001
				3002	// Mask is trivial
Benjamin Kramer	a5dce35	2013-02-17 17:55:32 +0000	[diff] [blame]	3003	if ((LHSMask \| RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3004	return BFI;
				3005
				3006	return DAG.getNode(ISD::AND, DL, VT, BFI,
				3007	DAG.getConstant(LHSMask \| RHSMask, VT));
				3008	}
				3009
				3010	/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
				3011	/// original input. This is surprisingly common because SROA splits things up
				3012	/// into i8 chunks, so the originally detected MaskedBFI may actually only act
				3013	/// on the low (say) byte of a word. This is then orred into the rest of the
				3014	/// word afterwards.
				3015	///
				3016	/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
				3017	///
				3018	/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
				3019	/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
				3020	/// involved.
				3021	static SDValue tryCombineToLargerBFI(SDNode *N,
				3022	TargetLowering::DAGCombinerInfo &DCI,
				3023	const AArch64Subtarget *Subtarget) {
				3024	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	3025	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3026	EVT VT = N->getValueType(0);
				3027
				3028	// First job is to hunt for a MaskedBFI on either the left or right. Swap
				3029	// operands if it's actually on the right.
				3030	SDValue BFI;
				3031	SDValue PossExtraMask;
				3032	uint64_t ExistingMask = 0;
				3033	bool Extended = false;
				3034	if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
				3035	PossExtraMask = N->getOperand(1);
				3036	else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
				3037	PossExtraMask = N->getOperand(0);
				3038	else
				3039	return SDValue();
				3040
				3041	// We can only combine a BFI with another compatible mask.
				3042	if (PossExtraMask.getOpcode() != ISD::AND \|\|
				3043	!isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
				3044	return SDValue();
				3045
				3046	uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
				3047
				3048	// Masks must be compatible.
				3049	if (ExtraMask & ExistingMask)
				3050	return SDValue();
				3051
				3052	SDValue OldBFIVal = BFI.getOperand(0);
				3053	SDValue NewBFIVal = BFI.getOperand(1);
				3054	if (Extended) {
				3055	// We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
				3056	// 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
				3057	// need to be made compatible.
				3058	assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
				3059	&& "Invalid types for BFI");
				3060	OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
				3061	NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
				3062	}
				3063
				3064	// We need the MaskedBFI to be combined with a mask of the same value.
				3065	if (PossExtraMask.getOperand(0) != OldBFIVal)
				3066	return SDValue();
				3067
				3068	BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
				3069	OldBFIVal, NewBFIVal,
				3070	BFI.getOperand(2), BFI.getOperand(3));
				3071
				3072	// If the masking is trivial, we don't need to create it.
Benjamin Kramer	a5dce35	2013-02-17 17:55:32 +0000	[diff] [blame]	3073	if ((ExtraMask \| ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3074	return BFI;
				3075
				3076	return DAG.getNode(ISD::AND, DL, VT, BFI,
				3077	DAG.getConstant(ExtraMask \| ExistingMask, VT));
				3078	}
				3079
				3080	/// An EXTR instruction is made up of two shifts, ORed together. This helper
				3081	/// searches for and classifies those shifts.
				3082	static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
				3083	bool &FromHi) {
				3084	if (N.getOpcode() == ISD::SHL)
				3085	FromHi = false;
				3086	else if (N.getOpcode() == ISD::SRL)
				3087	FromHi = true;
				3088	else
				3089	return false;
				3090
				3091	if (!isa<ConstantSDNode>(N.getOperand(1)))
				3092	return false;
				3093
				3094	ShiftAmount = N->getConstantOperandVal(1);
				3095	Src = N->getOperand(0);
				3096	return true;
				3097	}
				3098
Joel Jones	440d8e4	2013-02-10 23:56:30 +0000	[diff] [blame]	3099	/// EXTR instruction extracts a contiguous chunk of bits from two existing
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3100	/// registers viewed as a high/low pair. This function looks for the pattern:
				3101	/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
				3102	/// EXTR. Can't quite be done in TableGen because the two immediates aren't
				3103	/// independent.
				3104	static SDValue tryCombineToEXTR(SDNode *N,
				3105	TargetLowering::DAGCombinerInfo &DCI) {
				3106	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	3107	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3108	EVT VT = N->getValueType(0);
				3109
				3110	assert(N->getOpcode() == ISD::OR && "Unexpected root");
				3111
				3112	if (VT != MVT::i32 && VT != MVT::i64)
				3113	return SDValue();
				3114
				3115	SDValue LHS;
				3116	uint32_t ShiftLHS = 0;
				3117	bool LHSFromHi = 0;
				3118	if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
				3119	return SDValue();
				3120
				3121	SDValue RHS;
				3122	uint32_t ShiftRHS = 0;
				3123	bool RHSFromHi = 0;
				3124	if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
				3125	return SDValue();
				3126
				3127	// If they're both trying to come from the high part of the register, they're
				3128	// not really an EXTR.
				3129	if (LHSFromHi == RHSFromHi)
				3130	return SDValue();
				3131
				3132	if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
				3133	return SDValue();
				3134
				3135	if (LHSFromHi) {
				3136	std::swap(LHS, RHS);
				3137	std::swap(ShiftLHS, ShiftRHS);
				3138	}
				3139
				3140	return DAG.getNode(AArch64ISD::EXTR, DL, VT,
				3141	LHS, RHS,
				3142	DAG.getConstant(ShiftRHS, MVT::i64));
				3143	}
				3144
				3145	/// Target-specific dag combine xforms for ISD::OR
				3146	static SDValue PerformORCombine(SDNode *N,
				3147	TargetLowering::DAGCombinerInfo &DCI,
				3148	const AArch64Subtarget *Subtarget) {
				3149
				3150	SelectionDAG &DAG = DCI.DAG;
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	3151	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3152	EVT VT = N->getValueType(0);
				3153
				3154	if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
				3155	return SDValue();
				3156
				3157	// Attempt to recognise bitfield-insert operations.
				3158	SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
				3159	if (Res.getNode())
				3160	return Res;
				3161
				3162	// Attempt to combine an existing MaskedBFI operation into one with a larger
				3163	// mask.
				3164	Res = tryCombineToLargerBFI(N, DCI, Subtarget);
				3165	if (Res.getNode())
				3166	return Res;
				3167
				3168	Res = tryCombineToEXTR(N, DCI);
				3169	if (Res.getNode())
				3170	return Res;
				3171
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	3172	if (!Subtarget->hasNEON())
				3173	return SDValue();
				3174
				3175	// Attempt to use vector immediate-form BSL
				3176	// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
				3177
				3178	SDValue N0 = N->getOperand(0);
				3179	if (N0.getOpcode() != ISD::AND)
				3180	return SDValue();
				3181
				3182	SDValue N1 = N->getOperand(1);
				3183	if (N1.getOpcode() != ISD::AND)
				3184	return SDValue();
				3185
				3186	if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
				3187	APInt SplatUndef;
				3188	unsigned SplatBitSize;
				3189	bool HasAnyUndefs;
				3190	BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
				3191	APInt SplatBits0;
				3192	if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
				3193	HasAnyUndefs) &&
				3194	!HasAnyUndefs) {
				3195	BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
				3196	APInt SplatBits1;
				3197	if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
				3198	HasAnyUndefs) &&
				3199	!HasAnyUndefs && SplatBits0 == ~SplatBits1) {
				3200	// Canonicalize the vector type to make instruction selection simpler.
				3201	EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
				3202	SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
				3203	N0->getOperand(1), N0->getOperand(0),
				3204	N1->getOperand(0));
				3205	return DAG.getNode(ISD::BITCAST, DL, VT, Result);
				3206	}
				3207	}
				3208	}
				3209
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3210	return SDValue();
				3211	}
				3212
				3213	/// Target-specific dag combine xforms for ISD::SRA
				3214	static SDValue PerformSRACombine(SDNode *N,
				3215	TargetLowering::DAGCombinerInfo &DCI) {
				3216
				3217	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	3218	SDLoc DL(N);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3219	EVT VT = N->getValueType(0);
				3220
				3221	// We're looking for an SRA/SHL pair which form an SBFX.
				3222
				3223	if (VT != MVT::i32 && VT != MVT::i64)
				3224	return SDValue();
				3225
				3226	if (!isa<ConstantSDNode>(N->getOperand(1)))
				3227	return SDValue();
				3228
				3229	uint64_t ExtraSignBits = N->getConstantOperandVal(1);
				3230	SDValue Shift = N->getOperand(0);
				3231
				3232	if (Shift.getOpcode() != ISD::SHL)
				3233	return SDValue();
				3234
				3235	if (!isa<ConstantSDNode>(Shift->getOperand(1)))
				3236	return SDValue();
				3237
				3238	uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
				3239	uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
				3240	uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
				3241
				3242	if (LSB > VT.getSizeInBits() \|\| Width > VT.getSizeInBits())
				3243	return SDValue();
				3244
				3245	return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
				3246	DAG.getConstant(LSB, MVT::i64),
				3247	DAG.getConstant(LSB + Width - 1, MVT::i64));
				3248	}
				3249
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	3250	/// Check if this is a valid build_vector for the immediate operand of
				3251	/// a vector shift operation, where all the elements of the build_vector
				3252	/// must have the same constant integer value.
				3253	static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
				3254	// Ignore bit_converts.
				3255	while (Op.getOpcode() == ISD::BITCAST)
				3256	Op = Op.getOperand(0);
				3257	BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
				3258	APInt SplatBits, SplatUndef;
				3259	unsigned SplatBitSize;
				3260	bool HasAnyUndefs;
				3261	if (!BVN \|\| !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
				3262	HasAnyUndefs, ElementBits) \|\|
				3263	SplatBitSize > ElementBits)
				3264	return false;
				3265	Cnt = SplatBits.getSExtValue();
				3266	return true;
				3267	}
				3268
				3269	/// Check if this is a valid build_vector for the immediate operand of
				3270	/// a vector shift left operation. That value must be in the range:
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	3271	/// 0 <= Value < ElementBits
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	3272	static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
				3273	assert(VT.isVector() && "vector shift count is not a vector type");
				3274	unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
				3275	if (!getVShiftImm(Op, ElementBits, Cnt))
				3276	return false;
				3277	return (Cnt >= 0 && Cnt < ElementBits);
				3278	}
				3279
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	3280	/// Check if this is a valid build_vector for the immediate operand of a
				3281	/// vector shift right operation. The value must be in the range:
				3282	/// 1 <= Value <= ElementBits
				3283	static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
				3284	assert(VT.isVector() && "vector shift count is not a vector type");
				3285	unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
				3286	if (!getVShiftImm(Op, ElementBits, Cnt))
				3287	return false;
				3288	return (Cnt >= 1 && Cnt <= ElementBits);
				3289	}
				3290
				3291	/// Checks for immediate versions of vector shifts and lowers them.
				3292	static SDValue PerformShiftCombine(SDNode *N,
				3293	TargetLowering::DAGCombinerInfo &DCI,
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	3294	const AArch64Subtarget *ST) {
				3295	SelectionDAG &DAG = DCI.DAG;
				3296	EVT VT = N->getValueType(0);
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	3297	if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 \|\| VT == MVT::i64))
				3298	return PerformSRACombine(N, DCI);
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	3299
				3300	// Nothing to be done for scalar shifts.
				3301	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
				3302	if (!VT.isVector() \|\| !TLI.isTypeLegal(VT))
				3303	return SDValue();
				3304
				3305	assert(ST->hasNEON() && "unexpected vector shift");
				3306	int64_t Cnt;
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	3307
				3308	switch (N->getOpcode()) {
				3309	default:
				3310	llvm_unreachable("unexpected shift opcode");
				3311
				3312	case ISD::SHL:
				3313	if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
				3314	SDValue RHS =
				3315	DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
				3316	DAG.getConstant(Cnt, MVT::i32));
				3317	return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
				3318	}
				3319	break;
				3320
				3321	case ISD::SRA:
				3322	case ISD::SRL:
				3323	if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
				3324	SDValue RHS =
				3325	DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
				3326	DAG.getConstant(Cnt, MVT::i32));
				3327	return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
				3328	}
				3329	break;
				3330	}
				3331
				3332	return SDValue();
				3333	}
				3334
				3335	/// ARM-specific DAG combining for intrinsics.
				3336	static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
				3337	unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
				3338
				3339	switch (IntNo) {
				3340	default:
				3341	// Don't do anything for most intrinsics.
				3342	break;
				3343
				3344	case Intrinsic::arm_neon_vqshifts:
				3345	case Intrinsic::arm_neon_vqshiftu:
				3346	EVT VT = N->getOperand(1).getValueType();
				3347	int64_t Cnt;
				3348	if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
				3349	break;
				3350	unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
				3351	? AArch64ISD::NEON_QSHLs
				3352	: AArch64ISD::NEON_QSHLu;
				3353	return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
				3354	N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
Hao Liu	cd8b02d	2013-08-15 08:26:11 +0000	[diff] [blame]	3355	}
				3356
				3357	return SDValue();
				3358	}
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3359
				3360	SDValue
				3361	AArch64TargetLowering::PerformDAGCombine(SDNode *N,
				3362	DAGCombinerInfo &DCI) const {
				3363	switch (N->getOpcode()) {
				3364	default: break;
				3365	case ISD::AND: return PerformANDCombine(N, DCI);
Bill Wendling	496dc33	2013-06-07 05:00:11 +0000	[diff] [blame]	3366	case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
Hao Liu	d4aede0	2013-09-04 09:28:24 +0000	[diff] [blame]	3367	case ISD::SHL:
				3368	case ISD::SRA:
				3369	case ISD::SRL:
				3370	return PerformShiftCombine(N, DCI, getSubtarget());
				3371	case ISD::INTRINSIC_WO_CHAIN:
				3372	return PerformIntrinsicCombine(N, DCI.DAG);
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3373	}
				3374	return SDValue();
				3375	}
				3376
Stephen Lin	73de7bf	2013-07-09 18:16:56 +0000	[diff] [blame]	3377	bool
				3378	AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
				3379	VT = VT.getScalarType();
				3380
				3381	if (!VT.isSimple())
				3382	return false;
				3383
				3384	switch (VT.getSimpleVT().SimpleTy) {
				3385	case MVT::f16:
				3386	case MVT::f32:
				3387	case MVT::f64:
				3388	return true;
				3389	case MVT::f128:
				3390	return false;
				3391	default:
				3392	break;
				3393	}
				3394
				3395	return false;
				3396	}
				3397
Tim Northover	40e9efd	2013-08-01 09:20:35 +0000	[diff] [blame]	3398	// If this is a case we can't handle, return null and let the default
				3399	// expansion code take care of it.
				3400	SDValue
				3401	AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
				3402	const AArch64Subtarget *ST) const {
				3403
				3404	BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
				3405	SDLoc DL(Op);
				3406	EVT VT = Op.getValueType();
				3407
				3408	APInt SplatBits, SplatUndef;
				3409	unsigned SplatBitSize;
				3410	bool HasAnyUndefs;
				3411
				3412	// Note we favor lowering MOVI over MVNI.
				3413	// This has implications on the definition of patterns in TableGen to select
				3414	// BIC immediate instructions but not ORR immediate instructions.
				3415	// If this lowering order is changed, TableGen patterns for BIC immediate and
				3416	// ORR immediate instructions have to be updated.
				3417	if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
				3418	if (SplatBitSize <= 64) {
				3419	// First attempt to use vector immediate-form MOVI
				3420	EVT NeonMovVT;
				3421	unsigned Imm = 0;
				3422	unsigned OpCmode = 0;
				3423
				3424	if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
				3425	SplatBitSize, DAG, VT.is128BitVector(),
				3426	Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
				3427	SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
				3428	SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
				3429
				3430	if (ImmVal.getNode() && OpCmodeVal.getNode()) {
				3431	SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
				3432	ImmVal, OpCmodeVal);
				3433	return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
				3434	}
				3435	}
				3436
				3437	// Then attempt to use vector immediate-form MVNI
				3438	uint64_t NegatedImm = (~SplatBits).getZExtValue();
				3439	if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
				3440	DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
				3441	Imm, OpCmode)) {
				3442	SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
				3443	SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
				3444	if (ImmVal.getNode() && OpCmodeVal.getNode()) {
				3445	SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
				3446	ImmVal, OpCmodeVal);
				3447	return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
				3448	}
				3449	}
				3450
				3451	// Attempt to use vector immediate-form FMOV
				3452	if (((VT == MVT::v2f32 \|\| VT == MVT::v4f32) && SplatBitSize == 32) \|\|
				3453	(VT == MVT::v2f64 && SplatBitSize == 64)) {
				3454	APFloat RealVal(
				3455	SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
				3456	SplatBits);
				3457	uint32_t ImmVal;
				3458	if (A64Imms::isFPImm(RealVal, ImmVal)) {
				3459	SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
				3460	return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
				3461	}
				3462	}
				3463	}
				3464	}
				3465	return SDValue();
				3466	}
				3467
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3468	AArch64TargetLowering::ConstraintType
				3469	AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
				3470	if (Constraint.size() == 1) {
				3471	switch (Constraint[0]) {
				3472	default: break;
				3473	case 'w': // An FP/SIMD vector register
				3474	return C_RegisterClass;
				3475	case 'I': // Constant that can be used with an ADD instruction
				3476	case 'J': // Constant that can be used with a SUB instruction
				3477	case 'K': // Constant that can be used with a 32-bit logical instruction
				3478	case 'L': // Constant that can be used with a 64-bit logical instruction
				3479	case 'M': // Constant that can be used as a 32-bit MOV immediate
				3480	case 'N': // Constant that can be used as a 64-bit MOV immediate
				3481	case 'Y': // Floating point constant zero
				3482	case 'Z': // Integer constant zero
				3483	return C_Other;
				3484	case 'Q': // A memory reference with base register and no offset
				3485	return C_Memory;
				3486	case 'S': // A symbolic address
				3487	return C_Other;
				3488	}
				3489	}
				3490
				3491	// FIXME: Ump, Utf, Usa, Ush
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	3492	// Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
				3493	// whatever they may be
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3494	// Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
				3495	// Usa: An absolute symbolic address
				3496	// Ush: The high part (bits 32:12) of a pc-relative symbolic address
				3497	assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
				3498	&& Constraint != "Ush" && "Unimplemented constraints");
				3499
				3500	return TargetLowering::getConstraintType(Constraint);
				3501	}
				3502
				3503	TargetLowering::ConstraintWeight
				3504	AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
				3505	const char *Constraint) const {
				3506
				3507	llvm_unreachable("Constraint weight unimplemented");
				3508	}
				3509
				3510	void
				3511	AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
				3512	std::string &Constraint,
				3513	std::vector<SDValue> &Ops,
				3514	SelectionDAG &DAG) const {
				3515	SDValue Result(0, 0);
				3516
				3517	// Only length 1 constraints are C_Other.
				3518	if (Constraint.size() != 1) return;
				3519
				3520	// Only C_Other constraints get lowered like this. That means constants for us
				3521	// so return early if there's no hope the constraint can be lowered.
				3522
				3523	switch(Constraint[0]) {
				3524	default: break;
				3525	case 'I': case 'J': case 'K': case 'L':
				3526	case 'M': case 'N': case 'Z': {
				3527	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
				3528	if (!C)
				3529	return;
				3530
				3531	uint64_t CVal = C->getZExtValue();
				3532	uint32_t Bits;
				3533
				3534	switch (Constraint[0]) {
				3535	default:
				3536	// FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
				3537	// is a peculiarly useless SUB constraint.
				3538	llvm_unreachable("Unimplemented C_Other constraint");
				3539	case 'I':
				3540	if (CVal <= 0xfff)
				3541	break;
				3542	return;
				3543	case 'K':
				3544	if (A64Imms::isLogicalImm(32, CVal, Bits))
				3545	break;
				3546	return;
				3547	case 'L':
				3548	if (A64Imms::isLogicalImm(64, CVal, Bits))
				3549	break;
				3550	return;
				3551	case 'Z':
				3552	if (CVal == 0)
				3553	break;
				3554	return;
				3555	}
				3556
				3557	Result = DAG.getTargetConstant(CVal, Op.getValueType());
				3558	break;
				3559	}
				3560	case 'S': {
				3561	// An absolute symbolic address or label reference.
				3562	if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	3563	Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3564	GA->getValueType(0));
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	3565	} else if (const BlockAddressSDNode *BA
				3566	= dyn_cast<BlockAddressSDNode>(Op)) {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3567	Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
				3568	BA->getValueType(0));
				3569	} else if (const ExternalSymbolSDNode *ES
				3570	= dyn_cast<ExternalSymbolSDNode>(Op)) {
				3571	Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
				3572	ES->getValueType(0));
				3573	} else
				3574	return;
				3575	break;
				3576	}
				3577	case 'Y':
				3578	if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
				3579	if (CFP->isExactlyValue(0.0)) {
				3580	Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
				3581	break;
				3582	}
				3583	}
				3584	return;
				3585	}
				3586
				3587	if (Result.getNode()) {
				3588	Ops.push_back(Result);
				3589	return;
				3590	}
				3591
				3592	// It's an unknown constraint for us. Let generic code have a go.
				3593	TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
				3594	}
				3595
				3596	std::pair<unsigned, const TargetRegisterClass*>
Tim Northover	bcaca87	2013-02-05 13:24:56 +0000	[diff] [blame]	3597	AArch64TargetLowering::getRegForInlineAsmConstraint(
				3598	const std::string &Constraint,
Chad Rosier	295bd43	2013-06-22 18:37:38 +0000	[diff] [blame]	3599	MVT VT) const {
Tim Northover	e0e3aef	2013-01-31 12:12:40 +0000	[diff] [blame]	3600	if (Constraint.size() == 1) {
				3601	switch (Constraint[0]) {
				3602	case 'r':
				3603	if (VT.getSizeInBits() <= 32)
				3604	return std::make_pair(0U, &AArch64::GPR32RegClass);
				3605	else if (VT == MVT::i64)
				3606	return std::make_pair(0U, &AArch64::GPR64RegClass);
				3607	break;
				3608	case 'w':
				3609	if (VT == MVT::f16)
				3610	return std::make_pair(0U, &AArch64::FPR16RegClass);
				3611	else if (VT == MVT::f32)
				3612	return std::make_pair(0U, &AArch64::FPR32RegClass);
				3613	else if (VT == MVT::f64)
				3614	return std::make_pair(0U, &AArch64::FPR64RegClass);
				3615	else if (VT.getSizeInBits() == 64)
				3616	return std::make_pair(0U, &AArch64::VPR64RegClass);
				3617	else if (VT == MVT::f128)
				3618	return std::make_pair(0U, &AArch64::FPR128RegClass);
				3619	else if (VT.getSizeInBits() == 128)
				3620	return std::make_pair(0U, &AArch64::VPR128RegClass);
				3621	break;
				3622	}
				3623	}
				3624
				3625	// Use the default implementation in TargetLowering to convert the register
				3626	// constraint into a member of a register class.
				3627	return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				3628	}