1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the AArch64 target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/SelectionDAGISel.h"
18#include "llvm/IR/Function.h" // To access function attributes.
19#include "llvm/IR/GlobalValue.h"
20#include "llvm/IR/Intrinsics.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/ErrorHandling.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "aarch64-isel"
29
30//===--------------------------------------------------------------------===//
31/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32/// instructions for SelectionDAG operations.
33///
34namespace {
35
36class AArch64DAGToDAGISel : public SelectionDAGISel {
37 AArch64TargetMachine &TM;
38
39 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
40 /// make the right decision when generating code for different targets.
41 const AArch64Subtarget *Subtarget;
42
43 bool ForCodeSize;
44
45public:
46 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
47 CodeGenOpt::Level OptLevel)
48 : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
49 ForCodeSize(false) {}
50
51 const char *getPassName() const override {
52 return "AArch64 Instruction Selection";
53 }
54
55 bool runOnMachineFunction(MachineFunction &MF) override {
56 AttributeSet FnAttrs = MF.getFunction()->getAttributes();
57 ForCodeSize =
58 FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
59 Attribute::OptimizeForSize) ||
60 FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
61 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
62 return SelectionDAGISel::runOnMachineFunction(MF);
63 }
64
65 SDNode *Select(SDNode *Node) override;
66
67 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
68 /// inline asm expressions.
69 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
70 char ConstraintCode,
71 std::vector<SDValue> &OutOps) override;
72
73 SDNode *SelectMLAV64LaneV128(SDNode *N);
74 SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
75 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
101 }
102 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
104 }
105 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
107 }
108 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
110 }
111 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
113 }
114
115 template<int Width>
116 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
117 SDValue &SignExtend, SDValue &DoShift) {
118 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
119 }
120
121 template<int Width>
122 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
123 SDValue &SignExtend, SDValue &DoShift) {
124 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
125 }
126
127
128 /// Form sequences of consecutive 64/128-bit registers for use in NEON
129 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
130 /// between 1 and 4 elements. If it contains a single element, that element is
131 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
132 SDValue createDTuple(ArrayRef<SDValue> Vecs);
133 SDValue createQTuple(ArrayRef<SDValue> Vecs);
134
135 /// Generic helper for the createDTuple/createQTuple
136 /// functions. Those should almost always be called instead.
137 SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
138 unsigned SubRegs[]);
139
140 SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
141
142 SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
143
144 SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
145 unsigned SubRegIdx);
146 SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
147 unsigned SubRegIdx);
148 SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
149 SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
150
151 SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
152 SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
153 SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
154 SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
155
156 SDNode *SelectBitfieldExtractOp(SDNode *N);
157 SDNode *SelectBitfieldInsertOp(SDNode *N);
158
159 SDNode *SelectLIBM(SDNode *N);
160
161// Include the pieces autogenerated from the target description.
162#include "AArch64GenDAGISel.inc"
163
164private:
165 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
166 SDValue &Shift);
167 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
168 SDValue &OffImm);
169 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
170 SDValue &OffImm);
171 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
172 SDValue &Offset, SDValue &SignExtend,
173 SDValue &DoShift);
174 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
175 SDValue &Offset, SDValue &SignExtend,
176 SDValue &DoShift);
177 bool isWorthFolding(SDValue V) const;
178 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
179 SDValue &Offset, SDValue &SignExtend);
180
181 template<unsigned RegWidth>
182 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
183 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
184 }
185
186 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
187};
188} // end anonymous namespace
189
190/// isIntImmediate - This method tests to see if the node is a constant
191/// operand. If so, Imm will receive the value.
192static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
193 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
194 Imm = C->getZExtValue();
195 return true;
196 }
197 return false;
198}
199
200// isIntImmediate - This method tests to see if the operand is a constant.
201// If so, Imm will receive the value.
202static bool isIntImmediate(SDValue N, uint64_t &Imm) {
203 return isIntImmediate(N.getNode(), Imm);
204}
205
206// isOpcWithIntImmediate - This method tests to see if the node is a specific
207// opcode and that it has an immediate integer right operand.
208// If so, Imm will receive the value.
209static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
210 uint64_t &Imm) {
211 return N->getOpcode() == Opc &&
212 isIntImmediate(N->getOperand(1).getNode(), Imm);
213}
214
215bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
216 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
217 assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
218 // Require the address to be in a register. That is safe for all AArch64
219 // variants and it is hard to do anything much smarter without knowing
220 // how the operand is used.
221 OutOps.push_back(Op);
222 return false;
223}
224
225/// SelectArithImmed - Select an immediate value that can be represented as
226/// a 12-bit value shifted left by either 0 or 12. If so, return true with
227/// Val set to the 12-bit value and Shift set to the shifter operand.
228bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
229 SDValue &Shift) {
230 // This function is called from the addsub_shifted_imm ComplexPattern,
231 // which lists [imm] as the list of opcodes it's interested in; however,
232 // we still need to check whether the operand is actually an immediate
233 // here because the ComplexPattern opcode list is only used in
234 // root-level opcode matching.
235 if (!isa<ConstantSDNode>(N.getNode()))
236 return false;
237
238 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
239 unsigned ShiftAmt;
240
241 if (Immed >> 12 == 0) {
242 ShiftAmt = 0;
243 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
244 ShiftAmt = 12;
245 Immed = Immed >> 12;
246 } else
247 return false;
248
249 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
250 Val = CurDAG->getTargetConstant(Immed, MVT::i32);
251 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
252 return true;
253}
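
// Illustrative examples (values chosen for exposition) of how the split above
// plays out:
//   N = 0x123    -> Val = #0x123, Shift = LSL #0   (e.g. "add x0, x1, #0x123")
//   N = 0x123000 -> Val = #0x123, Shift = LSL #12  (e.g. "add x0, x1, #0x123, lsl #12")
//   N = 0x123001 -> rejected; neither 12-bit form can encode it.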
254
255/// SelectNegArithImmed - As above, but negates the value before trying to
256/// select it.
257bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
258 SDValue &Shift) {
259 // This function is called from the addsub_shifted_imm ComplexPattern,
260 // which lists [imm] as the list of opcodes it's interested in; however,
261 // we still need to check whether the operand is actually an immediate
262 // here because the ComplexPattern opcode list is only used in
263 // root-level opcode matching.
264 if (!isa<ConstantSDNode>(N.getNode()))
265 return false;
266
267 // The immediate operand must be a 24-bit zero-extended immediate.
268 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
269
270 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
271 // have the opposite effect on the C flag, so this pattern mustn't match under
272 // those circumstances.
273 if (Immed == 0)
274 return false;
275
276 if (N.getValueType() == MVT::i32)
277 Immed = ~((uint32_t)Immed) + 1;
278 else
279 Immed = ~Immed + 1ULL;
280 if (Immed & 0xFFFFFFFFFF000000ULL)
281 return false;
282
283 Immed &= 0xFFFFFFULL;
284 return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
285}
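
// Illustrative example: for a 32-bit N = -16 (i.e. 0xfffffff0), the negation
// above yields Immed = 16, which SelectArithImmed encodes as Val = #16,
// Shift = LSL #0. This is what allows, e.g., an (add %a, #-16) node to be
// emitted as "sub w0, w1, #16". N = 0 is rejected because negating it would
// flip the meaning of the C flag for CMP/CMN.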
286
287/// getShiftTypeForNode - Translate a shift node to the corresponding
288/// ShiftType value.
289static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
290 switch (N.getOpcode()) {
291 default:
292 return AArch64_AM::InvalidShiftExtend;
293 case ISD::SHL:
294 return AArch64_AM::LSL;
295 case ISD::SRL:
296 return AArch64_AM::LSR;
297 case ISD::SRA:
298 return AArch64_AM::ASR;
299 case ISD::ROTR:
300 return AArch64_AM::ROR;
301 }
302}
303
304/// \brief Determine whether it is worth folding V into an extended register.
305bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
306 // It hurts if a value is used at least twice, unless we are optimizing
307 // for code size.
308 if (ForCodeSize || V.hasOneUse())
309 return true;
310 return false;
311}
312
313/// SelectShiftedRegister - Select a "shifted register" operand. If the value
314/// is not shifted, set the Shift operand to default of "LSL 0". The logical
315/// instructions allow the shifted register to be rotated, but the arithmetic
316/// instructions do not. The AllowROR parameter specifies whether ROR is
317/// supported.
318bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
319 SDValue &Reg, SDValue &Shift) {
320 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
321 if (ShType == AArch64_AM::InvalidShiftExtend)
322 return false;
323 if (!AllowROR && ShType == AArch64_AM::ROR)
324 return false;
325
326 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
327 unsigned BitSize = N.getValueType().getSizeInBits();
328 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
329 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
330
331 Reg = N.getOperand(0);
332 Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
333 return isWorthFolding(N);
334 }
335
336 return false;
337}
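
// Illustrative example: given N = (shl %a, 4) feeding an i64 add, this returns
// Reg = %a and Shift = "LSL #4", so the add can be selected as a single
// "add x0, x1, x2, lsl #4". With AllowROR set (the logical-instruction
// variant), (rotr %a, 8) would likewise fold into e.g. "orr x0, x1, x2, ror #8".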
338
339/// getExtendTypeForNode - Translate an extend node to the corresponding
340/// ExtendType value.
341static AArch64_AM::ShiftExtendType
342getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
343 if (N.getOpcode() == ISD::SIGN_EXTEND ||
344 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
345 EVT SrcVT;
346 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
347 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
348 else
349 SrcVT = N.getOperand(0).getValueType();
350
351 if (!IsLoadStore && SrcVT == MVT::i8)
352 return AArch64_AM::SXTB;
353 else if (!IsLoadStore && SrcVT == MVT::i16)
354 return AArch64_AM::SXTH;
355 else if (SrcVT == MVT::i32)
356 return AArch64_AM::SXTW;
357 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
358
359 return AArch64_AM::InvalidShiftExtend;
360 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
361 N.getOpcode() == ISD::ANY_EXTEND) {
362 EVT SrcVT = N.getOperand(0).getValueType();
363 if (!IsLoadStore && SrcVT == MVT::i8)
364 return AArch64_AM::UXTB;
365 else if (!IsLoadStore && SrcVT == MVT::i16)
366 return AArch64_AM::UXTH;
367 else if (SrcVT == MVT::i32)
368 return AArch64_AM::UXTW;
369 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
370
371 return AArch64_AM::InvalidShiftExtend;
372 } else if (N.getOpcode() == ISD::AND) {
373 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
374 if (!CSD)
375 return AArch64_AM::InvalidShiftExtend;
376 uint64_t AndMask = CSD->getZExtValue();
377
378 switch (AndMask) {
379 default:
380 return AArch64_AM::InvalidShiftExtend;
381 case 0xFF:
382 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
383 case 0xFFFF:
384 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
385 case 0xFFFFFFFF:
386 return AArch64_AM::UXTW;
387 }
388 }
389
390 return AArch64_AM::InvalidShiftExtend;
391}
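
// Illustrative examples of the mapping above:
//   (sign_extend_inreg %x, i8)  -> SXTB      (and %x, 0xFF)   -> UXTB
//   (sign_extend_inreg %x, i16) -> SXTH      (and %x, 0xFFFF) -> UXTH
//   (zero_extend %x:i32)        -> UXTW
// When IsLoadStore is true the byte/halfword forms are rejected, since the
// register-offset addressing modes only fold extends from 32-bit values.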
392
393// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
394static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
395 if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
396 DL->getOpcode() != AArch64ISD::DUPLANE32)
397 return false;
398
399 SDValue SV = DL->getOperand(0);
400 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
401 return false;
402
403 SDValue EV = SV.getOperand(1);
404 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
405 return false;
406
407 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
408 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
409 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
410 LaneOp = EV.getOperand(0);
411
412 return true;
413}
414
415// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
416// high lane extract.
417static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
418 SDValue &LaneOp, int &LaneIdx) {
419
420 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
421 std::swap(Op0, Op1);
422 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
423 return false;
424 }
425 StdOp = Op1;
426 return true;
427}
428
429/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
430/// is a lane in the upper half of a 128-bit vector. Recognize and select this
431/// so that we don't emit unnecessary lane extracts.
432SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
433 SDValue Op0 = N->getOperand(0);
434 SDValue Op1 = N->getOperand(1);
435 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
436 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
437 int LaneIdx = -1; // Will hold the lane index.
438
439 if (Op1.getOpcode() != ISD::MUL ||
440 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
441 LaneIdx)) {
442 std::swap(Op0, Op1);
443 if (Op1.getOpcode() != ISD::MUL ||
444 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
445 LaneIdx))
446 return nullptr;
447 }
448
449 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
450
451 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
452
453 unsigned MLAOpc = ~0U;
454
455 switch (N->getSimpleValueType(0).SimpleTy) {
456 default:
457 llvm_unreachable("Unrecognized MLA.");
458 case MVT::v4i16:
459 MLAOpc = AArch64::MLAv4i16_indexed;
460 break;
461 case MVT::v8i16:
462 MLAOpc = AArch64::MLAv8i16_indexed;
463 break;
464 case MVT::v2i32:
465 MLAOpc = AArch64::MLAv2i32_indexed;
466 break;
467 case MVT::v4i32:
468 MLAOpc = AArch64::MLAv4i32_indexed;
469 break;
470 }
471
472 return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
473}
474
475SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
476 SDValue SMULLOp0;
477 SDValue SMULLOp1;
478 int LaneIdx;
479
480 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
481 LaneIdx))
482 return nullptr;
483
484 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
485
486 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
487
488 unsigned SMULLOpc = ~0U;
489
490 if (IntNo == Intrinsic::aarch64_neon_smull) {
491 switch (N->getSimpleValueType(0).SimpleTy) {
492 default:
493 llvm_unreachable("Unrecognized SMULL.");
494 case MVT::v4i32:
495 SMULLOpc = AArch64::SMULLv4i16_indexed;
496 break;
497 case MVT::v2i64:
498 SMULLOpc = AArch64::SMULLv2i32_indexed;
499 break;
500 }
501 } else if (IntNo == Intrinsic::aarch64_neon_umull) {
502 switch (N->getSimpleValueType(0).SimpleTy) {
503 default:
504 llvm_unreachable("Unrecognized SMULL.");
505 case MVT::v4i32:
506 SMULLOpc = AArch64::UMULLv4i16_indexed;
507 break;
508 case MVT::v2i64:
509 SMULLOpc = AArch64::UMULLv2i32_indexed;
510 break;
511 }
512 } else
513 llvm_unreachable("Unrecognized intrinsic.");
514
515 return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
516}
517
518/// Instructions that accept extend modifiers like UXTW expect the register
519/// being extended to be a GPR32, but the incoming DAG might be acting on a
520/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
521/// this is the case.
522static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
523 if (N.getValueType() == MVT::i32)
524 return N;
525
526 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
527 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
528 SDLoc(N), MVT::i32, N, SubReg);
529 return SDValue(Node, 0);
530}
531
532
533/// SelectArithExtendedRegister - Select an "extended register" operand. This
534/// operand folds in an extend followed by an optional left shift.
535bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
536 SDValue &Shift) {
537 unsigned ShiftVal = 0;
538 AArch64_AM::ShiftExtendType Ext;
539
540 if (N.getOpcode() == ISD::SHL) {
541 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
542 if (!CSD)
543 return false;
544 ShiftVal = CSD->getZExtValue();
545 if (ShiftVal > 4)
546 return false;
547
548 Ext = getExtendTypeForNode(N.getOperand(0));
549 if (Ext == AArch64_AM::InvalidShiftExtend)
550 return false;
551
552 Reg = N.getOperand(0).getOperand(0);
553 } else {
554 Ext = getExtendTypeForNode(N);
555 if (Ext == AArch64_AM::InvalidShiftExtend)
556 return false;
557
558 Reg = N.getOperand(0);
559 }
560
561 // AArch64 mandates that the RHS of the operation must use the smallest
562 // register class that could contain the size being extended from. Thus,
563 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
564 // there might not be an actual 32-bit value in the program. We can
565 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
566 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
567 Reg = narrowIfNeeded(CurDAG, Reg);
568 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
569 return isWorthFolding(N);
570}
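
// Illustrative example: for N = (shl (sign_extend_inreg %a, i8), 2) this
// returns Reg = %a (narrowed to a GPR32 if needed) and Shift = "SXTB #2", so a
// surrounding add can be selected as "add x0, x1, w2, sxtb #2".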
571
572/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
573/// immediate" address. The "Size" argument is the size in bytes of the memory
574/// reference, which determines the scale.
575bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
576 SDValue &Base, SDValue &OffImm) {
577 const TargetLowering *TLI = getTargetLowering();
578 if (N.getOpcode() == ISD::FrameIndex) {
579 int FI = cast<FrameIndexSDNode>(N)->getIndex();
580 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
581 OffImm = CurDAG->getTargetConstant(0, MVT::i64);
582 return true;
583 }
584
585 if (N.getOpcode() == AArch64ISD::ADDlow) {
586 GlobalAddressSDNode *GAN =
587 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
588 Base = N.getOperand(0);
589 OffImm = N.getOperand(1);
590 if (!GAN)
591 return true;
592
593 const GlobalValue *GV = GAN->getGlobal();
594 unsigned Alignment = GV->getAlignment();
595 const DataLayout *DL = TLI->getDataLayout();
596 if (Alignment == 0 && !Subtarget->isTargetDarwin())
597 Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
598
599 if (Alignment >= Size)
600 return true;
601 }
602
603 if (CurDAG->isBaseWithConstantOffset(N)) {
604 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
605 int64_t RHSC = (int64_t)RHS->getZExtValue();
606 unsigned Scale = Log2_32(Size);
607 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
608 Base = N.getOperand(0);
609 if (Base.getOpcode() == ISD::FrameIndex) {
610 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
611 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
612 }
613 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
614 return true;
615 }
616 }
617 }
618
619 // Before falling back to our general case, check if the unscaled
620 // instructions can handle this. If so, that's preferable.
621 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
622 return false;
623
624 // Base only. The address will be materialized into a register before
625 // the memory is accessed.
626 // add x0, Xbase, #offset
627 // ldr x0, [x0]
628 Base = N;
629 OffImm = CurDAG->getTargetConstant(0, MVT::i64);
630 return true;
631}
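
// Illustrative example: for an 8-byte access of (add %base, 80), Size = 8
// gives Scale = 3; 80 is a multiple of 8 and below 0x1000 << 3, so this
// returns Base = %base and OffImm = 80 >> 3 = 10, which prints back as
// "ldr x0, [x1, #80]". An offset such as 81 instead falls through to the
// unscaled (LDUR/STUR) check.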
632
633/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
634/// immediate" address. This should only match when there is an offset that
635/// is not valid for a scaled immediate addressing mode. The "Size" argument
636/// is the size in bytes of the memory reference, which is needed here to know
637/// what is valid for a scaled immediate.
638bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
639 SDValue &Base,
640 SDValue &OffImm) {
641 if (!CurDAG->isBaseWithConstantOffset(N))
642 return false;
643 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
644 int64_t RHSC = RHS->getSExtValue();
645 // If the offset is valid as a scaled immediate, don't match here.
646 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
647 RHSC < (0x1000 << Log2_32(Size)))
648 return false;
649 if (RHSC >= -256 && RHSC < 256) {
650 Base = N.getOperand(0);
651 if (Base.getOpcode() == ISD::FrameIndex) {
652 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
653 const TargetLowering *TLI = getTargetLowering();
654 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
655 }
656 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
657 return true;
658 }
659 }
660 return false;
661}
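
// Illustrative example: a 4-byte access of (add %base, -17) cannot use the
// scaled form (negative and not a multiple of 4), but -17 lies in [-256, 256),
// so this returns Base = %base and OffImm = -17, selecting e.g.
// "ldur w0, [x1, #-17]".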
662
663static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
664 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
665 SDValue ImpDef = SDValue(
666 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
667 0);
668 MachineSDNode *Node = CurDAG->getMachineNode(
669 TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
670 return SDValue(Node, 0);
671}
672
673/// \brief Check if the given SHL node (\p N) can be used to form an
674/// extended register for an addressing mode.
675bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
676 bool WantExtend, SDValue &Offset,
677 SDValue &SignExtend) {
678 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
679 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
680 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
681 return false;
682
683 if (WantExtend) {
684 AArch64_AM::ShiftExtendType Ext =
685 getExtendTypeForNode(N.getOperand(0), true);
686 if (Ext == AArch64_AM::InvalidShiftExtend)
687 return false;
688
689 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
690 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
691 } else {
692 Offset = N.getOperand(0);
693 SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
694 }
695
696 unsigned LegalShiftVal = Log2_32(Size);
697 unsigned ShiftVal = CSD->getZExtValue();
698
699 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
700 return false;
701
702 if (isWorthFolding(N))
703 return true;
704
705 return false;
706}
707
708bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
709 SDValue &Base, SDValue &Offset,
710 SDValue &SignExtend,
711 SDValue &DoShift) {
712 if (N.getOpcode() != ISD::ADD)
713 return false;
714 SDValue LHS = N.getOperand(0);
715 SDValue RHS = N.getOperand(1);
716
717 // We don't want to match immediate adds here, because they are better lowered
718 // to the register-immediate addressing modes.
719 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
720 return false;
721
722 // Check if this particular node is reused in any non-memory related
723 // operation. If yes, do not try to fold this node into the address
724 // computation, since the computation will be kept.
725 const SDNode *Node = N.getNode();
726 for (SDNode *UI : Node->uses()) {
727 if (!isa<MemSDNode>(*UI))
728 return false;
729 }
730
731 // Remember if it is worth folding N when it produces an extended register.
732 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
733
734 // Try to match a shifted extend on the RHS.
735 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
736 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
737 Base = LHS;
738 DoShift = CurDAG->getTargetConstant(true, MVT::i32);
739 return true;
740 }
741
742 // Try to match a shifted extend on the LHS.
743 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
744 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
745 Base = RHS;
746 DoShift = CurDAG->getTargetConstant(true, MVT::i32);
747 return true;
748 }
749
750 // There was no shift, whatever else we find.
751 DoShift = CurDAG->getTargetConstant(false, MVT::i32);
752
753 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
754 // Try to match an unshifted extend on the LHS.
755 if (IsExtendedRegisterWorthFolding &&
756 (Ext = getExtendTypeForNode(LHS, true)) !=
757 AArch64_AM::InvalidShiftExtend) {
758 Base = RHS;
759 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
760 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
761 if (isWorthFolding(LHS))
762 return true;
763 }
764
765 // Try to match an unshifted extend on the RHS.
766 if (IsExtendedRegisterWorthFolding &&
767 (Ext = getExtendTypeForNode(RHS, true)) !=
768 AArch64_AM::InvalidShiftExtend) {
769 Base = LHS;
770 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
771 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
772 if (isWorthFolding(RHS))
773 return true;
774 }
775
776 return false;
777}
778
779bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
780 SDValue &Base, SDValue &Offset,
781 SDValue &SignExtend,
782 SDValue &DoShift) {
783 if (N.getOpcode() != ISD::ADD)
784 return false;
785 SDValue LHS = N.getOperand(0);
786 SDValue RHS = N.getOperand(1);
787
788 // We don't want to match immediate adds here, because they are better lowered
789 // to the register-immediate addressing modes.
790 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
791 return false;
792
793 // Check if this particular node is reused in any non-memory related
794 // operation. If yes, do not try to fold this node into the address
795 // computation, since the computation will be kept.
796 const SDNode *Node = N.getNode();
797 for (SDNode *UI : Node->uses()) {
798 if (!isa<MemSDNode>(*UI))
799 return false;
800 }
801
802 // Remember if it is worth folding N when it produces an extended register.
803 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
804
805 // Try to match a shifted extend on the RHS.
806 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
807 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
808 Base = LHS;
809 DoShift = CurDAG->getTargetConstant(true, MVT::i32);
810 return true;
811 }
812
813 // Try to match a shifted extend on the LHS.
814 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
815 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
816 Base = RHS;
817 DoShift = CurDAG->getTargetConstant(true, MVT::i32);
818 return true;
819 }
820
821 // Match any non-shifted, non-extend, non-immediate add expression.
822 Base = LHS;
823 Offset = RHS;
824 SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
825 DoShift = CurDAG->getTargetConstant(false, MVT::i32);
826 // Reg1 + Reg2 is free: no check needed.
827 return true;
828}
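
// Illustrative example: for N = (add %a, %b) with neither operand a constant
// or a foldable shift, the fallthrough above yields Base = %a, Offset = %b
// with no extend and no shift, i.e. the plain register-register form
// "ldr x0, [x1, x2]". When the RHS is (shl %b, 3) and the access size is
// 8 bytes, the SelectExtendedSHL path instead produces
// "ldr x0, [x1, x2, lsl #3]".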
829
830SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
831 static unsigned RegClassIDs[] = {
832 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
833 static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
834 AArch64::dsub2, AArch64::dsub3 };
835
836 return createTuple(Regs, RegClassIDs, SubRegs);
837}
838
839SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
840 static unsigned RegClassIDs[] = {
841 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
842 static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
843 AArch64::qsub2, AArch64::qsub3 };
844
845 return createTuple(Regs, RegClassIDs, SubRegs);
846}
847
848SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
849 unsigned RegClassIDs[],
850 unsigned SubRegs[]) {
851 // There's no special register-class for a vector-list of 1 element: it's just
852 // a vector.
853 if (Regs.size() == 1)
854 return Regs[0];
855
856 assert(Regs.size() >= 2 && Regs.size() <= 4);
857
858 SDLoc DL(Regs[0].getNode());
859
860 SmallVector<SDValue, 4> Ops;
861
862 // First operand of REG_SEQUENCE is the desired RegClass.
863 Ops.push_back(
864 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
865
866 // Then we get pairs of source & subregister-position for the components.
867 for (unsigned i = 0; i < Regs.size(); ++i) {
868 Ops.push_back(Regs[i]);
869 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
870 }
871
872 SDNode *N =
873 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
874 return SDValue(N, 0);
875}
876
877SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
878 unsigned Opc, bool isExt) {
879 SDLoc dl(N);
880 EVT VT = N->getValueType(0);
881
882 unsigned ExtOff = isExt;
883
884 // Form a REG_SEQUENCE to force register allocation.
885 unsigned Vec0Off = ExtOff + 1;
886 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
887 N->op_begin() + Vec0Off + NumVecs);
888 SDValue RegSeq = createQTuple(Regs);
889
890 SmallVector<SDValue, 6> Ops;
891 if (isExt)
892 Ops.push_back(N->getOperand(1));
893 Ops.push_back(RegSeq);
894 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
895 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
896}
897
898SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
899 LoadSDNode *LD = cast<LoadSDNode>(N);
900 if (LD->isUnindexed())
901 return nullptr;
902 EVT VT = LD->getMemoryVT();
903 EVT DstVT = N->getValueType(0);
904 ISD::MemIndexedMode AM = LD->getAddressingMode();
905 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
906
907 // We're not doing validity checking here. That was done when checking
908 // if we should mark the load as indexed or not. We're just selecting
909 // the right instruction.
910 unsigned Opcode = 0;
911
912 ISD::LoadExtType ExtType = LD->getExtensionType();
913 bool InsertTo64 = false;
914 if (VT == MVT::i64)
915 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
916 else if (VT == MVT::i32) {
917 if (ExtType == ISD::NON_EXTLOAD)
918 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
919 else if (ExtType == ISD::SEXTLOAD)
920 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
921 else {
922 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
923 InsertTo64 = true;
924 // The result of the load is only i32. It's the subreg_to_reg that makes
925 // it into an i64.
926 DstVT = MVT::i32;
927 }
928 } else if (VT == MVT::i16) {
929 if (ExtType == ISD::SEXTLOAD) {
930 if (DstVT == MVT::i64)
931 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
932 else
933 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
934 } else {
935 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
936 InsertTo64 = DstVT == MVT::i64;
937 // The result of the load is only i32. It's the subreg_to_reg that makes
938 // it into an i64.
939 DstVT = MVT::i32;
940 }
941 } else if (VT == MVT::i8) {
942 if (ExtType == ISD::SEXTLOAD) {
943 if (DstVT == MVT::i64)
944 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
945 else
946 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
947 } else {
948 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
949 InsertTo64 = DstVT == MVT::i64;
950 // The result of the load is only i32. It's the subreg_to_reg that makes
951 // it into an i64.
952 DstVT = MVT::i32;
953 }
954 } else if (VT == MVT::f32) {
955 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
956 } else if (VT == MVT::f64 || VT.is64BitVector()) {
957 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
958 } else if (VT.is128BitVector()) {
959 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
960 } else
961 return nullptr;
962 SDValue Chain = LD->getChain();
963 SDValue Base = LD->getBasePtr();
964 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
965 int OffsetVal = (int)OffsetOp->getZExtValue();
966 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
967 SDValue Ops[] = { Base, Offset, Chain };
968 SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
969 MVT::Other, Ops);
970 // Either way, we're replacing the node, so tell the caller that.
971 Done = true;
972 SDValue LoadedVal = SDValue(Res, 1);
973 if (InsertTo64) {
974 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
975 LoadedVal =
976 SDValue(CurDAG->getMachineNode(
977 AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
978 CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
979 0);
980 }
981
982 ReplaceUses(SDValue(N, 0), LoadedVal);
983 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
984 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
985
986 return nullptr;
987}
988
989SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
990 unsigned Opc, unsigned SubRegIdx) {
991 SDLoc dl(N);
992 EVT VT = N->getValueType(0);
993 SDValue Chain = N->getOperand(0);
994
995 SmallVector<SDValue, 6> Ops;
996 Ops.push_back(N->getOperand(2)); // Mem operand;
997 Ops.push_back(Chain);
998
999 std::vector<EVT> ResTys;
1000 ResTys.push_back(MVT::Untyped);
1001 ResTys.push_back(MVT::Other);
1002
1003 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1004 SDValue SuperReg = SDValue(Ld, 0);
1005 for (unsigned i = 0; i < NumVecs; ++i)
1006 ReplaceUses(SDValue(N, i),
1007 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1008
1009 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1010 return nullptr;
1011}
1012
1013SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1014 unsigned Opc, unsigned SubRegIdx) {
1015 SDLoc dl(N);
1016 EVT VT = N->getValueType(0);
1017 SDValue Chain = N->getOperand(0);
1018
1019 SmallVector<SDValue, 6> Ops;
1020 Ops.push_back(N->getOperand(1)); // Mem operand
1021 Ops.push_back(N->getOperand(2)); // Incremental
1022 Ops.push_back(Chain);
1023
1024 std::vector<EVT> ResTys;
1025 ResTys.push_back(MVT::i64); // Type of the write back register
1026 ResTys.push_back(MVT::Untyped);
1027 ResTys.push_back(MVT::Other);
1028
1029 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1030
1031 // Update uses of write back register
1032 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1033
1034 // Update uses of vector list
1035 SDValue SuperReg = SDValue(Ld, 1);
1036 if (NumVecs == 1)
1037 ReplaceUses(SDValue(N, 0), SuperReg);
1038 else
1039 for (unsigned i = 0; i < NumVecs; ++i)
1040 ReplaceUses(SDValue(N, i),
1041 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1042
1043 // Update the chain
1044 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1045 return nullptr;
1046}
1047
1048SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1049 unsigned Opc) {
1050 SDLoc dl(N);
1051 EVT VT = N->getOperand(2)->getValueType(0);
1052
1053 // Form a REG_SEQUENCE to force register allocation.
1054 bool Is128Bit = VT.getSizeInBits() == 128;
1055 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1056 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1057
1058 SmallVector<SDValue, 6> Ops;
1059 Ops.push_back(RegSeq);
1060 Ops.push_back(N->getOperand(NumVecs + 2));
1061 Ops.push_back(N->getOperand(0));
1062 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1063
1064 return St;
1065}
1066
1067SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1068 unsigned Opc) {
1069 SDLoc dl(N);
1070 EVT VT = N->getOperand(2)->getValueType(0);
1071 SmallVector<EVT, 2> ResTys;
1072 ResTys.push_back(MVT::i64); // Type of the write back register
1073 ResTys.push_back(MVT::Other); // Type for the Chain
1074
1075 // Form a REG_SEQUENCE to force register allocation.
1076 bool Is128Bit = VT.getSizeInBits() == 128;
1077 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1078 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1079
1080 SmallVector<SDValue, 6> Ops;
1081 Ops.push_back(RegSeq);
1082 Ops.push_back(N->getOperand(NumVecs + 1)); // base register
1083 Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
1084 Ops.push_back(N->getOperand(0)); // Chain
1085 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1086
1087 return St;
1088}
1089
1090/// WidenVector - Given a value in the V64 register class, produce the
1091/// equivalent value in the V128 register class.
1092class WidenVector {
1093 SelectionDAG &DAG;
1094
1095public:
1096 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1097
1098 SDValue operator()(SDValue V64Reg) {
1099 EVT VT = V64Reg.getValueType();
1100 unsigned NarrowSize = VT.getVectorNumElements();
1101 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1102 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1103 SDLoc DL(V64Reg);
1104
1105 SDValue Undef =
1106 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1107 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1108 }
1109};
1110
1111/// NarrowVector - Given a value in the V128 register class, produce the
1112/// equivalent value in the V64 register class.
1113static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1114 EVT VT = V128Reg.getValueType();
1115 unsigned WideSize = VT.getVectorNumElements();
1116 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1117 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1118
1119 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1120 V128Reg);
1121}
1122
1123SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1124 unsigned Opc) {
1125 SDLoc dl(N);
1126 EVT VT = N->getValueType(0);
1127 bool Narrow = VT.getSizeInBits() == 64;
1128
1129 // Form a REG_SEQUENCE to force register allocation.
1130 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1131
1132 if (Narrow)
1133 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1134 WidenVector(*CurDAG));
1135
1136 SDValue RegSeq = createQTuple(Regs);
1137
1138 std::vector<EVT> ResTys;
1139 ResTys.push_back(MVT::Untyped);
1140 ResTys.push_back(MVT::Other);
1141
1142 unsigned LaneNo =
1143 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1144
1145 SmallVector<SDValue, 6> Ops;
1146 Ops.push_back(RegSeq);
1147 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1148 Ops.push_back(N->getOperand(NumVecs + 3));
1149 Ops.push_back(N->getOperand(0));
1150 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1151 SDValue SuperReg = SDValue(Ld, 0);
1152
1153 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1154 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1155 AArch64::qsub3 };
1156 for (unsigned i = 0; i < NumVecs; ++i) {
1157 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1158 if (Narrow)
1159 NV = NarrowVector(NV, *CurDAG);
1160 ReplaceUses(SDValue(N, i), NV);
1161 }
1162
1163 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1164
1165 return Ld;
1166}
1167
1168SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1169 unsigned Opc) {
1170 SDLoc dl(N);
1171 EVT VT = N->getValueType(0);
1172 bool Narrow = VT.getSizeInBits() == 64;
1173
1174 // Form a REG_SEQUENCE to force register allocation.
1175 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1176
1177 if (Narrow)
1178 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1179 WidenVector(*CurDAG));
1180
1181 SDValue RegSeq = createQTuple(Regs);
1182
1183 std::vector<EVT> ResTys;
1184 ResTys.push_back(MVT::i64); // Type of the write back register
1185 ResTys.push_back(MVT::Untyped);
1186 ResTys.push_back(MVT::Other);
1187
1188 unsigned LaneNo =
1189 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1190
1191 SmallVector<SDValue, 6> Ops;
1192 Ops.push_back(RegSeq);
1193 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
1194 Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
1195 Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1196 Ops.push_back(N->getOperand(0));
1197 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1198
1199 // Update uses of the write back register
1200 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1201
1202 // Update uses of the vector list
1203 SDValue SuperReg = SDValue(Ld, 1);
1204 if (NumVecs == 1) {
1205 ReplaceUses(SDValue(N, 0),
1206 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1207 } else {
1208 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1209 static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
1210 AArch64::qsub3 };
1211 for (unsigned i = 0; i < NumVecs; ++i) {
1212 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1213 SuperReg);
1214 if (Narrow)
1215 NV = NarrowVector(NV, *CurDAG);
1216 ReplaceUses(SDValue(N, i), NV);
1217 }
1218 }
1219
1220 // Update the Chain
1221 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1222
1223 return Ld;
1224}
1225
1226SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1227 unsigned Opc) {
1228 SDLoc dl(N);
1229 EVT VT = N->getOperand(2)->getValueType(0);
1230 bool Narrow = VT.getSizeInBits() == 64;
1231
1232 // Form a REG_SEQUENCE to force register allocation.
1233 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1234
1235 if (Narrow)
1236 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1237 WidenVector(*CurDAG));
1238
1239 SDValue RegSeq = createQTuple(Regs);
1240
1241 unsigned LaneNo =
1242 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1243
1244 SmallVector<SDValue, 6> Ops;
1245 Ops.push_back(RegSeq);
1246 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1247 Ops.push_back(N->getOperand(NumVecs + 3));
1248 Ops.push_back(N->getOperand(0));
1249 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1250
1251 // Transfer memoperands.
1252 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1253 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1254 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1255
1256 return St;
1257}
1258
1259SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1260 unsigned Opc) {
1261 SDLoc dl(N);
1262 EVT VT = N->getOperand(2)->getValueType(0);
1263 bool Narrow = VT.getSizeInBits() == 64;
1264
1265 // Form a REG_SEQUENCE to force register allocation.
1266 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1267
1268 if (Narrow)
1269 std::transform(Regs.begin(), Regs.end(), Regs.begin(),
1270 WidenVector(*CurDAG));
1271
1272 SDValue RegSeq = createQTuple(Regs);
1273
1274 SmallVector<EVT, 2> ResTys;
1275 ResTys.push_back(MVT::i64); // Type of the write back register
1276 ResTys.push_back(MVT::Other);
1277
1278 unsigned LaneNo =
1279 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1280
1281 SmallVector<SDValue, 6> Ops;
1282 Ops.push_back(RegSeq);
1283 Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
1284 Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
1285 Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
1286 Ops.push_back(N->getOperand(0));
1287 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1288
1289 // Transfer memoperands.
1290 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1291 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1292 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
1293
1294 return St;
1295}
1296
1297static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1298 unsigned &Opc, SDValue &Opd0,
1299 unsigned &LSB, unsigned &MSB,
1300 unsigned NumberOfIgnoredLowBits,
1301 bool BiggerPattern) {
1302 assert(N->getOpcode() == ISD::AND &&
1303 "N must be a AND operation to call this function");
1304
1305 EVT VT = N->getValueType(0);
1306
1307 // Here we can test the type of VT and return false when the type does not
1308 // match, but since that check is done prior to this call in the current
1309 // context, we turned it into an assert to avoid redundant code.
1310 assert((VT == MVT::i32 || VT == MVT::i64) &&
1311 "Type checking must have been done before calling this function");
1312
1313 // FIXME: simplify-demanded-bits in DAGCombine will probably have
1314 // changed the AND node to a 32-bit mask operation. We'll have to
1315 // undo that as part of the transform here if we want to catch all
1316 // the opportunities.
1317 // Currently the NumberOfIgnoredLowBits argument helps to recover
1318 // from these situations when matching a bigger pattern (bitfield insert).
1319
1320 // For unsigned extracts, check for a shift right and mask
1321 uint64_t And_imm = 0;
1322 if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
1323 return false;
1324
1325 const SDNode *Op0 = N->getOperand(0).getNode();
1326
1327 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1328 // simplified. Try to undo that
1329 And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
1330
1331 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1332 if (And_imm & (And_imm + 1))
1333 return false;
1334
1335 bool ClampMSB = false;
1336 uint64_t Srl_imm = 0;
1337 // Handle the SRL + ANY_EXTEND case.
1338 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1339 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
1340 // Extend the incoming operand of the SRL to 64-bit.
1341 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1342 // Make sure to clamp the MSB so that we preserve the semantics of the
1343 // original operations.
1344 ClampMSB = true;
1345 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1346 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1347 Srl_imm)) {
1348 // If the shift result was truncated, we can still combine them.
1349 Opd0 = Op0->getOperand(0).getOperand(0);
1350
1351 // Use the type of SRL node.
1352 VT = Opd0->getValueType(0);
1353 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
1354 Opd0 = Op0->getOperand(0);
1355 } else if (BiggerPattern) {
1356 // Let's pretend a 0 shift right has been performed.
1357 // The resulting code will be at least as good as the original one
1358 // plus it may expose more opportunities for bitfield insert pattern.
1359 // FIXME: Currently we limit this to the bigger pattern, because
1360 // some optimizations expect AND and not UBFM
1361 Opd0 = N->getOperand(0);
1362 } else
1363 return false;
1364
1365 assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
1366 "bad amount in shift node!");
1367
1368 LSB = Srl_imm;
1369 MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
1370 : CountTrailingOnes_64(And_imm)) -
1371 1;
1372 if (ClampMSB)
1373 // Since we're moving the extend before the right shift operation, we need
1374 // to clamp the MSB to make sure we don't shift in undefined bits instead of
1375 // the zeros which would get shifted in with the original right shift
1376 // operation.
1377 MSB = MSB > 31 ? 31 : MSB;
1378
1379 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1380 return true;
1381}
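
// Illustrative example: for N = (and (srl %x, 3), 0x1f) on i64, Srl_imm = 3
// and the mask has five trailing ones, so LSB = 3 and MSB = 3 + 5 - 1 = 7, and
// the node is selected as UBFMXri %x, #3, #7 (printed as "ubfx x0, x1, #3, #5").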
1382
1383static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1384 unsigned &LSB, unsigned &MSB) {
1385 // We are looking for the following pattern, which basically extracts a single
1386 // bit from the source value and places it in the LSB of the destination
1387 // value; all other bits of the destination value are set to zero:
1388 //
1389 // Value2 = AND Value, MaskImm
1390 // SRL Value2, ShiftImm
1391 //
1392 // with MaskImm >> ShiftImm == 1.
1393 //
1394 // This gets selected into a single UBFM:
1395 //
1396 // UBFM Value, ShiftImm, ShiftImm
1397 //
1398
1399 if (N->getOpcode() != ISD::SRL)
1400 return false;
1401
1402 uint64_t And_mask = 0;
1403 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
1404 return false;
1405
1406 Opd0 = N->getOperand(0).getOperand(0);
1407
1408 uint64_t Srl_imm = 0;
1409 if (!isIntImmediate(N->getOperand(1), Srl_imm))
1410 return false;
1411
1412 // Check whether we really have a one bit extract here.
1413 if (And_mask >> Srl_imm == 0x1) {
1414 if (N->getValueType(0) == MVT::i32)
1415 Opc = AArch64::UBFMWri;
1416 else
1417 Opc = AArch64::UBFMXri;
1418
1419 LSB = MSB = Srl_imm;
1420
1421 return true;
1422 }
1423
1424 return false;
1425}
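
// Illustrative example: for N = (srl (and %x, 0x8), 3), And_mask >> Srl_imm is
// 0x8 >> 3 == 1, so this is a one-bit extract with LSB = MSB = 3, selected as
// UBFM %x, #3, #3 (printed as "ubfx w0, w1, #3, #1" in the 32-bit case).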
1426
1427static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1428 unsigned &LSB, unsigned &MSB,
1429 bool BiggerPattern) {
1430 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1431 "N must be a SHR/SRA operation to call this function");
1432
1433 EVT VT = N->getValueType(0);
1434
1435 // Here we can test the type of VT and return false when the type does not
1436 // match, but since that check is done prior to this call in the current
1437 // context, we turned it into an assert to avoid redundant code.
1438 assert((VT == MVT::i32 || VT == MVT::i64) &&
1439 "Type checking must have been done before calling this function");
1440
1441 // Check for AND + SRL doing a one bit extract.
1442 if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
1443 return true;
1444
1445 // we're looking for a shift of a shift
1446 uint64_t Shl_imm = 0;
1447 uint64_t Trunc_bits = 0;
1448 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
1449 Opd0 = N->getOperand(0).getOperand(0);
1450 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1451 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1452 // We are looking for a shift of a truncate. A truncate from i64 to i32
1453 // can be considered as setting the high 32 bits to zero. Our strategy here
1454 // is to always generate a 64-bit UBFM. This consistency will help the CSE
1455 // pass later find more redundancy.
1456 Opd0 = N->getOperand(0).getOperand(0);
1457 Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1458 VT = Opd0->getValueType(0);
1459 assert(VT == MVT::i64 && "the promoted type should be i64");
1460 } else if (BiggerPattern) {
1461 // Let's pretend a 0 shift left has been performed.
1462 // FIXME: Currently we limit this to the bigger pattern case,
1463 // because some optimizations expect AND and not UBFM
1464 Opd0 = N->getOperand(0);
1465 } else
1466 return false;
1467
1468 assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
1469 uint64_t Srl_imm = 0;
1470 if (!isIntImmediate(N->getOperand(1), Srl_imm))
1471 return false;
1472
1473 assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
1474 "bad amount in shift node!");
1475 // Note: The width operand is encoded as width-1.
1476 unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
1477 int sLSB = Srl_imm - Shl_imm;
1478 if (sLSB < 0)
1479 return false;
1480 LSB = sLSB;
1481 MSB = LSB + Width;
1482 // SRA requires a signed extraction
1483 if (VT == MVT::i32)
1484 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1485 else
1486 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1487 return true;
1488}
1489
1490static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1491 SDValue &Opd0, unsigned &LSB, unsigned &MSB,
1492 unsigned NumberOfIgnoredLowBits = 0,
1493 bool BiggerPattern = false) {
1494 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1495 return false;
1496
1497 switch (N->getOpcode()) {
1498 default:
1499 if (!N->isMachineOpcode())
1500 return false;
1501 break;
1502 case ISD::AND:
1503 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
1504 NumberOfIgnoredLowBits, BiggerPattern);
1505 case ISD::SRL:
1506 case ISD::SRA:
1507 return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
1508 }
1509
1510 unsigned NOpc = N->getMachineOpcode();
1511 switch (NOpc) {
1512 default:
1513 return false;
1514 case AArch64::SBFMWri:
1515 case AArch64::UBFMWri:
1516 case AArch64::SBFMXri:
1517 case AArch64::UBFMXri:
1518 Opc = NOpc;
1519 Opd0 = N->getOperand(0);
1520 LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1521 MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1522 return true;
1523 }
1524 // Unreachable
1525 return false;
1526}
1527
1528SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
1529 unsigned Opc, LSB, MSB;
1530 SDValue Opd0;
1531 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
1532 return nullptr;
1533
1534 EVT VT = N->getValueType(0);
1535
1536 // If the bit extract operation is 64bit but the original type is 32bit, we
1537 // need to add one EXTRACT_SUBREG.
1538 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1539 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
1540 CurDAG->getTargetConstant(MSB, MVT::i64)};
1541
1542 SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
1543 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
1544 MachineSDNode *Node =
1545 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
1546 SDValue(BFM, 0), SubReg);
1547 return Node;
1548 }
1549
1550 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
1551 CurDAG->getTargetConstant(MSB, VT)};
1552 return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1553}
1554
1555/// Does DstMask form a complementary pair with the mask provided by
1556/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1557/// this asks whether DstMask zeroes precisely those bits that will be set by
1558/// the other half.
1559static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
1560 unsigned NumberOfIgnoredHighBits, EVT VT) {
1561 assert((VT == MVT::i32 || VT == MVT::i64) &&
1562 "i32 or i64 mask type expected!");
1563 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1564
1565 APInt SignificantDstMask = APInt(BitWidth, DstMask);
1566 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1567
1568 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1569 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1570}
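// Illustrative example (added comment, not from the original source): for
// VT = i32 with no ignored high bits, DstMask = 0xFFFF0000 and a
// BitsToBeInserted value of 0x0000FFFF are disjoint and together cover all
// 32 bits, so isBitfieldDstMask returns true.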
1571
1572// Look for bits that will be useful for later uses.
1573 // A bit is considered useless as soon as it is dropped and never used
1574 // before it has been dropped.
1575 // E.g., looking for the useful bits of x:
1576 // 1. y = x & 0x7
1577 // 2. z = y >> 2
1578 // After #1, the useful bits of x are 0x7; they then live through
1579 // y.
1580 // After #2, the useful bits of x are 0x4.
1581 // However, if x is used by an unpredictable instruction, then all its bits
1582 // are useful.
1583// E.g.
1584// 1. y = x & 0x7
1585// 2. z = y >> 2
1586// 3. str x, [@x]
1587static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1588
1589static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1590 unsigned Depth) {
1591 uint64_t Imm =
1592 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1593 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1594 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1595 getUsefulBits(Op, UsefulBits, Depth + 1);
1596}
1597
1598static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1599 uint64_t Imm, uint64_t MSB,
1600 unsigned Depth) {
1601 // inherit the bitwidth value
1602 APInt OpUsefulBits(UsefulBits);
1603 OpUsefulBits = 1;
1604
1605 if (MSB >= Imm) {
1606 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1607 --OpUsefulBits;
1608 // The interesting part will be in the lower part of the result
1609 getUsefulBits(Op, OpUsefulBits, Depth + 1);
1610 // The interesting part was starting at Imm in the argument
1611 OpUsefulBits = OpUsefulBits.shl(Imm);
1612 } else {
1613 OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1614 --OpUsefulBits;
1615 // The interesting part will be shifted in the result
1616 OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
1617 getUsefulBits(Op, OpUsefulBits, Depth + 1);
1618 // The interesting part was at zero in the argument
1619 OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
1620 }
1621
1622 UsefulBits &= OpUsefulBits;
1623}
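// Illustrative example (not from the original source): for a UBFM-style
// move with Imm (immr) = 4 and MSB (imms) = 7, the MSB >= Imm branch builds
// the mask 0xF, refines it against the users of the result, then shifts it
// to 0xF0: only bits [7:4] of the source operand can be useful.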
1624
1625static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1626 unsigned Depth) {
1627 uint64_t Imm =
1628 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1629 uint64_t MSB =
1630 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1631
1632 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1633}
1634
1635static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1636 unsigned Depth) {
1637 uint64_t ShiftTypeAndValue =
1638 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1639 APInt Mask(UsefulBits);
1640 Mask.clearAllBits();
1641 Mask.flipAllBits();
1642
1643 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1644 // Shift Left
1645 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1646 Mask = Mask.shl(ShiftAmt);
1647 getUsefulBits(Op, Mask, Depth + 1);
1648 Mask = Mask.lshr(ShiftAmt);
1649 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1650 // Shift Right
1651 // We do not handle AArch64_AM::ASR, because the sign will change the
1652 // number of useful bits
1653 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1654 Mask = Mask.lshr(ShiftAmt);
1655 getUsefulBits(Op, Mask, Depth + 1);
1656 Mask = Mask.shl(ShiftAmt);
1657 } else
1658 return;
1659
1660 UsefulBits &= Mask;
1661}
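// Illustrative example (not from the original source): for an ORRWrs user
// whose shifted (second) operand is the tracked value, with shift LSL #8
// and a result whose useful bits are 0x0000FF00, the mask is shifted to
// 0xFFFFFF00, refined to 0x0000FF00 by the recursion, then shifted back so
// that only bits 0x000000FF of the operand remain useful.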
1662
1663static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1664 unsigned Depth) {
1665 uint64_t Imm =
1666 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1667 uint64_t MSB =
1668 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1669
1670 if (Op.getOperand(1) == Orig)
1671 return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1672
1673 APInt OpUsefulBits(UsefulBits);
1674 OpUsefulBits = 1;
1675
1676 if (MSB >= Imm) {
1677 OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
1678 --OpUsefulBits;
1679 UsefulBits &= ~OpUsefulBits;
1680 getUsefulBits(Op, UsefulBits, Depth + 1);
1681 } else {
1682 OpUsefulBits = OpUsefulBits.shl(MSB + 1);
1683 --OpUsefulBits;
1684 UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
1685 getUsefulBits(Op, UsefulBits, Depth + 1);
1686 }
1687}
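// Illustrative example (not from the original source): for
// BFMWri dst, src, #16, #31, when Orig is the dst operand the inserted
// field overwrites dst[15:0], so those bits are cleared from UsefulBits;
// when Orig is the src operand the call above keeps only src[31:16] as
// potentially useful, just like a plain bitfield move.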
1688
1689static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
1690 SDValue Orig, unsigned Depth) {
1691
1692 // Users of this node should have already been instruction selected
1693 // FIXME: Can we turn that into an assert?
1694 if (!UserNode->isMachineOpcode())
1695 return;
1696
1697 switch (UserNode->getMachineOpcode()) {
1698 default:
1699 return;
1700 case AArch64::ANDSWri:
1701 case AArch64::ANDSXri:
1702 case AArch64::ANDWri:
1703 case AArch64::ANDXri:
1704 // We increment Depth only when we call getUsefulBits.
1705 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
1706 Depth);
1707 case AArch64::UBFMWri:
1708 case AArch64::UBFMXri:
1709 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
1710
1711 case AArch64::ORRWrs:
1712 case AArch64::ORRXrs:
1713 if (UserNode->getOperand(1) != Orig)
1714 return;
1715 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
1716 Depth);
1717 case AArch64::BFMWri:
1718 case AArch64::BFMXri:
1719 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1720 }
1721}
1722
1723static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
1724 if (Depth >= 6)
1725 return;
1726 // Initialize UsefulBits
1727 if (!Depth) {
1728 unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
1729 // At the beginning, assume every produced bit is useful
1730 UsefulBits = APInt(Bitwidth, 0);
1731 UsefulBits.flipAllBits();
1732 }
1733 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
1734
1735 for (SDNode *Node : Op.getNode()->uses()) {
1736 // A use cannot produce useful bits
1737 APInt UsefulBitsForUse = APInt(UsefulBits);
1738 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
1739 UsersUsefulBits |= UsefulBitsForUse;
1740 }
1741 // UsefulBits contains the produced bits that are meaningful for the
1742 // current definition, thus a user cannot make a bit meaningful at
1743 // this point
1744 UsefulBits &= UsersUsefulBits;
1745}
1746
1747/// Create a machine node performing a notional SHL of Op by ShlAmount. If
1748/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
1749/// 0, return Op unchanged.
1750static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
1751 if (ShlAmount == 0)
1752 return Op;
1753
1754 EVT VT = Op.getValueType();
1755 unsigned BitWidth = VT.getSizeInBits();
1756 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1757
1758 SDNode *ShiftNode;
1759 if (ShlAmount > 0) {
1760 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
1761 ShiftNode = CurDAG->getMachineNode(
1762 UBFMOpc, SDLoc(Op), VT, Op,
1763 CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
1764 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
1765 } else {
1766 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
1767 assert(ShlAmount < 0 && "expected right shift");
1768 int ShrAmount = -ShlAmount;
1769 ShiftNode = CurDAG->getMachineNode(
1770 UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
1771 CurDAG->getTargetConstant(BitWidth - 1, VT));
1772 }
1773
1774 return SDValue(ShiftNode, 0);
1775}
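// Illustrative example (added comment, not from the original source): on an
// i32 value, getLeftShift(CurDAG, Op, 4) emits UBFMWri Op, 28, 27 (LSL #4),
// getLeftShift(CurDAG, Op, -4) emits UBFMWri Op, 4, 31 (LSR #4), and a
// ShlAmount of 0 simply returns Op unchanged.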
1776
1777/// Does this tree qualify as an attempt to move a bitfield into position,
1778/// essentially "(and (shl VAL, N), Mask)".
1779static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
1780 SDValue &Src, int &ShiftAmount,
1781 int &MaskWidth) {
1782 EVT VT = Op.getValueType();
1783 unsigned BitWidth = VT.getSizeInBits();
1784 (void)BitWidth;
1785 assert(BitWidth == 32 || BitWidth == 64);
1786
1787 APInt KnownZero, KnownOne;
1788 CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
1789
1790 // Non-zero in the sense that they're not provably zero, which is the key
1791 // point if we want to use this value
1792 uint64_t NonZeroBits = (~KnownZero).getZExtValue();
1793
1794 // Discard a constant AND mask if present. It's safe because the node will
1795 // already have been factored into the computeKnownBits calculation above.
1796 uint64_t AndImm;
1797 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
1798 assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
1799 Op = Op.getOperand(0);
1800 }
1801
1802 uint64_t ShlImm;
1803 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
1804 return false;
1805 Op = Op.getOperand(0);
1806
1807 if (!isShiftedMask_64(NonZeroBits))
1808 return false;
1809
1810 ShiftAmount = countTrailingZeros(NonZeroBits);
1811 MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
1812
1813 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
1814 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
1815 // amount.
1816 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
1817
1818 return true;
1819}
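// Illustrative example (added comment, not from the original source): for
// Op = (and (shl x, 3), 0xF8) on i32, the known-bits analysis leaves
// NonZeroBits = 0xF8, a shifted mask, so ShiftAmount = 3, MaskWidth = 5 and
// Src = x; no extra shift is emitted because ShlImm already equals
// ShiftAmount.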
1820
1821 // Given an OR operation, check if we have the following pattern
1822 // ubfm c, b, imm, imm2 (or something that does the same job, see
1823 // isBitfieldExtractOp)
1824 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
1825 // countTrailingZeros(mask2) == imm2 - imm + 1
1826 // f = d | c
1827 // If yes, the given reference arguments will be updated so that one can
1828 // replace the OR instruction with:
1829 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
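// Illustrative instance of the pattern above (not from the original
// source), on i32:
//   c = UBFMWri b, #16, #31   ; unsigned extract of b[31:16]
//   d = e & 0xFFFF0000
//   f = d | c
// can be replaced by f = BFMWri e, b, #16, #31, which keeps e[31:16] and
// copies b[31:16] into f[15:0].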
1830static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
1831 SDValue &Src, unsigned &ImmR,
1832 unsigned &ImmS, SelectionDAG *CurDAG) {
1833 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
1834
1835 // Set Opc
1836 EVT VT = N->getValueType(0);
1837 if (VT == MVT::i32)
1838 Opc = AArch64::BFMWri;
1839 else if (VT == MVT::i64)
1840 Opc = AArch64::BFMXri;
1841 else
1842 return false;
1843
1844 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
1845 // have the expected shape. Try to undo that.
1846 APInt UsefulBits;
1847 getUsefulBits(SDValue(N, 0), UsefulBits);
1848
1849 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
1850 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
1851
1852 // OR is commutative, check both possibilities (does LLVM provide a
1853 // way to do that directly, e.g., via the code matcher?)
1854 SDValue OrOpd1Val = N->getOperand(1);
1855 SDNode *OrOpd0 = N->getOperand(0).getNode();
1856 SDNode *OrOpd1 = N->getOperand(1).getNode();
1857 for (int i = 0; i < 2;
1858 ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
1859 unsigned BFXOpc;
1860 int DstLSB, Width;
1861 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
1862 NumberOfIgnoredLowBits, true)) {
1863 // Check that the returned opcode is compatible with the pattern,
1864 // i.e., same type and zero extended (U and not S)
1865 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
1866 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
1867 continue;
1868
1869 // Compute the width of the bitfield insertion
1870 DstLSB = 0;
1871 Width = ImmS - ImmR + 1;
1872 // FIXME: This constraint is to catch bitfield insertion; we may
1873 // want to widen the pattern if we want to grab the general bitfield
1874 // move case.
1875 if (Width <= 0)
1876 continue;
1877
1878 // If the mask on the insertee is correct, we have a BFXIL operation. We
1879 // can share the ImmR and ImmS values from the already-computed UBFM.
1880 } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
1881 DstLSB, Width)) {
1882 ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
1883 ImmS = Width - 1;
1884 } else
1885 continue;
1886
1887 // Check the second part of the pattern
1888 EVT VT = OrOpd1->getValueType(0);
1889 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
1890
1891 // Compute the Known Zero for the candidate of the first operand.
1892 // This allows us to catch more general cases than just looking for
1893 // an AND with an immediate. Indeed, simplify-demanded-bits may have
1894 // removed the AND instruction because it proved it was useless.
1895 APInt KnownZero, KnownOne;
1896 CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
1897
1898 // Check if there is enough room for the second operand to appear
1899 // in the first one
1900 APInt BitsToBeInserted =
1901 APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
1902
1903 if ((BitsToBeInserted & ~KnownZero) != 0)
1904 continue;
1905
1906 // Set the first operand
1907 uint64_t Imm;
1908 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
1909 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
1910 // In that case, we can eliminate the AND
1911 Dst = OrOpd1->getOperand(0);
1912 else
1913 // Maybe the AND has been removed by simplify-demanded-bits
1914 // or is useful because it discards more bits
1915 Dst = OrOpd1Val;
1916
1917 // both parts match
1918 return true;
1919 }
1920
1921 return false;
1922}
1923
1924SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
1925 if (N->getOpcode() != ISD::OR)
1926 return nullptr;
1927
1928 unsigned Opc;
1929 unsigned LSB, MSB;
1930 SDValue Opd0, Opd1;
1931
1932 if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
1933 return nullptr;
1934
1935 EVT VT = N->getValueType(0);
1936 SDValue Ops[] = { Opd0,
1937 Opd1,
1938 CurDAG->getTargetConstant(LSB, VT),
1939 CurDAG->getTargetConstant(MSB, VT) };
1940 return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1941}
1942
1943SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
1944 EVT VT = N->getValueType(0);
1945 unsigned Variant;
1946 unsigned Opc;
1947 unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
1948
1949 if (VT == MVT::f32) {
1950 Variant = 0;
1951 } else if (VT == MVT::f64) {
1952 Variant = 1;
1953 } else
1954 return nullptr; // Unrecognized argument type. Fall back on default codegen.
1955
1956 // Pick the FRINTX variant needed to set the flags.
1957 unsigned FRINTXOpc = FRINTXOpcs[Variant];
1958
1959 switch (N->getOpcode()) {
1960 default:
1961 return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
1962 case ISD::FCEIL: {
1963 unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
1964 Opc = FRINTPOpcs[Variant];
1965 break;
1966 }
1967 case ISD::FFLOOR: {
1968 unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
1969 Opc = FRINTMOpcs[Variant];
1970 break;
1971 }
1972 case ISD::FTRUNC: {
1973 unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
1974 Opc = FRINTZOpcs[Variant];
1975 break;
1976 }
1977 case ISD::FROUND: {
1978 unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
1979 Opc = FRINTAOpcs[Variant];
1980 break;
1981 }
1982 }
1983
1984 SDLoc dl(N);
1985 SDValue In = N->getOperand(0);
1986 SmallVector<SDValue, 2> Ops;
1987 Ops.push_back(In);
1988
1989 if (!TM.Options.UnsafeFPMath) {
1990 SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
1991 Ops.push_back(SDValue(FRINTX, 1));
1992 }
1993
1994 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
1995}
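// Illustrative note (added comment, not from the original source): without
// UnsafeFPMath, (fceil x) on f64 is selected above as an FRINTXDr of x,
// emitted only for its flag-setting side effect, whose glue result is fed
// as an extra operand to the FRINTPDr that produces the rounded value.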
1996
1997bool
1998AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
1999 unsigned RegWidth) {
2000 APFloat FVal(0.0);
2001 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2002 FVal = CN->getValueAPF();
2003 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2004 // Some otherwise illegal constants are allowed in this case.
2005 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2006 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2007 return false;
2008
2009 ConstantPoolSDNode *CN =
2010 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2011 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2012 } else
2013 return false;
2014
2015 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2016 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2017 // x-register.
2018 //
2019 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2020 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2021 // integers.
2022 bool IsExact;
2023
2024 // fbits is between 1 and 64 in the worst-case, which means the fmul
2025 // could have 2^64 as an actual operand. Need 65 bits of precision.
2026 APSInt IntVal(65, true);
2027 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2028
2029 // N.b. isPowerOf2 also checks for > 0.
2030 if (!IsExact || !IntVal.isPowerOf2()) return false;
2031 unsigned FBits = IntVal.logBase2();
2032
2033 // Checks above should have guaranteed that we haven't lost information in
2034 // finding FBits, but it must still be in range.
2035 if (FBits == 0 || FBits > RegWidth) return false;
2036
2037 FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
2038 return true;
2039}
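// Illustrative example (added comment, not from the original source): for
// (fp_to_sint (fmul Val, 16.0)) with a 32-bit destination register,
// FVal = 16.0 converts exactly to the power of two 16, so FBits = 4 and
// FixedPos becomes the #4 fixed-point immediate for the FCVT[SU] form.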
2040
2041SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
2042 // Dump information about the Node being selected
2043 DEBUG(errs() << "Selecting: ");
2044 DEBUG(Node->dump(CurDAG));
2045 DEBUG(errs() << "\n");
2046
2047 // If we have a custom node, we already have selected!
2048 if (Node->isMachineOpcode()) {
2049 DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2050 Node->setNodeId(-1);
2051 return nullptr;
2052 }
2053
2054 // A few custom selection cases.
2055 SDNode *ResNode = nullptr;
2056 EVT VT = Node->getValueType(0);
2057
2058 switch (Node->getOpcode()) {
2059 default:
2060 break;
2061
2062 case ISD::ADD:
2063 if (SDNode *I = SelectMLAV64LaneV128(Node))
2064 return I;
2065 break;
2066
2067 case ISD::LOAD: {
2068 // Try to select as an indexed load. Fall through to normal processing
2069 // if we can't.
2070 bool Done = false;
2071 SDNode *I = SelectIndexedLoad(Node, Done);
2072 if (Done)
2073 return I;
2074 break;
2075 }
2076
2077 case ISD::SRL:
2078 case ISD::AND:
2079 case ISD::SRA:
2080 if (SDNode *I = SelectBitfieldExtractOp(Node))
2081 return I;
2082 break;
2083
2084 case ISD::OR:
2085 if (SDNode *I = SelectBitfieldInsertOp(Node))
2086 return I;
2087 break;
2088
2089 case ISD::EXTRACT_VECTOR_ELT: {
2090 // Extracting lane zero is a special case where we can just use a plain
2091 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2092 // the rest of the compiler, especially the register allocator and copy
2093 // propagation, to reason about, so is preferred when it's possible to
2094 // use it.
2095 ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2096 // Bail and use the default Select() for non-zero lanes.
2097 if (LaneNode->getZExtValue() != 0)
2098 break;
2099 // If the element type is not the same as the result type, likewise
2100 // bail and use the default Select(), as there's more to do than just
2101 // a cross-class COPY. This catches extracts of i8 and i16 elements
2102 // since they will need an explicit zext.
2103 if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2104 break;
2105 unsigned SubReg;
2106 switch (Node->getOperand(0)
2107 .getValueType()
2108 .getVectorElementType()
2109 .getSizeInBits()) {
2110 default:
2111 llvm_unreachable("Unexpected vector element type!");
2112 case 64:
2113 SubReg = AArch64::dsub;
2114 break;
2115 case 32:
2116 SubReg = AArch64::ssub;
2117 break;
2118 case 16: // FALLTHROUGH
2119 case 8:
2120 llvm_unreachable("unexpected zext-requiring extract element!");
2121 }
2122 SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2123 Node->getOperand(0));
2124 DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2125 DEBUG(Extract->dumpr(CurDAG));
2126 DEBUG(dbgs() << "\n");
2127 return Extract.getNode();
2128 }
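// Illustrative example (added comment, not from the original source):
// extracting lane 0 of a v2f64 value selects to an EXTRACT_SUBREG of
// AArch64::dsub on the source vector, i.e. a plain subregister copy rather
// than a lane-indexed move.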
2129 case ISD::Constant: {
2130 // Materialize zero constants as copies from WZR/XZR. This allows
2131 // the coalescer to propagate these into other instructions.
2132 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2133 if (ConstNode->isNullValue()) {
2134 if (VT == MVT::i32)
2135 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2136 AArch64::WZR, MVT::i32).getNode();
2137 else if (VT == MVT::i64)
2138 return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
2139 AArch64::XZR, MVT::i64).getNode();
2140 }
2141 break;
2142 }
2143
2144 case ISD::FrameIndex: {
2145 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2146 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2147 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2148 const TargetLowering *TLI = getTargetLowering();
2149 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
2150 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2151 CurDAG->getTargetConstant(Shifter, MVT::i32) };
2152 return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2153 }
2154 case ISD::INTRINSIC_W_CHAIN: {
2155 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2156 switch (IntNo) {
2157 default:
2158 break;
2159 case Intrinsic::aarch64_ldaxp:
2160 case Intrinsic::aarch64_ldxp: {
2161 unsigned Op =
2162 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2163 SDValue MemAddr = Node->getOperand(2);
2164 SDLoc DL(Node);
2165 SDValue Chain = Node->getOperand(0);
2166
2167 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2168 MVT::Other, MemAddr, Chain);
2169
2170 // Transfer memoperands.
2171 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2172 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2173 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2174 return Ld;
2175 }
2176 case Intrinsic::aarch64_stlxp:
2177 case Intrinsic::aarch64_stxp: {
2178 unsigned Op =
2179 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2180 SDLoc DL(Node);
2181 SDValue Chain = Node->getOperand(0);
2182 SDValue ValLo = Node->getOperand(2);
2183 SDValue ValHi = Node->getOperand(3);
2184 SDValue MemAddr = Node->getOperand(4);
2185
2186 // Place arguments in the right order.
2187 SmallVector<SDValue, 7> Ops;
2188 Ops.push_back(ValLo);
2189 Ops.push_back(ValHi);
2190 Ops.push_back(MemAddr);
2191 Ops.push_back(Chain);
2192
2193 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2194 // Transfer memoperands.
2195 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2196 MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2197 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2198
2199 return St;
2200 }
2201 case Intrinsic::aarch64_neon_ld1x2:
2202 if (VT == MVT::v8i8)
2203 return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2204 else if (VT == MVT::v16i8)
2205 return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2206 else if (VT == MVT::v4i16)
2207 return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2208 else if (VT == MVT::v8i16)
2209 return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2210 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2211 return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2212 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2213 return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2214 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2215 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2216 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2217 return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2218 break;
2219 case Intrinsic::aarch64_neon_ld1x3:
2220 if (VT == MVT::v8i8)
2221 return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
2222 else if (VT == MVT::v16i8)
2223 return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
2224 else if (VT == MVT::v4i16)
2225 return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
2226 else if (VT == MVT::v8i16)
2227 return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
2228 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2229 return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
2230 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2231 return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
2232 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2233 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2234 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2235 return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
2236 break;
2237 case Intrinsic::aarch64_neon_ld1x4:
2238 if (VT == MVT::v8i8)
2239 return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
2240 else if (VT == MVT::v16i8)
2241 return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
2242 else if (VT == MVT::v4i16)
2243 return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
2244 else if (VT == MVT::v8i16)
2245 return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
2246 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2247 return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
2248 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2249 return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
2250 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2251 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2252 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2253 return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
2254 break;
2255 case Intrinsic::aarch64_neon_ld2:
2256 if (VT == MVT::v8i8)
2257 return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
2258 else if (VT == MVT::v16i8)
2259 return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
2260 else if (VT == MVT::v4i16)
2261 return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
2262 else if (VT == MVT::v8i16)
2263 return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
2264 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2265 return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
2266 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2267 return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
2268 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2269 return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2270 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2271 return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
2272 break;
2273 case Intrinsic::aarch64_neon_ld3:
2274 if (VT == MVT::v8i8)
2275 return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
2276 else if (VT == MVT::v16i8)
2277 return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
2278 else if (VT == MVT::v4i16)
2279 return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
2280 else if (VT == MVT::v8i16)
2281 return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
2282 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2283 return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
2284 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2285 return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
2286 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2287 return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
2288 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2289 return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
2290 break;
2291 case Intrinsic::aarch64_neon_ld4:
2292 if (VT == MVT::v8i8)
2293 return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
2294 else if (VT == MVT::v16i8)
2295 return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
2296 else if (VT == MVT::v4i16)
2297 return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
2298 else if (VT == MVT::v8i16)
2299 return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
2300 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2301 return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
2302 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2303 return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
2304 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2305 return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
2306 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2307 return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
2308 break;
2309 case Intrinsic::aarch64_neon_ld2r:
2310 if (VT == MVT::v8i8)
2311 return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
2312 else if (VT == MVT::v16i8)
2313 return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
2314 else if (VT == MVT::v4i16)
2315 return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
2316 else if (VT == MVT::v8i16)
2317 return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
2318 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2319 return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
2320 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2321 return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
2322 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2323 return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
2324 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2325 return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
2326 break;
2327 case Intrinsic::aarch64_neon_ld3r:
2328 if (VT == MVT::v8i8)
2329 return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
2330 else if (VT == MVT::v16i8)
2331 return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
2332 else if (VT == MVT::v4i16)
2333 return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
2334 else if (VT == MVT::v8i16)
2335 return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
2336 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2337 return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
2338 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2339 return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
2340 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2341 return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
2342 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2343 return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
2344 break;
2345 case Intrinsic::aarch64_neon_ld4r:
2346 if (VT == MVT::v8i8)
2347 return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
2348 else if (VT == MVT::v16i8)
2349 return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
2350 else if (VT == MVT::v4i16)
2351 return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
2352 else if (VT == MVT::v8i16)
2353 return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
2354 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2355 return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
2356 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2357 return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
2358 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2359 return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
2360 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2361 return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
2362 break;
2363 case Intrinsic::aarch64_neon_ld2lane:
2364 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2365 return SelectLoadLane(Node, 2, AArch64::LD2i8);
2366 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2367 return SelectLoadLane(Node, 2, AArch64::LD2i16);
2368 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2369 VT == MVT::v2f32)
2370 return SelectLoadLane(Node, 2, AArch64::LD2i32);
2371 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2372 VT == MVT::v1f64)
2373 return SelectLoadLane(Node, 2, AArch64::LD2i64);
2374 break;
2375 case Intrinsic::aarch64_neon_ld3lane:
2376 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2377 return SelectLoadLane(Node, 3, AArch64::LD3i8);
2378 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2379 return SelectLoadLane(Node, 3, AArch64::LD3i16);
2380 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2381 VT == MVT::v2f32)
2382 return SelectLoadLane(Node, 3, AArch64::LD3i32);
2383 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2384 VT == MVT::v1f64)
2385 return SelectLoadLane(Node, 3, AArch64::LD3i64);
2386 break;
2387 case Intrinsic::aarch64_neon_ld4lane:
2388 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2389 return SelectLoadLane(Node, 4, AArch64::LD4i8);
2390 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2391 return SelectLoadLane(Node, 4, AArch64::LD4i16);
2392 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2393 VT == MVT::v2f32)
2394 return SelectLoadLane(Node, 4, AArch64::LD4i32);
2395 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2396 VT == MVT::v1f64)
2397 return SelectLoadLane(Node, 4, AArch64::LD4i64);
2398 break;
2399 }
2400 } break;
2401 case ISD::INTRINSIC_WO_CHAIN: {
2402 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
2403 switch (IntNo) {
2404 default:
2405 break;
2406 case Intrinsic::aarch64_neon_tbl2:
2407 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
2408 : AArch64::TBLv16i8Two,
2409 false);
2410 case Intrinsic::aarch64_neon_tbl3:
2411 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
2412 : AArch64::TBLv16i8Three,
2413 false);
2414 case Intrinsic::aarch64_neon_tbl4:
2415 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
2416 : AArch64::TBLv16i8Four,
2417 false);
2418 case Intrinsic::aarch64_neon_tbx2:
2419 return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
2420 : AArch64::TBXv16i8Two,
2421 true);
2422 case Intrinsic::aarch64_neon_tbx3:
2423 return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
2424 : AArch64::TBXv16i8Three,
2425 true);
2426 case Intrinsic::aarch64_neon_tbx4:
2427 return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
2428 : AArch64::TBXv16i8Four,
2429 true);
2430 case Intrinsic::aarch64_neon_smull:
2431 case Intrinsic::aarch64_neon_umull:
2432 if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
2433 return N;
2434 break;
2435 }
2436 break;
2437 }
2438 case ISD::INTRINSIC_VOID: {
2439 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2440 if (Node->getNumOperands() >= 3)
2441 VT = Node->getOperand(2)->getValueType(0);
2442 switch (IntNo) {
2443 default:
2444 break;
2445 case Intrinsic::aarch64_neon_st1x2: {
2446 if (VT == MVT::v8i8)
2447 return SelectStore(Node, 2, AArch64::ST1Twov8b);
2448 else if (VT == MVT::v16i8)
2449 return SelectStore(Node, 2, AArch64::ST1Twov16b);
2450 else if (VT == MVT::v4i16)
2451 return SelectStore(Node, 2, AArch64::ST1Twov4h);
2452 else if (VT == MVT::v8i16)
2453 return SelectStore(Node, 2, AArch64::ST1Twov8h);
2454 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2455 return SelectStore(Node, 2, AArch64::ST1Twov2s);
2456 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2457 return SelectStore(Node, 2, AArch64::ST1Twov4s);
2458 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2459 return SelectStore(Node, 2, AArch64::ST1Twov2d);
2460 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2461 return SelectStore(Node, 2, AArch64::ST1Twov1d);
2462 break;
2463 }
2464 case Intrinsic::aarch64_neon_st1x3: {
2465 if (VT == MVT::v8i8)
2466 return SelectStore(Node, 3, AArch64::ST1Threev8b);
2467 else if (VT == MVT::v16i8)
2468 return SelectStore(Node, 3, AArch64::ST1Threev16b);
2469 else if (VT == MVT::v4i16)
2470 return SelectStore(Node, 3, AArch64::ST1Threev4h);
2471 else if (VT == MVT::v8i16)
2472 return SelectStore(Node, 3, AArch64::ST1Threev8h);
2473 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2474 return SelectStore(Node, 3, AArch64::ST1Threev2s);
2475 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2476 return SelectStore(Node, 3, AArch64::ST1Threev4s);
2477 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2478 return SelectStore(Node, 3, AArch64::ST1Threev2d);
2479 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2480 return SelectStore(Node, 3, AArch64::ST1Threev1d);
2481 break;
2482 }
2483 case Intrinsic::aarch64_neon_st1x4: {
2484 if (VT == MVT::v8i8)
2485 return SelectStore(Node, 4, AArch64::ST1Fourv8b);
2486 else if (VT == MVT::v16i8)
2487 return SelectStore(Node, 4, AArch64::ST1Fourv16b);
2488 else if (VT == MVT::v4i16)
2489 return SelectStore(Node, 4, AArch64::ST1Fourv4h);
2490 else if (VT == MVT::v8i16)
2491 return SelectStore(Node, 4, AArch64::ST1Fourv8h);
2492 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2493 return SelectStore(Node, 4, AArch64::ST1Fourv2s);
2494 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2495 return SelectStore(Node, 4, AArch64::ST1Fourv4s);
2496 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2497 return SelectStore(Node, 4, AArch64::ST1Fourv2d);
2498 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2499 return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2500 break;
2501 }
2502 case Intrinsic::aarch64_neon_st2: {
2503 if (VT == MVT::v8i8)
2504 return SelectStore(Node, 2, AArch64::ST2Twov8b);
2505 else if (VT == MVT::v16i8)
2506 return SelectStore(Node, 2, AArch64::ST2Twov16b);
2507 else if (VT == MVT::v4i16)
2508 return SelectStore(Node, 2, AArch64::ST2Twov4h);
2509 else if (VT == MVT::v8i16)
2510 return SelectStore(Node, 2, AArch64::ST2Twov8h);
2511 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2512 return SelectStore(Node, 2, AArch64::ST2Twov2s);
2513 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2514 return SelectStore(Node, 2, AArch64::ST2Twov4s);
2515 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2516 return SelectStore(Node, 2, AArch64::ST2Twov2d);
2517 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2518 return SelectStore(Node, 2, AArch64::ST1Twov1d);
2519 break;
2520 }
2521 case Intrinsic::aarch64_neon_st3: {
2522 if (VT == MVT::v8i8)
2523 return SelectStore(Node, 3, AArch64::ST3Threev8b);
2524 else if (VT == MVT::v16i8)
2525 return SelectStore(Node, 3, AArch64::ST3Threev16b);
2526 else if (VT == MVT::v4i16)
2527 return SelectStore(Node, 3, AArch64::ST3Threev4h);
2528 else if (VT == MVT::v8i16)
2529 return SelectStore(Node, 3, AArch64::ST3Threev8h);
2530 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2531 return SelectStore(Node, 3, AArch64::ST3Threev2s);
2532 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2533 return SelectStore(Node, 3, AArch64::ST3Threev4s);
2534 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2535 return SelectStore(Node, 3, AArch64::ST3Threev2d);
2536 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2537 return SelectStore(Node, 3, AArch64::ST1Threev1d);
2538 break;
2539 }
2540 case Intrinsic::aarch64_neon_st4: {
2541 if (VT == MVT::v8i8)
2542 return SelectStore(Node, 4, AArch64::ST4Fourv8b);
2543 else if (VT == MVT::v16i8)
2544 return SelectStore(Node, 4, AArch64::ST4Fourv16b);
2545 else if (VT == MVT::v4i16)
2546 return SelectStore(Node, 4, AArch64::ST4Fourv4h);
2547 else if (VT == MVT::v8i16)
2548 return SelectStore(Node, 4, AArch64::ST4Fourv8h);
2549 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2550 return SelectStore(Node, 4, AArch64::ST4Fourv2s);
2551 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2552 return SelectStore(Node, 4, AArch64::ST4Fourv4s);
2553 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2554 return SelectStore(Node, 4, AArch64::ST4Fourv2d);
2555 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2556 return SelectStore(Node, 4, AArch64::ST1Fourv1d);
2557 break;
2558 }
2559 case Intrinsic::aarch64_neon_st2lane: {
2560 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2561 return SelectStoreLane(Node, 2, AArch64::ST2i8);
2562 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2563 return SelectStoreLane(Node, 2, AArch64::ST2i16);
2564 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2565 VT == MVT::v2f32)
2566 return SelectStoreLane(Node, 2, AArch64::ST2i32);
2567 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2568 VT == MVT::v1f64)
2569 return SelectStoreLane(Node, 2, AArch64::ST2i64);
2570 break;
2571 }
2572 case Intrinsic::aarch64_neon_st3lane: {
2573 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2574 return SelectStoreLane(Node, 3, AArch64::ST3i8);
2575 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2576 return SelectStoreLane(Node, 3, AArch64::ST3i16);
2577 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2578 VT == MVT::v2f32)
2579 return SelectStoreLane(Node, 3, AArch64::ST3i32);
2580 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2581 VT == MVT::v1f64)
2582 return SelectStoreLane(Node, 3, AArch64::ST3i64);
2583 break;
2584 }
2585 case Intrinsic::aarch64_neon_st4lane: {
2586 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2587 return SelectStoreLane(Node, 4, AArch64::ST4i8);
2588 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2589 return SelectStoreLane(Node, 4, AArch64::ST4i16);
2590 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2591 VT == MVT::v2f32)
2592 return SelectStoreLane(Node, 4, AArch64::ST4i32);
2593 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2594 VT == MVT::v1f64)
2595 return SelectStoreLane(Node, 4, AArch64::ST4i64);
2596 break;
2597 }
2598 }
2599 } break;
2600 case AArch64ISD::LD2post: {
2601 if (VT == MVT::v8i8)
2602 return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
2603 else if (VT == MVT::v16i8)
2604 return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
2605 else if (VT == MVT::v4i16)
2606 return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
2607 else if (VT == MVT::v8i16)
2608 return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
2609 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2610 return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
2611 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2612 return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
2613 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2614 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2615 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2616 return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
2617 break;
2618 }
2619 case AArch64ISD::LD3post: {
2620 if (VT == MVT::v8i8)
2621 return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
2622 else if (VT == MVT::v16i8)
2623 return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
2624 else if (VT == MVT::v4i16)
2625 return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
2626 else if (VT == MVT::v8i16)
2627 return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
2628 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2629 return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
2630 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2631 return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
2632 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2633 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2634 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2635 return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
2636 break;
2637 }
2638 case AArch64ISD::LD4post: {
2639 if (VT == MVT::v8i8)
2640 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
2641 else if (VT == MVT::v16i8)
2642 return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
2643 else if (VT == MVT::v4i16)
2644 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
2645 else if (VT == MVT::v8i16)
2646 return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
2647 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2648 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
2649 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2650 return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
2651 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2652 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2653 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2654 return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
2655 break;
2656 }
2657 case AArch64ISD::LD1x2post: {
2658 if (VT == MVT::v8i8)
2659 return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
2660 else if (VT == MVT::v16i8)
2661 return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
2662 else if (VT == MVT::v4i16)
2663 return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
2664 else if (VT == MVT::v8i16)
2665 return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
2666 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2667 return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
2668 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2669 return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
2670 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2671 return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
2672 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2673 return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
2674 break;
2675 }
2676 case AArch64ISD::LD1x3post: {
2677 if (VT == MVT::v8i8)
2678 return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
2679 else if (VT == MVT::v16i8)
2680 return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
2681 else if (VT == MVT::v4i16)
2682 return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
2683 else if (VT == MVT::v8i16)
2684 return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
2685 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2686 return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
2687 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2688 return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
2689 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2690 return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
2691 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2692 return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
2693 break;
2694 }
2695 case AArch64ISD::LD1x4post: {
2696 if (VT == MVT::v8i8)
2697 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
2698 else if (VT == MVT::v16i8)
2699 return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
2700 else if (VT == MVT::v4i16)
2701 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
2702 else if (VT == MVT::v8i16)
2703 return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
2704 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2705 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
2706 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2707 return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
2708 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2709 return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
2710 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2711 return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
2712 break;
2713 }
2714 case AArch64ISD::LD1DUPpost: {
2715 if (VT == MVT::v8i8)
2716 return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
2717 else if (VT == MVT::v16i8)
2718 return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
2719 else if (VT == MVT::v4i16)
2720 return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
2721 else if (VT == MVT::v8i16)
2722 return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
2723 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2724 return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
2725 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2726 return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
2727 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2728 return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
2729 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2730 return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
2731 break;
2732 }
2733 case AArch64ISD::LD2DUPpost: {
2734 if (VT == MVT::v8i8)
2735 return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
2736 else if (VT == MVT::v16i8)
2737 return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
2738 else if (VT == MVT::v4i16)
2739 return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
2740 else if (VT == MVT::v8i16)
2741 return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
2742 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2743 return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
2744 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2745 return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
2746 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2747 return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
2748 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2749 return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
2750 break;
2751 }
2752 case AArch64ISD::LD3DUPpost: {
2753 if (VT == MVT::v8i8)
2754 return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
2755 else if (VT == MVT::v16i8)
2756 return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
2757 else if (VT == MVT::v4i16)
2758 return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
2759 else if (VT == MVT::v8i16)
2760 return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
2761 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2762 return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
2763 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2764 return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
2765 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2766 return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
2767 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2768 return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
2769 break;
2770 }
2771 case AArch64ISD::LD4DUPpost: {
2772 if (VT == MVT::v8i8)
2773 return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
2774 else if (VT == MVT::v16i8)
2775 return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
2776 else if (VT == MVT::v4i16)
2777 return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
2778 else if (VT == MVT::v8i16)
2779 return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
2780 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2781 return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
2782 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2783 return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
2784 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2785 return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
2786 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2787 return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
2788 break;
2789 }
2790 case AArch64ISD::LD1LANEpost: {
2791 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2792 return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
2793 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2794 return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
2795 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2796 VT == MVT::v2f32)
2797 return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
2798 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2799 VT == MVT::v1f64)
2800 return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
2801 break;
2802 }
2803 case AArch64ISD::LD2LANEpost: {
2804 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2805 return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
2806 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2807 return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
2808 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2809 VT == MVT::v2f32)
2810 return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
2811 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2812 VT == MVT::v1f64)
2813 return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
2814 break;
2815 }
2816 case AArch64ISD::LD3LANEpost: {
2817 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2818 return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
2819 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2820 return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
2821 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2822 VT == MVT::v2f32)
2823 return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
2824 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2825 VT == MVT::v1f64)
2826 return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
2827 break;
2828 }
2829 case AArch64ISD::LD4LANEpost: {
2830 if (VT == MVT::v16i8 || VT == MVT::v8i8)
2831 return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
2832 else if (VT == MVT::v8i16 || VT == MVT::v4i16)
2833 return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
2834 else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
2835 VT == MVT::v2f32)
2836 return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
2837 else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
2838 VT == MVT::v1f64)
2839 return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
2840 break;
2841 }
2842 case AArch64ISD::ST2post: {
2843 VT = Node->getOperand(1).getValueType();
2844 if (VT == MVT::v8i8)
2845 return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
2846 else if (VT == MVT::v16i8)
2847 return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
2848 else if (VT == MVT::v4i16)
2849 return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
2850 else if (VT == MVT::v8i16)
2851 return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
2852 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2853 return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
2854 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2855 return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
2856 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2857 return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
2858 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2859 return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
2860 break;
2861 }
2862 case AArch64ISD::ST3post: {
2863 VT = Node->getOperand(1).getValueType();
2864 if (VT == MVT::v8i8)
2865 return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
2866 else if (VT == MVT::v16i8)
2867 return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
2868 else if (VT == MVT::v4i16)
2869 return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
2870 else if (VT == MVT::v8i16)
2871 return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
2872 else if (VT == MVT::v2i32 || VT == MVT::v2f32)
2873 return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
2874 else if (VT == MVT::v4i32 || VT == MVT::v4f32)
2875 return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
2876 else if (VT == MVT::v2i64 || VT == MVT::v2f64)
2877 return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
2878 else if (VT == MVT::v1i64 || VT == MVT::v1f64)
2879 return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
2880 break;
2881 }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    break;
  }
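  // ST1x{2,3,4}post store two, three or four registers back-to-back with no
  // interleaving, so every element type maps directly onto an ST1
  // multi-register post-indexed instruction.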
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    break;
  }
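  // Lane-wise post-indexed stores.  As with the lane loads, only the element
  // size matters when picking the opcode.  Illustrative assembly (not taken
  // from this file): ST2i32_POST roughly corresponds to
  // "st2 { v0.s, v1.s }[1], [x0], #8".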
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }

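  // These nodes correspond to the libm ceil/floor/trunc/round calls.
  // SelectLIBM tries to match them to a single FP rounding instruction and
  // returns null when it cannot, in which case we fall through to the
  // default selection below.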
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Select the default instruction via the TableGen-generated matcher.
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

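// A minimal usage sketch (an assumption about the surrounding code, not taken
// from this file): the AArch64 pass configuration is expected to register
// this pass as its instruction selector, roughly:
//
//   addPass(createAArch64ISelDag(TM, OptLevel));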
/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}