//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic here except for the exec read.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

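/// \brief Returns true when both loads are machine nodes of the same memory
/// class (DS, SMRD, or buffer) with matching base and chain operands;
/// \p Offset0 and \p Offset1 receive the values of the instructions'
/// immediate offset operands.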
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {
    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                        int64_t &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();

  if (isDS(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive.
    // We will use this for some partially aligned loads.
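    // For example, a ds_read2_b32 with offset0 == 2 and offset1 == 3 reads
    // two dwords at base + 8 and base + 12, and is reported here as a single
    // access at base + 8 (illustrative; the element size is computed below).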
225 const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
226 AMDGPU::OpName::offset0);
Changpeng Fang24f035a2016-03-01 17:51:23 +0000227 // DS_PERMUTE does not have Offset0Imm (and Offset1Imm).
228 if (!Offset0Imm)
229 return false;
230
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000231 const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
232 AMDGPU::OpName::offset1);
Matt Arsenault1acc72f2014-07-29 21:34:55 +0000233
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000234 uint8_t Offset0 = Offset0Imm->getImm();
235 uint8_t Offset1 = Offset1Imm->getImm();
Matt Arsenault7eb0a102014-07-30 01:01:10 +0000236
Matt Arsenault84db5d92015-07-14 17:57:36 +0000237 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element-sized units, so we need to
      // convert to the byte offsets of the individual reads.

      unsigned EltSize;
      if (LdSt->mayLoad())
        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
      else {
        assert(LdSt->mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}

bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
                                      MachineInstr *SecondLdSt,
                                      unsigned NumLoads) const {
  const MachineOperand *FirstDst = nullptr;
  const MachineOperand *SecondDst = nullptr;

  if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) {
    FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst);
  }

  if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) {
    FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst);
    SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst);
  }

  if ((isMUBUF(*FirstLdSt) && isMUBUF(*SecondLdSt)) ||
      (isMTBUF(*FirstLdSt) && isMTBUF(*SecondLdSt))) {
    FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata);
    SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata);
  }

  if (!FirstDst || !SecondDst)
    return false;

  // Try to limit clustering based on the total number of bytes loaded
  // rather than the number of instructions. This is done to help reduce
  // register pressure. The method used is somewhat inexact, though,
  // because it assumes that all loads in the cluster will load the
  // same number of bytes as FirstLdSt.

  // The unit of this value is bytes.
  // FIXME: This needs finer tuning.
  unsigned LoadClusterThreshold = 16;

  const MachineRegisterInfo &MRI =
      FirstLdSt->getParent()->getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());

  return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
}

void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI, DebugLoc DL,
                              unsigned DestReg, unsigned SrcReg,
                              bool KillSrc) const {
  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
  };

  static const int16_t Sub0_7_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  };

  static const int16_t Sub0_3_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1,
  };

  unsigned Opcode;
  ArrayRef<int16_t> SubIndices;
  bool Forward;

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_3_64;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_7_64;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_15_64;

  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

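  // When DestReg and SrcReg overlap, copy in the direction that reads each
  // source sub-register before the copy can clobber it.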
  Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == SubIndices.size() - 1)
      Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();

  int NewOpc;

  // Try to map the original opcode to its commuted (REV) form
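  // (e.g. V_SUB_F32 maps to V_SUBREV_F32 and vice versa).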
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map the commuted opcode back to the original
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo->getObjectSize(FrameIndex);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling
    // SGPRs.
    unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode))
      .addReg(SrcReg)            // src
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg)                         // src
    .addFrameIndex(FrameIndex)              // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addImm(0)                              // offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo->getObjectSize(FrameIndex);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)              // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addImm(0)                              // offset
    .addMemOperand(MMO);
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
        WorkGroupSize > WavefrontSize) {
      unsigned TIDIGXReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
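  // The address computed below is LDSSize + FrameOffset * WorkGroupSize plus
  // the per-thread id scaled to bytes (TIDReg was shifted left by 2 above).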
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
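  // s_nop N does nothing for N + 1 cycles, so each iteration below retires up
  // to eight wait states; e.g. Count == 10 emits s_nop 7 followed by s_nop 1.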
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
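    // Expand the 64-bit pseudo move into two 32-bit moves on the
    // sub-registers; e.g. copying v[2:3] into v[0:1] becomes
    // v_mov_b32 v0, v2 followed by v_mov_b32 v1, v3.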
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI->getOperand(1).getReg();
    unsigned Src1 = MI->getOperand(2).getReg();
    const MachineOperand &SrcCond = MI->getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCond);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::SI_CONSTDATA_PTR: {
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
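    // S_GETPC_B64 yields the address of the instruction that follows it, so
    // the 32-bit offset operand added below is taken relative to that point.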

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                   .addReg(RegLo)
                   .addOperand(MI->getOperand(1)));
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                   .addReg(RegHi)
                   .addImm(0));

    llvm::finalizeBundle(MBB, Bundler.begin());

    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for
/// a non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still
/// fail to commute the operands; a null pointer is returned in such cases.
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
                                                  bool NewMI,
                                                  unsigned OpIdx0,
                                                  unsigned OpIdx1) const {
  int CommutedOpcode = commuteOpcode(*MI);
  if (CommutedOpcode == -1)
    return nullptr;

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  MachineOperand &Src0 = MI->getOperand(Src0Idx);
  if (!Src0.isReg())
    return nullptr;

  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);

  if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
       OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
      (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
       OpIdx1 != static_cast<unsigned>(Src0Idx)))
    return nullptr;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  if (isVOP2(*MI) || isVOPC(*MI)) {
    const MCInstrDesc &InstrDesc = MI->getDesc();
    // For VOP2 and VOPC instructions, any operand type is valid to use for
    // src0. Make sure we can use src0 as src1.
    //
    // We could be stricter here and only allow commuting if there is a reason
    // to do so. i.e. if both operands are VGPRs there is no real benefit,
    // although MachineCSE attempts to find matches by commuting.
    const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
    if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
      return nullptr;
  }

  if (!Src1.isReg()) {
    // Allow commuting instructions with Imm operands.
    if (NewMI || !Src1.isImm() ||
        (!isVOP2(*MI) && !isVOP3(*MI))) {
      return nullptr;
    }
    // Be sure to copy the source modifiers to the right place.
    if (MachineOperand *Src0Mods
          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      MachineOperand *Src1Mods
        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);

      int Src0ModsVal = Src0Mods->getImm();
      if (!Src1Mods && Src0ModsVal != 0)
        return nullptr;

      // XXX - This assert might be a lie. It might be useful to have a neg
      // modifier with 0.0.
      int Src1ModsVal = Src1Mods->getImm();
      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

      Src1Mods->setImm(Src0ModsVal);
      Src0Mods->setImm(Src1ModsVal);
    }

    unsigned Reg = Src0.getReg();
    unsigned SubReg = Src0.getSubReg();
    if (Src1.isImm())
      Src0.ChangeToImmediate(Src1.getImm());
    else
      llvm_unreachable("Should only have immediates");

    Src1.ChangeToRegister(Reg, false);
    Src1.setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
  }

  if (MI)
    MI->setDesc(get(CommutedOpcode));

  return MI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isCommutable())
    return false;

  unsigned Opc = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
  // immediate. Also, immediate src0 operand is not handled in
  // SIInstrInfo::commuteInstruction().
  if (!MI->getOperand(Src0Idx).isReg())
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);
  if (Src1.isImm()) {
    // SIInstrInfo::commuteInstruction() does support commuting the immediate
    // operand src1 in 2 and 3 operand instructions.
    if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
      return false;
  } else if (Src1.isReg()) {
    // If any source modifiers are set, the generic instruction commuting won't
    // understand how to copy the source modifiers.
    if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
      return false;
  } else
    return false;

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

// TODO: Maybe this should be removed and everything custom folded in
// SIFoldOperands?
bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI->getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    const MachineOperand &ImmOp = DefMI->getOperand(1);

    // If this is a free constant, there's no reason to do this.
    // TODO: We could fold this here instead of letting SIFoldOperands do it
    // later.
    if (isInlineConstant(ImmOp, 4))
      return false;

    MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_f32
    // We should only expect these to be on src0 due to canonicalizations.
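    // For example, when DefMI materializes the constant K:
    //   v_mad_f32 d, K, v1, v2  -->  v_madmk_f32 d, v1, K, v2
    // which still computes K * v1 + v2.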
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
        return false;

      // We need to swap operands 0 and 1 since madmk constant is at operand 1.

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      Src1->ChangeToImmediate(Imm);

      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADMK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }

    // Added part is the constant: Use v_madak_f32
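    // For example:
    //   v_mad_f32 d, v0, v1, K  -->  v_madak_f32 d, v0, v1, K
    // computing v0 * v1 + K with the constant folded into the instruction.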
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      if (!Src0->isImm() &&
          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
        return false;

      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      // ChangingToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADAK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }
  }

  return false;
}

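// Ranges [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB) are
// disjoint if the lower one ends at or before the higher one begins; e.g.
// accesses of width 4 at offsets 0 and 4 do not overlap.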
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, BaseReg1;
  int64_t Offset0, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(*MIa)) {
    if (isDS(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb);
  }

  if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
    if (isMUBUF(*MIb) || isMTBUF(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isSMRD(*MIb);
  }

  if (isSMRD(*MIa)) {
    if (isSMRD(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isMUBUF(*MIb) && !isMTBUF(*MIb);
  }

  if (isFLAT(*MIa)) {
    if (isFLAT(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineBasicBlock::iterator &MI,
                                                 LiveVariables *LV) const {
  switch (MI->getOpcode()) {
  default: return nullptr;
  case AMDGPU::V_MAC_F32_e64: break;
  case AMDGPU::V_MAC_F32_e32: {
    const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
    if (Src0->isImm() && !isInlineConstant(*Src0, 4))
      return nullptr;
    break;
  }
  }

Tom Stellardcc4c8712016-02-16 18:14:56 +00001274 const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00001275 const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
1276 const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
1277 const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);
1278
1279 return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
1280 .addOperand(*Dst)
1281 .addImm(0) // Src0 mods
1282 .addOperand(*Src0)
1283 .addImm(0) // Src1 mods
1284 .addOperand(*Src1)
1285      .addImm(0) // Src2 mods
1286 .addOperand(*Src2)
1287 .addImm(0) // clamp
1288 .addImm(0); // omod
1289}
1290
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00001291bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1292 const MachineBasicBlock *MBB,
1293 const MachineFunction &MF) const {
1294 // Target-independent instructions do not have an implicit-use of EXEC, even
1295 // when they operate on VGPRs. Treating EXEC modifications as scheduling
1296 // boundaries prevents incorrect movements of such instructions.
1297 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1298 if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
1299 return true;
1300
1301 return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF);
1302}
1303
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001304bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault303011a2014-12-17 21:04:08 +00001305 int64_t SVal = Imm.getSExtValue();
1306 if (SVal >= -16 && SVal <= 64)
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001307 return true;
Tom Stellardd0084462014-03-17 17:03:52 +00001308
Matt Arsenault303011a2014-12-17 21:04:08 +00001309 if (Imm.getBitWidth() == 64) {
1310 uint64_t Val = Imm.getZExtValue();
1311 return (DoubleToBits(0.0) == Val) ||
1312 (DoubleToBits(1.0) == Val) ||
1313 (DoubleToBits(-1.0) == Val) ||
1314 (DoubleToBits(0.5) == Val) ||
1315 (DoubleToBits(-0.5) == Val) ||
1316 (DoubleToBits(2.0) == Val) ||
1317 (DoubleToBits(-2.0) == Val) ||
1318 (DoubleToBits(4.0) == Val) ||
1319 (DoubleToBits(-4.0) == Val);
1320 }
1321
Tom Stellardd0084462014-03-17 17:03:52 +00001322 // The actual type of the operand does not seem to matter as long
1323 // as the bits match one of the inline immediate values. For example:
1324 //
1325 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1326 // so it is a legal inline immediate.
1327 //
1328 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1329 // floating-point, so it is a legal inline immediate.
Matt Arsenault303011a2014-12-17 21:04:08 +00001330 uint32_t Val = Imm.getZExtValue();
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001331
Matt Arsenault303011a2014-12-17 21:04:08 +00001332 return (FloatToBits(0.0f) == Val) ||
1333 (FloatToBits(1.0f) == Val) ||
1334 (FloatToBits(-1.0f) == Val) ||
1335 (FloatToBits(0.5f) == Val) ||
1336 (FloatToBits(-0.5f) == Val) ||
1337 (FloatToBits(2.0f) == Val) ||
1338 (FloatToBits(-2.0f) == Val) ||
1339 (FloatToBits(4.0f) == Val) ||
1340 (FloatToBits(-4.0f) == Val);
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001341}
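
// Illustrative sketch (compiled out; the helper below is hypothetical and
// not part of the LLVM tree, while APInt, FloatToBits and DoubleToBits are
// the real APIs used above): how the predicate classifies a few bit
// patterns.
#if 0
static void inlineConstantExamples(const SIInstrInfo &TII) {
  assert(TII.isInlineConstant(APInt(32, 64, true)));           // 64: top of the integer range
  assert(!TII.isInlineConstant(APInt(32, 65, true)));          // 65: must be encoded as a literal
  assert(TII.isInlineConstant(APInt(32, FloatToBits(0.5f))));  // hardware f32 constant
  assert(TII.isInlineConstant(APInt(64, DoubleToBits(-4.0)))); // hardware f64 constant
  assert(!TII.isInlineConstant(APInt(32, FloatToBits(0.1f)))); // arbitrary float: literal
}
#endif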
1342
Matt Arsenault11a4d672015-02-13 19:05:03 +00001343bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
1344 unsigned OpSize) const {
1345 if (MO.isImm()) {
1346 // MachineOperand provides no way to tell the true operand size, since it
1347 // only records a 64-bit value. We need to know the size to determine if a
1348 // 32-bit floating point immediate bit pattern is legal for an integer
1349 // immediate. It would be for any 32-bit integer operand, but would not be
1350 // for a 64-bit one.
1351
1352 unsigned BitSize = 8 * OpSize;
1353 return isInlineConstant(APInt(BitSize, MO.getImm(), true));
1354 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001355
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001356 return false;
Tom Stellard93fabce2013-10-10 17:11:55 +00001357}
1358
Matt Arsenault11a4d672015-02-13 19:05:03 +00001359bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
1360 unsigned OpSize) const {
1361 return MO.isImm() && !isInlineConstant(MO, OpSize);
Tom Stellard93fabce2013-10-10 17:11:55 +00001362}
1363
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001364static bool compareMachineOp(const MachineOperand &Op0,
1365 const MachineOperand &Op1) {
1366 if (Op0.getType() != Op1.getType())
1367 return false;
1368
1369 switch (Op0.getType()) {
1370 case MachineOperand::MO_Register:
1371 return Op0.getReg() == Op1.getReg();
1372 case MachineOperand::MO_Immediate:
1373 return Op0.getImm() == Op1.getImm();
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001374 default:
1375 llvm_unreachable("Didn't expect to be comparing these operand types");
1376 }
1377}
1378
Tom Stellardb02094e2014-07-21 15:45:01 +00001379bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
1380 const MachineOperand &MO) const {
1381 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
1382
Tom Stellardfb77f002015-01-13 22:59:41 +00001383 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
Tom Stellardb02094e2014-07-21 15:45:01 +00001384
1385 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
1386 return true;
1387
1388 if (OpInfo.RegClass < 0)
1389 return false;
1390
Matt Arsenault11a4d672015-02-13 19:05:03 +00001391 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
1392 if (isLiteralConstant(MO, OpSize))
Tom Stellardb6550522015-01-12 19:33:18 +00001393 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001394
Tom Stellardb6550522015-01-12 19:33:18 +00001395 return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellardb02094e2014-07-21 15:45:01 +00001396}
1397
Tom Stellard86d12eb2014-08-01 00:32:28 +00001398bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsaka93603d2015-01-15 18:42:51 +00001399 int Op32 = AMDGPU::getVOPe32(Opcode);
1400 if (Op32 == -1)
1401 return false;
1402
1403 return pseudoToMCOpcode(Op32) != -1;
Tom Stellard86d12eb2014-08-01 00:32:28 +00001404}
1405
Tom Stellardb4a313a2014-08-01 00:32:39 +00001406bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
1407 // The src0_modifier operand is present on all instructions
1408 // that have modifiers.
1409
1410 return AMDGPU::getNamedOperandIdx(Opcode,
1411 AMDGPU::OpName::src0_modifiers) != -1;
1412}
1413
Matt Arsenaultace5b762014-10-17 18:00:43 +00001414bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
1415 unsigned OpName) const {
1416 const MachineOperand *Mods = getNamedOperand(MI, OpName);
1417 return Mods && Mods->getImm();
1418}
1419
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001420bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault11a4d672015-02-13 19:05:03 +00001421 const MachineOperand &MO,
1422 unsigned OpSize) const {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001423 // Literal constants use the constant bus.
Matt Arsenault11a4d672015-02-13 19:05:03 +00001424 if (isLiteralConstant(MO, OpSize))
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001425 return true;
1426
1427 if (!MO.isReg() || !MO.isUse())
1428 return false;
1429
1430 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
1431 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1432
1433 // FLAT_SCR is just an SGPR pair.
1434 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1435 return true;
1436
1437 // EXEC register uses the constant bus.
1438 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1439 return true;
1440
1441 // SGPRs use the constant bus
Matt Arsenault8226fc42016-03-02 23:00:21 +00001442 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
1443 (!MO.isImplicit() &&
1444 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1445 AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001446}
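
// For reference (a summary of what the checks above encode, with assembly
// examples that are illustrative only): a VALU instruction may read the
// scalar operand bus at most once per issue, so at most one unique
// SGPR / VCC / M0 / EXEC / FLAT_SCR value, or one literal constant, can
// appear among its sources:
//   v_add_f32_e64 v0, s0, v1   ; ok      - one SGPR read
//   v_add_f32_e64 v0, s0, s0   ; ok      - the same SGPR counts once
//   v_add_f32_e64 v0, s0, s1   ; illegal - two different SGPRs
// A literal constant counts as a bus use as well, which is why it cannot
// be combined with an SGPR source or an implicit VCC read.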
1447
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00001448static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
1449 for (const MachineOperand &MO : MI.implicit_operands()) {
1450 // We only care about reads.
1451 if (MO.isDef())
1452 continue;
1453
1454 switch (MO.getReg()) {
1455 case AMDGPU::VCC:
1456 case AMDGPU::M0:
1457 case AMDGPU::FLAT_SCR:
1458 return MO.getReg();
1459
1460 default:
1461 break;
1462 }
1463 }
1464
1465 return AMDGPU::NoRegister;
1466}
1467
Tom Stellard93fabce2013-10-10 17:11:55 +00001468bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
1469 StringRef &ErrInfo) const {
1470 uint16_t Opcode = MI->getOpcode();
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001471 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard93fabce2013-10-10 17:11:55 +00001472 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
1473 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
1474 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
1475
Tom Stellardbc4497b2016-02-12 23:45:29 +00001476 // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes
1477 // all SCC users are in the same blocks as their defs.
1478 const MachineBasicBlock *MBB = MI->getParent();
1479 if (MI == &MBB->front()) {
1480 if (MBB->isLiveIn(AMDGPU::SCC)) {
1481 ErrInfo = "scc register cannot be live across blocks.";
1482 return false;
1483 }
1484 }
1485
Tom Stellardca700e42014-03-17 17:03:49 +00001486 // Make sure the number of operands is correct.
1487 const MCInstrDesc &Desc = get(Opcode);
1488 if (!Desc.isVariadic() &&
1489 Desc.getNumOperands() != MI->getNumExplicitOperands()) {
1490 ErrInfo = "Instruction has wrong number of operands.";
1491 return false;
1492 }
1493
Changpeng Fangc9963932015-12-18 20:04:28 +00001494 // Make sure the register classes are correct.
Tom Stellardb4a313a2014-08-01 00:32:39 +00001495 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Tom Stellardfb77f002015-01-13 22:59:41 +00001496 if (MI->getOperand(i).isFPImm()) {
1497 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1498 "all fp values to integers.";
1499 return false;
1500 }
1501
Marek Olsak8eeebcc2015-02-18 22:12:41 +00001502 int RegClass = Desc.OpInfo[i].RegClass;
1503
Tom Stellardca700e42014-03-17 17:03:49 +00001504 switch (Desc.OpInfo[i].OperandType) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00001505 case MCOI::OPERAND_REGISTER:
Matt Arsenault63bef0d2015-02-13 02:47:22 +00001506 if (MI->getOperand(i).isImm()) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00001507 ErrInfo = "Illegal immediate value for operand.";
1508 return false;
1509 }
1510 break;
1511 case AMDGPU::OPERAND_REG_IMM32:
1512 break;
1513 case AMDGPU::OPERAND_REG_INLINE_C:
Marek Olsak8eeebcc2015-02-18 22:12:41 +00001514 if (isLiteralConstant(MI->getOperand(i),
1515 RI.getRegClass(RegClass)->getSize())) {
1516 ErrInfo = "Illegal immediate value for operand.";
1517 return false;
Tom Stellarda305f932014-07-02 20:53:44 +00001518 }
Tom Stellardca700e42014-03-17 17:03:49 +00001519 break;
1520 case MCOI::OPERAND_IMMEDIATE:
Tom Stellardb02094e2014-07-21 15:45:01 +00001521 // Check if this operand is an immediate.
1522 // FrameIndex operands will be replaced by immediates, so they are
1523 // allowed.
Tom Stellardfb77f002015-01-13 22:59:41 +00001524 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
Tom Stellardca700e42014-03-17 17:03:49 +00001525 ErrInfo = "Expected immediate, but got non-immediate";
1526 return false;
1527 }
1528 // Fall-through
1529 default:
1530 continue;
1531 }
1532
1533 if (!MI->getOperand(i).isReg())
1534 continue;
1535
Tom Stellardca700e42014-03-17 17:03:49 +00001536 if (RegClass != -1) {
1537 unsigned Reg = MI->getOperand(i).getReg();
1538 if (TargetRegisterInfo::isVirtualRegister(Reg))
1539 continue;
1540
1541 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1542 if (!RC->contains(Reg)) {
1543 ErrInfo = "Operand has incorrect register class.";
1544 return false;
1545 }
1546 }
1547 }
1548
1549
Tom Stellard93fabce2013-10-10 17:11:55 +00001550 // Verify VOP*
Matt Arsenault3add6432015-10-20 04:35:43 +00001551 if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001552 // Only look at the true operands. Only a real operand can use the constant
1553 // bus, and we don't want to check pseudo-operands like the source modifier
1554 // flags.
1555 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1556
Tom Stellard93fabce2013-10-10 17:11:55 +00001557 unsigned ConstantBusCount = 0;
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00001558 unsigned SGPRUsed = findImplicitSGPRRead(*MI);
1559 if (SGPRUsed != AMDGPU::NoRegister)
1560 ++ConstantBusCount;
1561
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001562 for (int OpIdx : OpIndices) {
1563 if (OpIdx == -1)
1564 break;
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001565 const MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault11a4d672015-02-13 19:05:03 +00001566 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001567 if (MO.isReg()) {
1568 if (MO.getReg() != SGPRUsed)
Tom Stellard93fabce2013-10-10 17:11:55 +00001569 ++ConstantBusCount;
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001570 SGPRUsed = MO.getReg();
1571 } else {
1572 ++ConstantBusCount;
Tom Stellard93fabce2013-10-10 17:11:55 +00001573 }
1574 }
Tom Stellard93fabce2013-10-10 17:11:55 +00001575 }
1576 if (ConstantBusCount > 1) {
1577 ErrInfo = "VOP* instruction uses the constant bus more than once";
1578 return false;
1579 }
1580 }
1581
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001582 // Verify misc. restrictions on specific instructions.
1583 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
1584 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Matt Arsenault262407b2014-09-24 02:17:09 +00001585 const MachineOperand &Src0 = MI->getOperand(Src0Idx);
1586 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
1587 const MachineOperand &Src2 = MI->getOperand(Src2Idx);
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001588 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
1589 if (!compareMachineOp(Src0, Src1) &&
1590 !compareMachineOp(Src0, Src2)) {
1591 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
1592 return false;
1593 }
1594 }
1595 }
1596
Matt Arsenaultd092a062015-10-02 18:58:37 +00001597 // Make sure we aren't losing exec uses in the td files. This mostly requires
1598 // being careful when using let Uses to try to add other use registers.
1599 if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
1600 const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC);
1601 if (!Exec || !Exec->isImplicit()) {
1602 ErrInfo = "VALU instruction does not implicitly read exec mask";
1603 return false;
1604 }
1605 }
1606
Tom Stellard93fabce2013-10-10 17:11:55 +00001607 return true;
1608}
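
// Illustration (hypothetical MIR, not from a real failure log): the
// verifier above would reject, e.g.,
//   %vdst = V_ADD_F32_e64 0, %sgpr0, 0, %sgpr1, 0, 0, implicit %exec
// with "VOP* instruction uses the constant bus more than once", and a
// V_DIV_SCALE_F32 whose src0 matches neither src1 nor src2 with
// "v_div_scale_{f32|f64} require src0 = src1 or src2".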
1609
Matt Arsenaultf14032a2013-11-15 22:02:28 +00001610unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
Tom Stellard82166022013-11-13 23:36:37 +00001611 switch (MI.getOpcode()) {
1612 default: return AMDGPU::INSTRUCTION_LIST_END;
1613 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
1614 case AMDGPU::COPY: return AMDGPU::COPY;
1615 case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard204e61b2014-04-07 19:45:45 +00001616 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Tom Stellarde0387202014-03-21 15:51:54 +00001617 case AMDGPU::S_MOV_B32:
1618 return MI.getOperand(1).isReg() ?
Tom Stellard8c12fd92014-03-24 16:12:34 +00001619 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00001620 case AMDGPU::S_ADD_I32:
1621 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00001622 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00001623 case AMDGPU::S_SUB_I32:
1624 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00001625 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault869cd072014-09-03 23:24:35 +00001626 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault8e2581b2014-03-21 18:01:18 +00001627 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
1628 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
1629 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
1630 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
1631 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
1632 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
1633 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
Tom Stellard82166022013-11-13 23:36:37 +00001634 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
1635 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
1636 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
1637 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
1638 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
1639 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault27cc9582014-04-18 01:53:18 +00001640 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
1641 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault78b86702014-04-18 05:19:26 +00001642 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
1643 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak63a7b082015-03-24 13:40:21 +00001644 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault43160e72014-06-18 17:13:57 +00001645 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault2c335622014-04-09 07:16:16 +00001646 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault689f3252014-06-09 16:36:31 +00001647 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault0cb92e12014-04-11 19:25:18 +00001648 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
1649 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
1650 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
1651 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
1652 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
1653 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellardbc4497b2016-02-12 23:45:29 +00001654 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
1655 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
1656 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
1657 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
1658 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
1659 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Marek Olsakc5368502015-01-15 18:43:01 +00001660 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault295b86e2014-06-17 17:36:27 +00001661 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault85796012014-06-17 17:36:24 +00001662 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsakd2af89d2015-03-04 17:33:45 +00001663 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellardbc4497b2016-02-12 23:45:29 +00001664 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
1665 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard82166022013-11-13 23:36:37 +00001666 }
1667}
1668
1669bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
1670 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
1671}
1672
1673const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
1674 unsigned OpNo) const {
1675 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1676 const MCInstrDesc &Desc = get(MI.getOpcode());
1677 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Matt Arsenault102a7042014-12-11 23:37:34 +00001678 Desc.OpInfo[OpNo].RegClass == -1) {
1679 unsigned Reg = MI.getOperand(OpNo).getReg();
1680
1681 if (TargetRegisterInfo::isVirtualRegister(Reg))
1682 return MRI.getRegClass(Reg);
Matt Arsenault11a4d672015-02-13 19:05:03 +00001683 return RI.getPhysRegClass(Reg);
Matt Arsenault102a7042014-12-11 23:37:34 +00001684 }
Tom Stellard82166022013-11-13 23:36:37 +00001685
1686 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1687 return RI.getRegClass(RCID);
1688}
1689
1690bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
1691 switch (MI.getOpcode()) {
1692 case AMDGPU::COPY:
1693 case AMDGPU::REG_SEQUENCE:
Tom Stellard4f3b04d2014-04-17 21:00:07 +00001694 case AMDGPU::PHI:
Tom Stellarda5687382014-05-15 14:41:55 +00001695 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00001696 return RI.hasVGPRs(getOpRegClass(MI, 0));
1697 default:
1698 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
1699 }
1700}
1701
1702void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
1703 MachineBasicBlock::iterator I = MI;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001704 MachineBasicBlock *MBB = MI->getParent();
Tom Stellard82166022013-11-13 23:36:37 +00001705 MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001706 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00001707 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
1708 const TargetRegisterClass *RC = RI.getRegClass(RCID);
1709 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001710 if (MO.isReg())
Tom Stellard82166022013-11-13 23:36:37 +00001711 Opcode = AMDGPU::COPY;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001712 else if (RI.isSGPRClass(RC))
Matt Arsenault671a0052013-11-14 10:08:50 +00001713 Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001714
Tom Stellard82166022013-11-13 23:36:37 +00001715
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00001716 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001717 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard0c93c9e2014-09-05 14:08:01 +00001718 VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001719 else
Tom Stellard45c0b3a2015-01-07 20:59:25 +00001720 VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001721
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00001722 unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001723 DebugLoc DL = MBB->findDebugLoc(I);
1724 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
1725 .addOperand(MO);
Tom Stellard82166022013-11-13 23:36:37 +00001726 MO.ChangeToRegister(Reg, false);
1727}
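
// Illustrative before/after (hypothetical MIR sketch): if operand OpIdx is
// constrained to a VGPR class but currently holds an immediate,
//   %vdst = V_ADD_I32_e32 12345678, %vgpr1, ...
// becomes
//   %vtmp = V_MOV_B32_e32 12345678
//   %vdst = V_ADD_I32_e32 %vtmp, %vgpr1, ...
// Register sources use COPY instead, and S_MOV_B32 is chosen when the
// constrained class is scalar, exactly as the opcode selection above reads.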
1728
Tom Stellard15834092014-03-21 15:51:57 +00001729unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
1730 MachineRegisterInfo &MRI,
1731 MachineOperand &SuperReg,
1732 const TargetRegisterClass *SuperRC,
1733 unsigned SubIdx,
1734 const TargetRegisterClass *SubRC)
1735 const {
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001736 MachineBasicBlock *MBB = MI->getParent();
1737 DebugLoc DL = MI->getDebugLoc();
Tom Stellard15834092014-03-21 15:51:57 +00001738 unsigned SubReg = MRI.createVirtualRegister(SubRC);
1739
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001740 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
1741 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1742 .addReg(SuperReg.getReg(), 0, SubIdx);
1743 return SubReg;
1744 }
1745
Tom Stellard15834092014-03-21 15:51:57 +00001746 // Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault08d84942014-06-03 23:06:13 +00001747 // value so we don't need to worry about merging its subreg index with the
1748 // SubIdx passed to this function. The register coalescer should be able to
Tom Stellard15834092014-03-21 15:51:57 +00001749 // eliminate this extra copy.
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001750 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard15834092014-03-21 15:51:57 +00001751
Matt Arsenault7480a0e2014-11-17 21:11:37 +00001752 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
1753 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
1754
1755 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1756 .addReg(NewSuperReg, 0, SubIdx);
1757
Tom Stellard15834092014-03-21 15:51:57 +00001758 return SubReg;
1759}
1760
Matt Arsenault248b7b62014-03-24 20:08:09 +00001761MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
1762 MachineBasicBlock::iterator MII,
1763 MachineRegisterInfo &MRI,
1764 MachineOperand &Op,
1765 const TargetRegisterClass *SuperRC,
1766 unsigned SubIdx,
1767 const TargetRegisterClass *SubRC) const {
1768 if (Op.isImm()) {
1769 // XXX - Is there a better way to do this?
1770 if (SubIdx == AMDGPU::sub0)
1771 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
1772 if (SubIdx == AMDGPU::sub1)
1773 return MachineOperand::CreateImm(Op.getImm() >> 32);
1774
1775 llvm_unreachable("Unhandled register index for immediate");
1776 }
1777
1778 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
1779 SubIdx, SubRC);
1780 return MachineOperand::CreateReg(SubReg, false);
1781}
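
// Worked example: for an immediate source the 64-bit value is split
// bitwise, so with Op.getImm() == 0x123456789ABCDEF0:
//   sub0 -> 0x9ABCDEF0 (low 32 bits)
//   sub1 -> 0x12345678 (high 32 bits)
// Register sources instead get a COPY of the requested sub-register via
// buildExtractSubReg above.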
1782
Marek Olsakbe047802014-12-07 12:19:03 +00001783// Change the order of operands from (0, 1, 2) to (0, 2, 1)
1784void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
1785 assert(Inst->getNumExplicitOperands() == 3);
1786 MachineOperand Op1 = Inst->getOperand(1);
1787 Inst->RemoveOperand(1);
1788 Inst->addOperand(Op1);
1789}
1790
Matt Arsenault856d1922015-12-01 19:57:17 +00001791bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
1792 const MCOperandInfo &OpInfo,
1793 const MachineOperand &MO) const {
1794 if (!MO.isReg())
1795 return false;
1796
1797 unsigned Reg = MO.getReg();
1798 const TargetRegisterClass *RC =
1799 TargetRegisterInfo::isVirtualRegister(Reg) ?
1800 MRI.getRegClass(Reg) :
1801 RI.getPhysRegClass(Reg);
1802
Nicolai Haehnle82fc9622016-01-07 17:10:29 +00001803 const SIRegisterInfo *TRI =
1804 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1805 RC = TRI->getSubRegClass(RC, MO.getSubReg());
1806
Matt Arsenault856d1922015-12-01 19:57:17 +00001807 // In order to be legal, the common sub-class must be equal to the
1808 // class of the current operand. For example:
1809 //
1810 // v_mov_b32 s0 ; Operand defined as vsrc_32
1811 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
1812 //
1813 // s_sendmsg 0, s0 ; Operand defined as m0reg
1814 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
1815
1816 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
1817}
1818
1819bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1820 const MCOperandInfo &OpInfo,
1821 const MachineOperand &MO) const {
1822 if (MO.isReg())
1823 return isLegalRegOperand(MRI, OpInfo, MO);
1824
1825 // Handle non-register types that are treated like immediates.
1826 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
1827 return true;
1828}
1829
Tom Stellard0e975cf2014-08-01 00:32:35 +00001830bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
1831 const MachineOperand *MO) const {
1832 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001833 const MCInstrDesc &InstDesc = MI->getDesc();
Tom Stellard0e975cf2014-08-01 00:32:35 +00001834 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1835 const TargetRegisterClass *DefinedRC =
1836 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
1837 if (!MO)
1838 MO = &MI->getOperand(OpIdx);
1839
Matt Arsenault3add6432015-10-20 04:35:43 +00001840 if (isVALU(*MI) &&
Matt Arsenault11a4d672015-02-13 19:05:03 +00001841 usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001842
1843 RegSubRegPair SGPRUsed;
1844 if (MO->isReg())
1845 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
1846
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001847 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1848 if (i == OpIdx)
1849 continue;
Matt Arsenault11a4d672015-02-13 19:05:03 +00001850 const MachineOperand &Op = MI->getOperand(i);
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001851 if (Op.isReg() &&
1852 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault11a4d672015-02-13 19:05:03 +00001853 usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001854 return false;
1855 }
1856 }
1857 }
1858
Tom Stellard0e975cf2014-08-01 00:32:35 +00001859 if (MO->isReg()) {
1860 assert(DefinedRC);
Matt Arsenault856d1922015-12-01 19:57:17 +00001861 return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00001862 }
1863
1864
1865 // Handle non-register types that are treated like immediates.
Tom Stellardfb77f002015-01-13 22:59:41 +00001866 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
Tom Stellard0e975cf2014-08-01 00:32:35 +00001867
Matt Arsenault4364fef2014-09-23 18:30:57 +00001868 if (!DefinedRC) {
1869 // This operand expects an immediate.
Tom Stellard0e975cf2014-08-01 00:32:35 +00001870 return true;
Matt Arsenault4364fef2014-09-23 18:30:57 +00001871 }
Tom Stellard0e975cf2014-08-01 00:32:35 +00001872
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001873 return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00001874}
1875
Matt Arsenault856d1922015-12-01 19:57:17 +00001876void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
1877 MachineInstr *MI) const {
1878 unsigned Opc = MI->getOpcode();
1879 const MCInstrDesc &InstrDesc = get(Opc);
1880
1881 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1882 MachineOperand &Src1 = MI->getOperand(Src1Idx);
1883
1884 // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
1885 // we need to only have one constant bus use.
1886 //
1887 // Note we do not need to worry about literal constants here. They are
1888 // disabled for the operand type for instructions because they will always
1889 // violate the one constant bus use rule.
1890 bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
1891 if (HasImplicitSGPR) {
1892 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1893 MachineOperand &Src0 = MI->getOperand(Src0Idx);
1894
1895 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
1896 legalizeOpWithMove(MI, Src0Idx);
1897 }
1898
1899  // VOP2 instructions accept any operand type in src0, so we don't need to
1900  // check its legality. If src1 is already legal, there is nothing to do.
1901 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
1902 return;
1903
1904 // We do not use commuteInstruction here because it is too aggressive and will
1905 // commute if it is possible. We only want to commute here if it improves
1906 // legality. This can be called a fairly large number of times so don't waste
1907 // compile time pointlessly swapping and checking legality again.
1908 if (HasImplicitSGPR || !MI->isCommutable()) {
1909 legalizeOpWithMove(MI, Src1Idx);
1910 return;
1911 }
1912
1913 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1914 MachineOperand &Src0 = MI->getOperand(Src0Idx);
1915
1916 // If src0 can be used as src1, commuting will make the operands legal.
1917 // Otherwise we have to give up and insert a move.
1918 //
1919 // TODO: Other immediate-like operand kinds could be commuted if there was a
1920 // MachineOperand::ChangeTo* for them.
1921 if ((!Src1.isImm() && !Src1.isReg()) ||
1922 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
1923 legalizeOpWithMove(MI, Src1Idx);
1924 return;
1925 }
1926
1927 int CommutedOpc = commuteOpcode(*MI);
1928 if (CommutedOpc == -1) {
1929 legalizeOpWithMove(MI, Src1Idx);
1930 return;
1931 }
1932
1933 MI->setDesc(get(CommutedOpc));
1934
1935 unsigned Src0Reg = Src0.getReg();
1936 unsigned Src0SubReg = Src0.getSubReg();
1937 bool Src0Kill = Src0.isKill();
1938
1939 if (Src1.isImm())
1940 Src0.ChangeToImmediate(Src1.getImm());
1941 else if (Src1.isReg()) {
1942 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
1943 Src0.setSubReg(Src1.getSubReg());
1944 } else
1945 llvm_unreachable("Should only have register or immediate operands");
1946
1947 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
1948 Src1.setSubReg(Src0SubReg);
1949}
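
// Illustrative example (hypothetical MIR sketch): with src1 constrained to
// VGPRs,
//   %vdst = V_SUB_I32_e32 %vgpr0, %sgpr1, ...
// is legalized by commuting to the reverse opcode with swapped sources,
//   %vdst = V_SUBREV_I32_e32 %sgpr1, %vgpr0, ...
// rather than by inserting a copy; legalizeOpWithMove is the fallback when
// commuting cannot help (e.g. v_addc_u32, whose implicit VCC read already
// occupies the constant bus).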
1950
Matt Arsenault6005fcb2015-10-21 21:51:02 +00001951// Legalize VOP3 operands. Because all operand types are supported for any
1952// operand, and since literal constants are not allowed and should never be
1953// seen, we only need to worry about inserting copies if we use multiple SGPR
1954// operands.
1955void SIInstrInfo::legalizeOperandsVOP3(
1956 MachineRegisterInfo &MRI,
1957 MachineInstr *MI) const {
1958 unsigned Opc = MI->getOpcode();
1959
1960 int VOP3Idx[3] = {
1961 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
1962 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
1963 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
1964 };
1965
1966 // Find the one SGPR operand we are allowed to use.
1967 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
1968
1969 for (unsigned i = 0; i < 3; ++i) {
1970 int Idx = VOP3Idx[i];
1971 if (Idx == -1)
1972 break;
1973 MachineOperand &MO = MI->getOperand(Idx);
1974
1975 // We should never see a VOP3 instruction with an illegal immediate operand.
1976 if (!MO.isReg())
1977 continue;
1978
1979 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
1980 continue; // VGPRs are legal
1981
1982 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
1983 SGPRReg = MO.getReg();
1984 // We can use one SGPR in each VOP3 instruction.
1985 continue;
1986 }
1987
1988 // If we make it this far, then the operand is not legal and we must
1989 // legalize it.
1990 legalizeOpWithMove(MI, Idx);
1991 }
1992}
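
// Illustrative example (hypothetical MIR sketch): one unique SGPR is
// allowed across the VOP3 sources, so
//   %vdst = V_MAD_F32 0, %sgpr0, 0, %sgpr0, 0, %sgpr4, 0, 0
// keeps %sgpr0 (used twice, counted once) but copies %sgpr4 into a VGPR:
//   %vtmp = COPY %sgpr4
//   %vdst = V_MAD_F32 0, %sgpr0, 0, %sgpr0, 0, %vtmp, 0, 0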
1993
Tom Stellard1397d492016-02-11 21:45:07 +00001994unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
1995 MachineRegisterInfo &MRI) const {
1996 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
1997 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
1998 unsigned DstReg = MRI.createVirtualRegister(SRC);
1999 unsigned SubRegs = VRC->getSize() / 4;
2000
2001 SmallVector<unsigned, 8> SRegs;
2002 for (unsigned i = 0; i < SubRegs; ++i) {
2003 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2004 BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
2005 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
2006 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
2007 SRegs.push_back(SGPR);
2008 }
2009
2010 MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
2011 UseMI->getDebugLoc(),
2012 get(AMDGPU::REG_SEQUENCE), DstReg);
2013 for (unsigned i = 0; i < SubRegs; ++i) {
2014 MIB.addReg(SRegs[i]);
2015 MIB.addImm(RI.getSubRegFromChannel(i));
2016 }
2017 return DstReg;
2018}
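
// Illustrative expansion (hypothetical MIR sketch) for a 64-bit pointer
// (VReg_64 source, SubRegs == 2):
//   %s0   = V_READFIRSTLANE_B32 %vptr:sub0
//   %s1   = V_READFIRSTLANE_B32 %vptr:sub1
//   %sptr = REG_SEQUENCE %s0, sub0, %s1, sub1
// This is only correct when the VGPR value is wave-uniform, which the
// callers below (SMRD and MIMG legalization) guarantee.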
2019
Tom Stellard467b5b92016-02-20 00:37:25 +00002020void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
2021 MachineInstr *MI) const {
2022
2023  // If the pointer is stored in VGPRs, then we need to move it to
2024  // SGPRs using v_readfirstlane. This is safe because we only select
2025  // loads with uniform pointers to SMRD instructions, so we know the
2026  // pointer value is uniform.
2027 MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
2028 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
2029 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
2030 SBase->setReg(SGPR);
2031 }
2032}
2033
Tom Stellard82166022013-11-13 23:36:37 +00002034void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
2035 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00002036
2037 // Legalize VOP2
Tom Stellardbc4497b2016-02-12 23:45:29 +00002038 if (isVOP2(*MI) || isVOPC(*MI)) {
Matt Arsenault856d1922015-12-01 19:57:17 +00002039 legalizeOperandsVOP2(MRI, MI);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002040 return;
Tom Stellard82166022013-11-13 23:36:37 +00002041 }
2042
2043 // Legalize VOP3
Matt Arsenault3add6432015-10-20 04:35:43 +00002044 if (isVOP3(*MI)) {
Matt Arsenault6005fcb2015-10-21 21:51:02 +00002045 legalizeOperandsVOP3(MRI, MI);
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00002046 return;
Tom Stellard82166022013-11-13 23:36:37 +00002047 }
2048
Tom Stellard467b5b92016-02-20 00:37:25 +00002049 // Legalize SMRD
2050 if (isSMRD(*MI)) {
2051 legalizeOperandsSMRD(MRI, MI);
2052 return;
2053 }
2054
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002055 // Legalize REG_SEQUENCE and PHI
Tom Stellard82166022013-11-13 23:36:37 +00002056  // The register class of the operands must match the register class of
2057  // the output.
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002058 if (MI->getOpcode() == AMDGPU::PHI) {
Craig Topper062a2ba2014-04-25 05:30:21 +00002059 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
Tom Stellard82166022013-11-13 23:36:37 +00002060 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
2061 if (!MI->getOperand(i).isReg() ||
2062 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
2063 continue;
2064 const TargetRegisterClass *OpRC =
2065 MRI.getRegClass(MI->getOperand(i).getReg());
2066 if (RI.hasVGPRs(OpRC)) {
2067 VRC = OpRC;
2068 } else {
2069 SRC = OpRC;
2070 }
2071 }
2072
2073  // If any of the operands are VGPR registers, then they all must be
2074  // VGPRs; otherwise we will create illegal VGPR->SGPR copies when
2075  // legalizing them.
2076 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
2077 if (!VRC) {
2078 assert(SRC);
2079 VRC = RI.getEquivalentVGPRClass(SRC);
2080 }
2081 RC = VRC;
2082 } else {
2083 RC = SRC;
2084 }
2085
2086 // Update all the operands so they have the same type.
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002087 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2088 MachineOperand &Op = MI->getOperand(I);
2089 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00002090 continue;
2091 unsigned DstReg = MRI.createVirtualRegister(RC);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002092
2093 // MI is a PHI instruction.
2094 MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
2095 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2096
2097 BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2098 .addOperand(Op);
2099 Op.setReg(DstReg);
2100 }
2101 }
2102
2103 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2104 // VGPR dest type and SGPR sources, insert copies so all operands are
2105 // VGPRs. This seems to help operand folding / the register coalescer.
2106 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
2107 MachineBasicBlock *MBB = MI->getParent();
2108 const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
2109 if (RI.hasVGPRs(DstRC)) {
2110 // Update all the operands so they are VGPR register classes. These may
2111 // not be the same register class because REG_SEQUENCE supports mixing
2112 // subregister index types e.g. sub0_sub1 + sub2 + sub3
2113 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2114 MachineOperand &Op = MI->getOperand(I);
2115 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2116 continue;
2117
2118 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2119 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2120 if (VRC == OpRC)
2121 continue;
2122
2123 unsigned DstReg = MRI.createVirtualRegister(VRC);
2124
2125 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2126 .addOperand(Op);
2127
2128 Op.setReg(DstReg);
2129 Op.setIsKill();
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002130 }
Tom Stellard82166022013-11-13 23:36:37 +00002131 }
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00002132
2133 return;
Tom Stellard82166022013-11-13 23:36:37 +00002134 }
Tom Stellard15834092014-03-21 15:51:57 +00002135
Tom Stellarda5687382014-05-15 14:41:55 +00002136 // Legalize INSERT_SUBREG
2137 // src0 must have the same register class as dst
2138 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
2139 unsigned Dst = MI->getOperand(0).getReg();
2140 unsigned Src0 = MI->getOperand(1).getReg();
2141 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2142 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2143 if (DstRC != Src0RC) {
2144 MachineBasicBlock &MBB = *MI->getParent();
2145 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
2146 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
2147 .addReg(Src0);
2148 MI->getOperand(1).setReg(NewSrc0);
2149 }
2150 return;
2151 }
2152
Tom Stellard1397d492016-02-11 21:45:07 +00002153 // Legalize MIMG
2154 if (isMIMG(*MI)) {
2155 MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
2156 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2157 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2158 SRsrc->setReg(SGPR);
2159 }
2160
2161 MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
2162 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2163 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2164 SSamp->setReg(SGPR);
2165 }
2166 return;
2167 }
2168
Tom Stellard15834092014-03-21 15:51:57 +00002169 // Legalize MUBUF* instructions
2170 // FIXME: If we start using the non-addr64 instructions for compute, we
2171 // may need to legalize them here.
Tom Stellard155bbb72014-08-11 22:18:17 +00002172 int SRsrcIdx =
2173 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
2174 if (SRsrcIdx != -1) {
2175 // We have an MUBUF instruction
2176 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
2177 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
2178 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2179 RI.getRegClass(SRsrcRC))) {
2180 // The operands are legal.
2181      // FIXME: We may need to legalize operands besides srsrc.
2182 return;
2183 }
Tom Stellard15834092014-03-21 15:51:57 +00002184
Tom Stellard155bbb72014-08-11 22:18:17 +00002185 MachineBasicBlock &MBB = *MI->getParent();
Matt Arsenaultef67d762015-09-09 17:03:29 +00002186
Eric Christopher572e03a2015-06-19 01:53:21 +00002187 // Extract the ptr from the resource descriptor.
Matt Arsenaultef67d762015-09-09 17:03:29 +00002188 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2189 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
Tom Stellard15834092014-03-21 15:51:57 +00002190
Tom Stellard155bbb72014-08-11 22:18:17 +00002191 // Create an empty resource descriptor
2192 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2193 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2194 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2195 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
Tom Stellard794c8c02014-12-02 17:05:41 +00002196 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
Tom Stellard15834092014-03-21 15:51:57 +00002197
Tom Stellard155bbb72014-08-11 22:18:17 +00002198 // Zero64 = 0
2199 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
2200 Zero64)
2201 .addImm(0);
Tom Stellard15834092014-03-21 15:51:57 +00002202
Tom Stellard155bbb72014-08-11 22:18:17 +00002203 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2204 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2205 SRsrcFormatLo)
Tom Stellard794c8c02014-12-02 17:05:41 +00002206 .addImm(RsrcDataFormat & 0xFFFFFFFF);
Tom Stellard15834092014-03-21 15:51:57 +00002207
Tom Stellard155bbb72014-08-11 22:18:17 +00002208 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2209 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2210 SRsrcFormatHi)
Tom Stellard794c8c02014-12-02 17:05:41 +00002211 .addImm(RsrcDataFormat >> 32);
Tom Stellard15834092014-03-21 15:51:57 +00002212
Tom Stellard155bbb72014-08-11 22:18:17 +00002213 // NewSRsrc = {Zero64, SRsrcFormat}
Matt Arsenaultef67d762015-09-09 17:03:29 +00002214 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2215 .addReg(Zero64)
2216 .addImm(AMDGPU::sub0_sub1)
2217 .addReg(SRsrcFormatLo)
2218 .addImm(AMDGPU::sub2)
2219 .addReg(SRsrcFormatHi)
2220 .addImm(AMDGPU::sub3);
Tom Stellard155bbb72014-08-11 22:18:17 +00002221
2222 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2223 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00002224 if (VAddr) {
2225 // This is already an ADDR64 instruction so we need to add the pointer
2226 // extracted from the resource descriptor to the current value of VAddr.
Matt Arsenaultef67d762015-09-09 17:03:29 +00002227 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2228 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00002229
Matt Arsenaultef67d762015-09-09 17:03:29 +00002230 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002231 DebugLoc DL = MI->getDebugLoc();
2232 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Matt Arsenaultef67d762015-09-09 17:03:29 +00002233 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002234 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard15834092014-03-21 15:51:57 +00002235
Matt Arsenaultef67d762015-09-09 17:03:29 +00002236 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002237 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Matt Arsenaultef67d762015-09-09 17:03:29 +00002238 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002239 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard15834092014-03-21 15:51:57 +00002240
Matt Arsenaultef67d762015-09-09 17:03:29 +00002241 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2242 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2243 .addReg(NewVAddrLo)
2244 .addImm(AMDGPU::sub0)
2245 .addReg(NewVAddrHi)
2246 .addImm(AMDGPU::sub1);
Tom Stellard155bbb72014-08-11 22:18:17 +00002247 } else {
2248      // This instruction is the _OFFSET variant, so we need to convert it to
2249 // ADDR64.
Matt Arsenaulta40450c2015-11-05 02:46:56 +00002250 assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
2251 < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
2252 "FIXME: Need to emit flat atomics here");
2253
Tom Stellard155bbb72014-08-11 22:18:17 +00002254 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
2255 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
2256 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
Tom Stellard155bbb72014-08-11 22:18:17 +00002257 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00002258
2259      // Atomics with return have an additional tied operand and are
2260      // missing some of the special bits.
2261 MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
2262 MachineInstr *Addr64;
2263
2264 if (!VDataIn) {
2265 // Regular buffer load / store.
2266 MachineInstrBuilder MIB
2267 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2268 .addOperand(*VData)
2269 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2270 // This will be replaced later
2271 // with the new value of vaddr.
2272 .addOperand(*SRsrc)
2273 .addOperand(*SOffset)
2274 .addOperand(*Offset);
2275
2276 // Atomics do not have this operand.
2277 if (const MachineOperand *GLC
2278 = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
2279 MIB.addImm(GLC->getImm());
2280 }
2281
2282 MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
2283
2284 if (const MachineOperand *TFE
2285 = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
2286 MIB.addImm(TFE->getImm());
2287 }
2288
2289 MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2290 Addr64 = MIB;
2291 } else {
2292 // Atomics with return.
2293 Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2294 .addOperand(*VData)
2295 .addOperand(*VDataIn)
2296 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2297 // This will be replaced later
2298 // with the new value of vaddr.
2299 .addOperand(*SRsrc)
2300 .addOperand(*SOffset)
2301 .addOperand(*Offset)
2302 .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
2303 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2304 }
Tom Stellard15834092014-03-21 15:51:57 +00002305
Tom Stellard155bbb72014-08-11 22:18:17 +00002306 MI->removeFromParent();
2307 MI = Addr64;
Tom Stellard15834092014-03-21 15:51:57 +00002308
Matt Arsenaultef67d762015-09-09 17:03:29 +00002309      // NewVaddr = {SRsrcPtr:sub0, SRsrcPtr:sub1}, the pointer extracted above
2310 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2311 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2312 .addImm(AMDGPU::sub0)
2313 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2314 .addImm(AMDGPU::sub1);
2315
Tom Stellard155bbb72014-08-11 22:18:17 +00002316 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2317 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
Tom Stellard15834092014-03-21 15:51:57 +00002318 }
Tom Stellard155bbb72014-08-11 22:18:17 +00002319
Tom Stellard155bbb72014-08-11 22:18:17 +00002320 // Update the instruction to use NewVaddr
2321 VAddr->setReg(NewVAddr);
2322 // Update the instruction to use NewSRsrc
2323 SRsrc->setReg(NewSRsrc);
Tom Stellard15834092014-03-21 15:51:57 +00002324 }
Tom Stellard82166022013-11-13 23:36:37 +00002325}
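
// Illustrative _OFFSET -> _ADDR64 rewrite (hypothetical MIR sketch) for the
// MUBUF path above, when srsrc is not already a legal SReg_128:
//   %vdst = BUFFER_LOAD_DWORD_OFFSET %vrsrc, %soffset, %offset, ...
// becomes roughly
//   %ptr     = COPY %vrsrc.sub0_sub1                 ; pointer from the rsrc
//   %newrsrc = REG_SEQUENCE 0 (64-bit), RSRC_DATA_FORMAT lo, hi
//   %vaddr   = REG_SEQUENCE %ptr:sub0, sub0, %ptr:sub1, sub1
//   %vdst    = BUFFER_LOAD_DWORD_ADDR64 %vaddr, %newrsrc, %soffset, %offset, ...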
2326
2327void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
2328 SmallVector<MachineInstr *, 128> Worklist;
2329 Worklist.push_back(&TopInst);
2330
2331 while (!Worklist.empty()) {
2332 MachineInstr *Inst = Worklist.pop_back_val();
Tom Stellarde0387202014-03-21 15:51:54 +00002333 MachineBasicBlock *MBB = Inst->getParent();
2334 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2335
Matt Arsenault27cc9582014-04-18 01:53:18 +00002336 unsigned Opcode = Inst->getOpcode();
Tom Stellard0c354f22014-04-30 15:31:29 +00002337 unsigned NewOpcode = getVALUOp(*Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00002338
Tom Stellarde0387202014-03-21 15:51:54 +00002339 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00002340 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00002341 default:
Tom Stellard0c354f22014-04-30 15:31:29 +00002342 break;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002343 case AMDGPU::S_AND_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002344 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002345 Inst->eraseFromParent();
2346 continue;
2347
2348 case AMDGPU::S_OR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002349 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002350 Inst->eraseFromParent();
2351 continue;
2352
2353 case AMDGPU::S_XOR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002354 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002355 Inst->eraseFromParent();
2356 continue;
2357
2358 case AMDGPU::S_NOT_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002359 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002360 Inst->eraseFromParent();
2361 continue;
2362
Matt Arsenault8333e432014-06-10 19:18:24 +00002363 case AMDGPU::S_BCNT1_I32_B64:
2364 splitScalar64BitBCNT(Worklist, Inst);
2365 Inst->eraseFromParent();
2366 continue;
2367
Matt Arsenault94812212014-11-14 18:18:16 +00002368 case AMDGPU::S_BFE_I64: {
2369 splitScalar64BitBFE(Worklist, Inst);
2370 Inst->eraseFromParent();
2371 continue;
2372 }
2373
Marek Olsakbe047802014-12-07 12:19:03 +00002374 case AMDGPU::S_LSHL_B32:
2375 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2376 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2377 swapOperands(Inst);
2378 }
2379 break;
2380 case AMDGPU::S_ASHR_I32:
2381 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2382 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2383 swapOperands(Inst);
2384 }
2385 break;
2386 case AMDGPU::S_LSHR_B32:
2387 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2388 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2389 swapOperands(Inst);
2390 }
2391 break;
Marek Olsak707a6d02015-02-03 21:53:01 +00002392 case AMDGPU::S_LSHL_B64:
2393 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2394 NewOpcode = AMDGPU::V_LSHLREV_B64;
2395 swapOperands(Inst);
2396 }
2397 break;
2398 case AMDGPU::S_ASHR_I64:
2399 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2400 NewOpcode = AMDGPU::V_ASHRREV_I64;
2401 swapOperands(Inst);
2402 }
2403 break;
2404 case AMDGPU::S_LSHR_B64:
2405 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2406 NewOpcode = AMDGPU::V_LSHRREV_B64;
2407 swapOperands(Inst);
2408 }
2409 break;
Marek Olsakbe047802014-12-07 12:19:03 +00002410
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00002411 case AMDGPU::S_ABS_I32:
2412 lowerScalarAbs(Worklist, Inst);
2413 Inst->eraseFromParent();
2414 continue;
2415
Tom Stellardbc4497b2016-02-12 23:45:29 +00002416 case AMDGPU::S_CBRANCH_SCC0:
2417 case AMDGPU::S_CBRANCH_SCC1:
2418 // Clear unused bits of vcc
2419 BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
2420 .addReg(AMDGPU::EXEC)
2421 .addReg(AMDGPU::VCC);
2422 break;
2423
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002424 case AMDGPU::S_BFE_U64:
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002425 case AMDGPU::S_BFM_B64:
2426 llvm_unreachable("Moving this op to VALU not implemented");
Tom Stellarde0387202014-03-21 15:51:54 +00002427 }
2428
Tom Stellard15834092014-03-21 15:51:57 +00002429 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2430 // We cannot move this instruction to the VALU, so we should try to
2431 // legalize its operands instead.
2432 legalizeOperands(Inst);
Tom Stellard82166022013-11-13 23:36:37 +00002433 continue;
Tom Stellard15834092014-03-21 15:51:57 +00002434 }
Tom Stellard82166022013-11-13 23:36:37 +00002435
Tom Stellard82166022013-11-13 23:36:37 +00002436 // Use the new VALU Opcode.
2437 const MCInstrDesc &NewDesc = get(NewOpcode);
2438 Inst->setDesc(NewDesc);
2439
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002440    // Remove any references to SCC. Vector instructions can't read from it, and
2441    // we're just about to add the implicit use / defs of VCC, and we don't want
2442    // both.
2443 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
2444 MachineOperand &Op = Inst->getOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00002445 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002446 Inst->RemoveOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00002447 addSCCDefUsersToVALUWorklist(Inst, Worklist);
2448 }
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002449 }
2450
Matt Arsenault27cc9582014-04-18 01:53:18 +00002451 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2452 // We are converting these to a BFE, so we need to add the missing
2453 // operands for the size and offset.
2454 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
2455 Inst->addOperand(MachineOperand::CreateImm(0));
2456 Inst->addOperand(MachineOperand::CreateImm(Size));
2457
Matt Arsenaultb5b51102014-06-10 19:18:21 +00002458 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
2459 // The VALU version adds the second operand to the result, so insert an
2460 // extra 0 operand.
2461 Inst->addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00002462 }
2463
Alex Lorenzb4d0d6a2015-07-31 23:30:09 +00002464 Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
Tom Stellard82166022013-11-13 23:36:37 +00002465
Matt Arsenault78b86702014-04-18 05:19:26 +00002466 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
2467 const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
2468 // If we need to move this to VGPRs, we need to unpack the second operand
2469 // back into the 2 separate ones for bit offset and width.
2470 assert(OffsetWidthOp.isImm() &&
2471 "Scalar BFE is only implemented for constant width and offset");
2472 uint32_t Imm = OffsetWidthOp.getImm();
2473
2474 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2475 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
Matt Arsenault78b86702014-04-18 05:19:26 +00002476 Inst->RemoveOperand(2); // Remove old immediate.
2477 Inst->addOperand(MachineOperand::CreateImm(Offset));
Vincent Lejeune94af31f2014-05-10 19:18:33 +00002478 Inst->addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00002479 }
2480
Tom Stellardbc4497b2016-02-12 23:45:29 +00002481 bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef();
2482 unsigned NewDstReg = AMDGPU::NoRegister;
2483 if (HasDst) {
2484 // Update the destination register class.
2485 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
2486 if (!NewDstRC)
2487 continue;
Tom Stellard82166022013-11-13 23:36:37 +00002488
Tom Stellardbc4497b2016-02-12 23:45:29 +00002489 unsigned DstReg = Inst->getOperand(0).getReg();
2490 NewDstReg = MRI.createVirtualRegister(NewDstRC);
2491 MRI.replaceRegWith(DstReg, NewDstReg);
2492 }
Tom Stellard82166022013-11-13 23:36:37 +00002493
Tom Stellarde1a24452014-04-17 21:00:01 +00002494 // Legalize the operands
2495 legalizeOperands(Inst);
2496
Tom Stellardbc4497b2016-02-12 23:45:29 +00002497 if (HasDst)
2498 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard82166022013-11-13 23:36:37 +00002499 }
2500}
2501
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00002502//===----------------------------------------------------------------------===//
2503// Indirect addressing callbacks
2504//===----------------------------------------------------------------------===//
2505
Tom Stellard26a3b672013-10-22 18:19:10 +00002506const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
Tom Stellard45c0b3a2015-01-07 20:59:25 +00002507 return &AMDGPU::VGPR_32RegClass;
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00002508}
2509
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00002510void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
2511 MachineInstr *Inst) const {
2512 MachineBasicBlock &MBB = *Inst->getParent();
2513 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2514 MachineBasicBlock::iterator MII = Inst;
2515 DebugLoc DL = Inst->getDebugLoc();
2516
2517 MachineOperand &Dest = Inst->getOperand(0);
2518 MachineOperand &Src = Inst->getOperand(1);
2519 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2520 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2521
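  // Lower abs(x) as max(x, 0 - x) on the VALU:
  //   tmp    = 0 - src        (V_SUB_I32)
  //   result = max(src, tmp)  (V_MAX_I32)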
2522 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
2523 .addImm(0)
2524 .addReg(Src.getReg());
2525
2526 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
2527 .addReg(Src.getReg())
2528 .addReg(TmpReg);
2529
2530 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2531 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2532}
2533
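// Split a 64-bit scalar unary operation into two 32-bit VALU operations, one
// on each half of the source, and recombine the two results with a
// REG_SEQUENCE (e.g. a 64-bit NOT becomes a 32-bit NOT of each half).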
Matt Arsenault689f3252014-06-09 16:36:31 +00002534void SIInstrInfo::splitScalar64BitUnaryOp(
2535 SmallVectorImpl<MachineInstr *> &Worklist,
2536 MachineInstr *Inst,
2537 unsigned Opcode) const {
2538 MachineBasicBlock &MBB = *Inst->getParent();
2539 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2540
2541 MachineOperand &Dest = Inst->getOperand(0);
2542 MachineOperand &Src0 = Inst->getOperand(1);
2543 DebugLoc DL = Inst->getDebugLoc();
2544
2545 MachineBasicBlock::iterator MII = Inst;
2546
2547 const MCInstrDesc &InstDesc = get(Opcode);
2548 const TargetRegisterClass *Src0RC = Src0.isReg() ?
2549 MRI.getRegClass(Src0.getReg()) :
2550 &AMDGPU::SGPR_32RegClass;
2551
2552 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2553
2554 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2555 AMDGPU::sub0, Src0SubRC);
2556
2557 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00002558 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2559 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00002560
Matt Arsenaultf003c382015-08-26 20:47:50 +00002561 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
2562 BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Matt Arsenault689f3252014-06-09 16:36:31 +00002563 .addOperand(SrcReg0Sub0);
2564
2565 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2566 AMDGPU::sub1, Src0SubRC);
2567
Matt Arsenaultf003c382015-08-26 20:47:50 +00002568 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
2569 BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Matt Arsenault689f3252014-06-09 16:36:31 +00002570 .addOperand(SrcReg0Sub1);
2571
Matt Arsenaultf003c382015-08-26 20:47:50 +00002572 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault689f3252014-06-09 16:36:31 +00002573 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2574 .addReg(DestSub0)
2575 .addImm(AMDGPU::sub0)
2576 .addReg(DestSub1)
2577 .addImm(AMDGPU::sub1);
2578
2579 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2580
Matt Arsenaultf003c382015-08-26 20:47:50 +00002581 // We don't need to call legalizeOperands here because for a single-operand
2582 // instruction, src0 accepts any kind of input.
2583
2584 // Move all users of this moved value.
2585 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault689f3252014-06-09 16:36:31 +00002586}
2587
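// Split a 64-bit scalar binary operation into two 32-bit VALU operations on
// the corresponding halves of the sources and recombine the results with a
// REG_SEQUENCE. This only works when the halves are independent, i.e. for
// bitwise operations such as AND, OR and XOR.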
2588void SIInstrInfo::splitScalar64BitBinaryOp(
2589 SmallVectorImpl<MachineInstr *> &Worklist,
2590 MachineInstr *Inst,
2591 unsigned Opcode) const {
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002592 MachineBasicBlock &MBB = *Inst->getParent();
2593 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2594
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002595 MachineOperand &Dest = Inst->getOperand(0);
2596 MachineOperand &Src0 = Inst->getOperand(1);
2597 MachineOperand &Src1 = Inst->getOperand(2);
2598 DebugLoc DL = Inst->getDebugLoc();
2599
2600 MachineBasicBlock::iterator MII = Inst;
2601
2602 const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault684dc802014-03-24 20:08:13 +00002603 const TargetRegisterClass *Src0RC = Src0.isReg() ?
2604 MRI.getRegClass(Src0.getReg()) :
2605 &AMDGPU::SGPR_32RegClass;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002606
Matt Arsenault684dc802014-03-24 20:08:13 +00002607 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
2608 const TargetRegisterClass *Src1RC = Src1.isReg() ?
2609 MRI.getRegClass(Src1.getReg()) :
2610 &AMDGPU::SGPR_32RegClass;
2611
2612 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
2613
2614 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2615 AMDGPU::sub0, Src0SubRC);
2616 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2617 AMDGPU::sub0, Src1SubRC);
2618
2619 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00002620 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
2621 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault684dc802014-03-24 20:08:13 +00002622
Matt Arsenaultf003c382015-08-26 20:47:50 +00002623 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002624 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Matt Arsenault248b7b62014-03-24 20:08:09 +00002625 .addOperand(SrcReg0Sub0)
2626 .addOperand(SrcReg1Sub0);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002627
Matt Arsenault684dc802014-03-24 20:08:13 +00002628 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
2629 AMDGPU::sub1, Src0SubRC);
2630 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
2631 AMDGPU::sub1, Src1SubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002632
Matt Arsenaultf003c382015-08-26 20:47:50 +00002633 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002634 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Matt Arsenault248b7b62014-03-24 20:08:09 +00002635 .addOperand(SrcReg0Sub1)
2636 .addOperand(SrcReg1Sub1);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002637
Matt Arsenaultf003c382015-08-26 20:47:50 +00002638 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002639 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
2640 .addReg(DestSub0)
2641 .addImm(AMDGPU::sub0)
2642 .addReg(DestSub1)
2643 .addImm(AMDGPU::sub1);
2644
2645 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
2646
2647 // Try to legalize the operands in case we need to swap the order to keep it
2648 // valid.
Matt Arsenaultf003c382015-08-26 20:47:50 +00002649 legalizeOperands(LoHalf);
2650 legalizeOperands(HiHalf);
2651
2652 // Move all users of this moved value.
2653 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002654}
2655
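// Split S_BCNT1_I32_B64 into two V_BCNT_U32_B32: count the bits of the low
// half, then count the bits of the high half, passing the first count as the
// accumulator operand (the VALU bcnt adds its second operand to the result).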
Matt Arsenault8333e432014-06-10 19:18:24 +00002656void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
2657 MachineInstr *Inst) const {
2658 MachineBasicBlock &MBB = *Inst->getParent();
2659 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2660
2661 MachineBasicBlock::iterator MII = Inst;
2662 DebugLoc DL = Inst->getDebugLoc();
2663
2664 MachineOperand &Dest = Inst->getOperand(0);
2665 MachineOperand &Src = Inst->getOperand(1);
2666
Marek Olsakc5368502015-01-15 18:43:01 +00002667 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault8333e432014-06-10 19:18:24 +00002668 const TargetRegisterClass *SrcRC = Src.isReg() ?
2669 MRI.getRegClass(Src.getReg()) :
2670 &AMDGPU::SGPR_32RegClass;
2671
2672 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2673 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2674
2675 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
2676
2677 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2678 AMDGPU::sub0, SrcSubRC);
2679 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
2680 AMDGPU::sub1, SrcSubRC);
2681
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00002682 BuildMI(MBB, MII, DL, InstDesc, MidReg)
Matt Arsenault8333e432014-06-10 19:18:24 +00002683 .addOperand(SrcRegSub0)
2684 .addImm(0);
2685
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00002686 BuildMI(MBB, MII, DL, InstDesc, ResultReg)
Matt Arsenault8333e432014-06-10 19:18:24 +00002687 .addOperand(SrcRegSub1)
2688 .addReg(MidReg);
2689
2690 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2691
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00002692 // We don't need to legalize operands here. src0 for either instruction can be
2693 // an SGPR, and the second input is unused or determined here.
2694 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault8333e432014-06-10 19:18:24 +00002695}
2696
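// Split the 64-bit scalar BFE into VALU instructions. Only the sext_inreg
// pattern (offset 0, width <= 32) is handled: V_BFE_I32 sign-extends the low
// half, and an arithmetic shift right by 31 replicates the sign bit into the
// high half.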
Matt Arsenault94812212014-11-14 18:18:16 +00002697void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
2698 MachineInstr *Inst) const {
2699 MachineBasicBlock &MBB = *Inst->getParent();
2700 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2701 MachineBasicBlock::iterator MII = Inst;
2702 DebugLoc DL = Inst->getDebugLoc();
2703
2704 MachineOperand &Dest = Inst->getOperand(0);
2705 uint32_t Imm = Inst->getOperand(2).getImm();
2706 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2707 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
2708
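  // Offset is only used in the assert below; reference it here so builds with
  // asserts disabled don't warn about an unused variable.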
Matt Arsenault6ad34262014-11-14 18:40:49 +00002709 (void) Offset;
2710
Matt Arsenault94812212014-11-14 18:18:16 +00002711 // Only sext_inreg cases handled.
2712 assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
2713 BitWidth <= 32 &&
2714 Offset == 0 &&
2715 "Not implemented");
2716
2717 if (BitWidth < 32) {
2718 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2719 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2720 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2721
2722 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
2723 .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
2724 .addImm(0)
2725 .addImm(BitWidth);
2726
2727 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
2728 .addImm(31)
2729 .addReg(MidRegLo);
2730
2731 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2732 .addReg(MidRegLo)
2733 .addImm(AMDGPU::sub0)
2734 .addReg(MidRegHi)
2735 .addImm(AMDGPU::sub1);
2736
2737 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00002738 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00002739 return;
2740 }
2741
2742 MachineOperand &Src = Inst->getOperand(1);
2743 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2744 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2745
2746 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
2747 .addImm(31)
2748 .addReg(Src.getReg(), 0, AMDGPU::sub0);
2749
2750 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
2751 .addReg(Src.getReg(), 0, AMDGPU::sub0)
2752 .addImm(AMDGPU::sub0)
2753 .addReg(TmpReg)
2754 .addImm(AMDGPU::sub1);
2755
2756 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00002757 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00002758}
2759
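// Add every user of \p DstReg that cannot read a VGPR in the operand position
// where \p DstReg appears to the worklist, so it is moved to the VALU too.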
Matt Arsenaultf003c382015-08-26 20:47:50 +00002760void SIInstrInfo::addUsersToMoveToVALUWorklist(
2761 unsigned DstReg,
2762 MachineRegisterInfo &MRI,
2763 SmallVectorImpl<MachineInstr *> &Worklist) const {
2764 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
2765 E = MRI.use_end(); I != E; ++I) {
2766 MachineInstr &UseMI = *I->getParent();
2767 if (!canReadVGPR(UseMI, I.getOperandNo())) {
2768 Worklist.push_back(&UseMI);
2769 }
2770 }
2771}
2772
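// When an instruction that defines SCC is moved to the VALU, SCC is no longer
// written, so every SCC user up to the next SCC def must be moved as well.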
Tom Stellardbc4497b2016-02-12 23:45:29 +00002773void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst,
2774 SmallVectorImpl<MachineInstr *> &Worklist) const {
2775 // This assumes that all the users of SCC are in the same block
2776 // as the SCC def.
2777 for (MachineBasicBlock::iterator I = SCCDefInst,
2778 E = SCCDefInst->getParent()->end(); I != E; ++I) {
2779
2780 // Exit if we find another SCC def.
2781 if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
2782 return;
2783
2784 if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
2785 Worklist.push_back(I);
2786 }
2787}
2788
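// Compute the register class the destination should have once \p Inst is
// moved to the VALU, or nullptr if no VGPR-equivalent class is required or
// available.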
Matt Arsenaultba6aae72015-09-28 20:54:57 +00002789const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
2790 const MachineInstr &Inst) const {
2791 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
2792
2793 switch (Inst.getOpcode()) {
2794 // For target instructions, getOpRegClass just returns the virtual register
2795 // class associated with the operand, so we need to find an equivalent VGPR
2796 // register class in order to move the instruction to the VALU.
2797 case AMDGPU::COPY:
2798 case AMDGPU::PHI:
2799 case AMDGPU::REG_SEQUENCE:
2800 case AMDGPU::INSERT_SUBREG:
2801 if (RI.hasVGPRs(NewDstRC))
2802 return nullptr;
2803
2804 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
2805 if (!NewDstRC)
2806 return nullptr;
2807 return NewDstRC;
2808 default:
2809 return NewDstRC;
2810 }
2811}
2812
Matt Arsenault6c067412015-11-03 22:30:15 +00002813// Find the one SGPR operand we are allowed to use.
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002814unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
2815 int OpIndices[3]) const {
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002816 const MCInstrDesc &Desc = MI->getDesc();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002817
2818 // Find the one SGPR operand we are allowed to use.
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002819 //
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002820 // First we need to consider the instruction's operand requirements before
2821 // legalizing. Some operands are required to be SGPRs, such as implicit uses
2822 // of VCC, but we are still bound by the constant bus requirement to only use
2823 // one.
2824 //
2825 // If the operand's class is an SGPR, we can never move it.
2826
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002827 unsigned SGPRReg = findImplicitSGPRRead(*MI);
2828 if (SGPRReg != AMDGPU::NoRegister)
2829 return SGPRReg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002830
2831 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
2832 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2833
2834 for (unsigned i = 0; i < 3; ++i) {
2835 int Idx = OpIndices[i];
2836 if (Idx == -1)
2837 break;
2838
2839 const MachineOperand &MO = MI->getOperand(Idx);
Matt Arsenault6c067412015-11-03 22:30:15 +00002840 if (!MO.isReg())
2841 continue;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002842
Matt Arsenault6c067412015-11-03 22:30:15 +00002843 // Is this operand statically required to be an SGPR based on the operand
2844 // constraints?
2845 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
2846 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
2847 if (IsRequiredSGPR)
2848 return MO.getReg();
2849
2850 // If this could be a VGPR or an SGPR, check the dynamic register class.
2851 unsigned Reg = MO.getReg();
2852 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
2853 if (RI.isSGPRClass(RegRC))
2854 UsedSGPRs[i] = Reg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002855 }
2856
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002857 // We don't have a required SGPR operand, so we have a bit more freedom in
2858 // selecting operands to move.
2859
2860 // Try to select the most used SGPR. If an SGPR is equal to one of the
2861 // others, we choose that.
2862 //
2863 // e.g.
2864 // V_FMA_F32 v0, s0, s0, s0 -> No moves
2865 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
2866
Matt Arsenault6c067412015-11-03 22:30:15 +00002867 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
2868 // prefer those.
2869
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002870 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
2871 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
2872 SGPRReg = UsedSGPRs[0];
2873 }
2874
2875 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
2876 if (UsedSGPRs[1] == UsedSGPRs[2])
2877 SGPRReg = UsedSGPRs[1];
2878 }
2879
2880 return SGPRReg;
2881}
2882
Tom Stellard81d871d2013-11-13 23:36:50 +00002883void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
2884 const MachineFunction &MF) const {
2885 int End = getIndirectIndexEnd(MF);
2886 int Begin = getIndirectIndexBegin(MF);
2887
2888 if (End == -1)
2889 return;
2890
2891
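// Reserve every register tuple overlapping the indirectly addressed range
// [Begin, End]: a tuple of N 32-bit registers beginning up to N - 1 registers
// before Begin still covers part of the range.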
2892 for (int Index = Begin; Index <= End; ++Index)
Tom Stellard45c0b3a2015-01-07 20:59:25 +00002893 Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
Tom Stellard81d871d2013-11-13 23:36:50 +00002894
Tom Stellard415ef6d2013-11-13 23:58:51 +00002895 for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00002896 Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
2897
Tom Stellard415ef6d2013-11-13 23:58:51 +00002898 for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00002899 Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
2900
Tom Stellard415ef6d2013-11-13 23:58:51 +00002901 for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00002902 Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
2903
Tom Stellard415ef6d2013-11-13 23:58:51 +00002904 for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00002905 Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
2906
Tom Stellard415ef6d2013-11-13 23:58:51 +00002907 for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Tom Stellard81d871d2013-11-13 23:36:50 +00002908 Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00002909}
Tom Stellard1aaad692014-07-21 16:55:33 +00002910
Tom Stellard6407e1e2014-08-01 00:32:33 +00002911MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenaultace5b762014-10-17 18:00:43 +00002912 unsigned OperandName) const {
Tom Stellard1aaad692014-07-21 16:55:33 +00002913 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
2914 if (Idx == -1)
2915 return nullptr;
2916
2917 return &MI.getOperand(Idx);
2918}
Tom Stellard794c8c02014-12-02 17:05:41 +00002919
2920uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
2921 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard4694ed02015-06-26 21:58:42 +00002922 if (ST.isAmdHsaOS()) {
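    // Set ATC = 1 (bit 56 of the 128-bit resource descriptor) so that, on
    // amdhsa, accesses through this resource are address-translated.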
Tom Stellard794c8c02014-12-02 17:05:41 +00002923 RsrcDataFormat |= (1ULL << 56);
2924
Michel Danzerbeb79ce2016-03-16 09:10:35 +00002925 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
2926 // Set MTYPE = 2
2927 RsrcDataFormat |= (2ULL << 59);
Tom Stellard4694ed02015-06-26 21:58:42 +00002928 }
2929
Tom Stellard794c8c02014-12-02 17:05:41 +00002930 return RsrcDataFormat;
2931}
Marek Olsakd1a69a22015-09-29 23:37:32 +00002932
2933uint64_t SIInstrInfo::getScratchRsrcWords23() const {
2934 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
2935 AMDGPU::RSRC_TID_ENABLE |
2936 0xffffffff; // Size
2937
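  // ELEMENT_SIZE encodes the size as 2^(n + 1) bytes, hence the subtraction;
  // e.g. a 4-byte maximum private element size is encoded as 1.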
Matt Arsenault24ee0782016-02-12 02:40:47 +00002938 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
2939
2940 Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
2941
Marek Olsakd1a69a22015-09-29 23:37:32 +00002942 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
2943 // Clear them unless we want a huge stride.
2944 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
2945 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
2946
2947 return Rsrc23;
2948}
Nicolai Haehnle02c32912016-01-13 16:10:10 +00002949
2950bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
2951 unsigned Opc = MI->getOpcode();
2952
2953 return isSMRD(Opc);
2954}
2955
2956bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
2957 unsigned Opc = MI->getOpcode();
2958
2959 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
2960}
Tom Stellard2ff72622016-01-28 16:04:37 +00002961
2962ArrayRef<std::pair<int, const char *>>
2963SIInstrInfo::getSerializableTargetIndices() const {
2964 static const std::pair<int, const char *> TargetIndices[] = {
2965 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
2966 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
2967 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
2968 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
2969 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
2970 return makeArrayRef(TargetIndices);
2971}