//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

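// Note (explanatory, not in the original source): machine SDNodes order their
// operands as [real operands..., chain, glue...], so once trailing glue
// operands are stripped the chain is the last operand. That layout is what
// the two helpers above rely on.
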
/// \brief Returns true if both nodes have the same value for the given
///        operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for the check on implicit exec reads.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {
    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // decrement each index by one.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

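// Example (illustrative, not part of the original file): for two ds_read_b32
// nodes that share a base register and chain,
//   Load0: DS_READ_B32 %base, offset = 16
//   Load1: DS_READ_B32 %base, offset = 20
// areLoadsFromSameBasePtr() returns true with Offset0 = 16 and Offset1 = 20.
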
static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

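// Note (explanatory, not in the original source): the ST64 read2/write2
// variants scale their two offsets by 64 elements rather than one, which is
// why getMemOpBaseRegImmOfs() below multiplies EltSize by 64 for them.
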
bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                        unsigned &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();

  if (isDS(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive.
    // We will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset0);
    // DS_PERMUTE does not have Offset0Imm (and Offset1Imm).
    if (!Offset0Imm)
      return false;

    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt->mayLoad())
        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
      else {
        assert(LdSt->mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}

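// Example (illustrative, not part of the original file): for
//   DS_READ2_B32 %addr, offset0 = 2, offset1 = 3
// the 64-bit result class gives EltSize = 8 / 2 = 4 bytes, the offsets are
// consecutive, so the pair is reported as BaseReg = %addr, Offset = 4 * 2 = 8.
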
bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                     MachineInstr *SecondLdSt,
                                     unsigned NumLoads) const {
  // TODO: This needs finer tuning.
  if (NumLoads > 4)
    return false;

  if (isDS(*FirstLdSt) && isDS(*SecondLdSt))
    return true;

  if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt))
    return true;

  if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) &&
      (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt)))
    return true;

  return false;
}

void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {
  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
  };

  static const int16_t Sub0_7_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  };

  static const int16_t Sub0_3_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1,
  };

  unsigned Opcode;
  ArrayRef<int16_t> SubIndices;
  bool Forward;

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_3_64;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_7_64;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_15_64;

  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
    Forward = true;
  else
    Forward = false;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == SubIndices.size() - 1)
      Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

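// Note (explanatory, not in the original source): the Forward flag above
// orders the per-subregister moves so that when DestReg and SrcReg overlap,
// no lane of the source is clobbered before it has been copied: copying to a
// lower hardware index walks sub0 upward, copying to a higher one walks the
// subregisters in reverse.
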
int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();

  int NewOpc;

  // Try to map original to commuted opcode.
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode.
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

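// Example (illustrative, not part of the original file): commuting an opcode
// with a non-symmetric operation swaps it with its REV form, e.g. V_SUB_F32
// (dst = src0 - src1) maps to V_SUBREV_F32 (dst = src1 - src0), so the two
// source operands can be exchanged without changing the computed value.
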
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo->getObjectSize(FrameIndex);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use pseudo instructions for spilling SGPRs.
    unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode))
      .addReg(SrcReg) // src
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(MFI)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg) // src
    .addFrameIndex(FrameIndex) // frame_idx
    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo->getObjectSize(FrameIndex);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(MFI)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex) // frame_idx
    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addMemOperand(MMO);
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {
      unsigned TIDIGXReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id.
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset.
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

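// Example (illustrative, not part of the original file): with LDSSize = 1024,
// FrameOffset = 8 and WorkGroupSize = 256, the spill slot starts at
// 1024 + 8 * 256 = 3072 bytes; each lane then adds its TID scaled by 4
// (the V_LSHLREV_B32 above) to obtain a private, non-overlapping LDS address.
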
void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
                                   int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

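// Example (illustrative, not part of the original file): as the loop above
// assumes, s_nop N covers N + 1 wait states, so insertWaitStates(MI, 10)
// emits
//   s_nop 7   ; 8 wait states
//   s_nop 1   ; 2 wait states
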
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI->getOperand(1).getReg();
    unsigned Src1 = MI->getOperand(2).getReg();
    const MachineOperand &SrcCond = MI->getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCond);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::SI_CONSTDATA_PTR: {
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                   .addReg(RegLo)
                   .addOperand(MI->getOperand(1)));
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                   .addReg(RegHi)
                   .addImm(0));

    llvm::finalizeBundle(MBB, Bundler.begin());

    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for a
/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still fail to
/// commute the operands; a null pointer is returned in such cases.
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
                                                  bool NewMI,
                                                  unsigned OpIdx0,
                                                  unsigned OpIdx1) const {
  int CommutedOpcode = commuteOpcode(*MI);
  if (CommutedOpcode == -1)
    return nullptr;

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  MachineOperand &Src0 = MI->getOperand(Src0Idx);
  if (!Src0.isReg())
    return nullptr;

  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);

  if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
       OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
      (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
       OpIdx1 != static_cast<unsigned>(Src0Idx)))
    return nullptr;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  if (isVOP2(*MI)) {
    const MCInstrDesc &InstrDesc = MI->getDesc();
    // For VOP2 instructions, any operand type is valid to use for src0. Make
    // sure we can use the src1 as src0.
    //
    // We could be stricter here and only allow commuting if there is a reason
    // to do so, i.e. if both operands are VGPRs there is no real benefit,
    // although MachineCSE attempts to find matches by commuting.
    const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
    if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
      return nullptr;
  }

  if (!Src1.isReg()) {
    // Allow commuting instructions with Imm operands.
    if (NewMI || !Src1.isImm() ||
        (!isVOP2(*MI) && !isVOP3(*MI))) {
      return nullptr;
    }
    // Be sure to copy the source modifiers to the right place.
    if (MachineOperand *Src0Mods
          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      MachineOperand *Src1Mods
        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);

      int Src0ModsVal = Src0Mods->getImm();
      if (!Src1Mods && Src0ModsVal != 0)
        return nullptr;

      // XXX - This assert might be a lie. It might be useful to have a neg
      // modifier with 0.0.
      int Src1ModsVal = Src1Mods->getImm();
      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

      Src1Mods->setImm(Src0ModsVal);
      Src0Mods->setImm(Src1ModsVal);
    }

    unsigned Reg = Src0.getReg();
    unsigned SubReg = Src0.getSubReg();
    if (Src1.isImm())
      Src0.ChangeToImmediate(Src1.getImm());
    else
      llvm_unreachable("Should only have immediates");

    Src1.ChangeToRegister(Reg, false);
    Src1.setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
  }

  if (MI)
    MI->setDesc(get(CommutedOpcode));

  return MI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isCommutable())
    return false;

  unsigned Opc = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
  // immediate. Also, an immediate src0 operand is not handled in
  // SIInstrInfo::commuteInstruction().
  if (!MI->getOperand(Src0Idx).isReg())
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);
  if (Src1.isImm()) {
    // SIInstrInfo::commuteInstruction() does support commuting the immediate
    // operand src1 in 2 and 3 operand instructions.
    if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
      return false;
  } else if (Src1.isReg()) {
    // If any source modifiers are set, the generic instruction commuting won't
    // understand how to copy the source modifiers.
    if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
      return false;
  } else
    return false;

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  // Remove from highest index to lowest so the remaining indices stay valid.
  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI->getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: use v_madmk_f32.
    // We should only expect these to be on src0 due to canonicalizations.
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() ||
          (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
        return false;

      if (!Src2->isReg() ||
          (Src2->isReg() && RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))))
        return false;

      // We need to do some weird looking operand shuffling since the madmk
      // operands are out of the normal expected order with the multiplied
      // constant as the last operand.
      //
      // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
      // src0 -> src2 K
      // src1 -> src0
      // src2 -> src1

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      unsigned Src2Reg = Src2->getReg();
      unsigned Src2SubReg = Src2->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      Src1->setReg(Src2Reg);
      Src1->setSubReg(Src2SubReg);
      Src1->setIsKill(Src2->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      Src2->ChangeToImmediate(Imm);

      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADMK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }

    // Added part is the constant: use v_madak_f32.
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      if (!Src0->isImm() &&
          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
        return false;

      if (!Src1->isReg() ||
          (Src1->isReg() && RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
        return false;

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      // ChangeToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADAK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }
  }

  return false;
}

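// Example (illustrative, not part of the original file): given
//   %k = V_MOV_B32 0x41200000      ; 10.0f
//   %d = V_MAD_F32 %a, %b, %k
// the added operand is the constant, so the fold above rewrites the use to
//   %d = V_MADAK_F32 %a, %b, 0x41200000
// and deletes the now-dead V_MOV_B32.
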
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

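// Example (illustrative, not part of the original file): two 4-byte accesses
// at offsets 0 and 4 do not overlap (0 + 4 <= 4), while 4-byte accesses at
// offsets 0 and 2 do (0 + 4 > 2).
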
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, Offset0;
  unsigned BaseReg1, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(*MIa)) {
    if (isDS(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb);
  }

  if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
    if (isMUBUF(*MIb) || isMTBUF(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isSMRD(*MIb);
  }

  if (isSMRD(*MIa)) {
    if (isSMRD(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa);
  }

  if (isFLAT(*MIa)) {
    if (isFLAT(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineBasicBlock::iterator &MI,
                                                 LiveVariables *LV) const {
  switch (MI->getOpcode()) {
  default: return nullptr;
  case AMDGPU::V_MAC_F32_e64: break;
  case AMDGPU::V_MAC_F32_e32: {
    const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
    if (Src0->isImm() && !isInlineConstant(*Src0, 4))
      return nullptr;
    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst);
  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
  const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
  const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);

  return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
    .addOperand(*Dst)
    .addImm(0) // Src0 mods
    .addOperand(*Src0)
    .addImm(0) // Src1 mods
    .addOperand(*Src1)
    .addImm(0) // Src2 mods
    .addOperand(*Src2)
    .addImm(0)  // clamp
    .addImm(0); // omod
}

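// Example (illustrative, not part of the original file): the two-address
//   %vdst = V_MAC_F32 %a, %b      ; %vdst tied as the implicit src2
// is rebuilt above as the untied three-address form
//   %vdst = V_MAD_F32 %a, %b, %src2   ; zeroed mods / clamp / omod
// which gives the register allocator more freedom.
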
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001268bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault303011a2014-12-17 21:04:08 +00001269 int64_t SVal = Imm.getSExtValue();
1270 if (SVal >= -16 && SVal <= 64)
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001271 return true;
Tom Stellardd0084462014-03-17 17:03:52 +00001272
Matt Arsenault303011a2014-12-17 21:04:08 +00001273 if (Imm.getBitWidth() == 64) {
1274 uint64_t Val = Imm.getZExtValue();
1275 return (DoubleToBits(0.0) == Val) ||
1276 (DoubleToBits(1.0) == Val) ||
1277 (DoubleToBits(-1.0) == Val) ||
1278 (DoubleToBits(0.5) == Val) ||
1279 (DoubleToBits(-0.5) == Val) ||
1280 (DoubleToBits(2.0) == Val) ||
1281 (DoubleToBits(-2.0) == Val) ||
1282 (DoubleToBits(4.0) == Val) ||
1283 (DoubleToBits(-4.0) == Val);
1284 }
1285
Tom Stellardd0084462014-03-17 17:03:52 +00001286 // The actual type of the operand does not seem to matter as long
1287 // as the bits match one of the inline immediate values. For example:
1288 //
1289 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1290 // so it is a legal inline immediate.
1291 //
1292 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1293 // floating-point, so it is a legal inline immediate.
Matt Arsenault303011a2014-12-17 21:04:08 +00001294 uint32_t Val = Imm.getZExtValue();
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001295
Matt Arsenault303011a2014-12-17 21:04:08 +00001296 return (FloatToBits(0.0f) == Val) ||
1297 (FloatToBits(1.0f) == Val) ||
1298 (FloatToBits(-1.0f) == Val) ||
1299 (FloatToBits(0.5f) == Val) ||
1300 (FloatToBits(-0.5f) == Val) ||
1301 (FloatToBits(2.0f) == Val) ||
1302 (FloatToBits(-2.0f) == Val) ||
1303 (FloatToBits(4.0f) == Val) ||
1304 (FloatToBits(-4.0f) == Val);
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001305}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   unsigned OpSize) const {
  if (MO.isImm()) {
    // MachineOperand provides no way to tell the true operand size, since it
    // only records a 64-bit value. We need to know the size to determine if a
    // 32-bit floating point immediate bit pattern is legal for an integer
    // immediate. It would be for any 32-bit integer operand, but would not be
    // for a 64-bit one.

    unsigned BitSize = 8 * OpSize;
    return isInlineConstant(APInt(BitSize, MO.getImm(), true));
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
                                    unsigned OpSize) const {
  return MO.isImm() && !isInlineConstant(MO, OpSize);
}

static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                                    const MachineOperand &MO) const {
  const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];

  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());

  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
    return true;

  if (OpInfo.RegClass < 0)
    return false;

  unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
  if (isLiteralConstant(MO, OpSize))
    return RI.opCanUseLiteralConstant(OpInfo.OperandType);

  return RI.opCanUseInlineConstant(OpInfo.OperandType);
}

bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
  int Op32 = AMDGPU::getVOPe32(Opcode);
  if (Op32 == -1)
    return false;

  return pseudoToMCOpcode(Op32) != -1;
}
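
// E.g. hasVALU32BitEncoding(AMDGPU::V_ADD_F32_e64) is true only if the
// V_ADD_F32_e32 pseudo maps to a real MC opcode on this subtarget, which
// is what callers check before shrinking an e64 instruction to e32.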

bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
  // The src0_modifiers operand is present on all instructions
  // that have modifiers.

  return AMDGPU::getNamedOperandIdx(Opcode,
                                    AMDGPU::OpName::src0_modifiers) != -1;
}

bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
                                  unsigned OpName) const {
  const MachineOperand *Mods = getNamedOperand(MI, OpName);
  return Mods && Mods->getImm();
}

bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                  const MachineOperand &MO,
                                  unsigned OpSize) const {
  // Literal constants use the constant bus.
  if (isLiteralConstant(MO, OpSize))
    return true;

  if (!MO.isReg() || !MO.isUse())
    return false;

  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));

  // FLAT_SCR is just an SGPR pair.
  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
    return true;

  // EXEC register uses the constant bus.
  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
    return true;

  // SGPRs use the constant bus.
  if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
      (!MO.isImplicit() &&
       (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
        AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
    return true;
  }

  return false;
}
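
// Example of the rule this feeds: V_ADD_F32_e64 with two different SGPR
// sources would read the constant bus twice and is rejected by
// verifyInstruction() below, while one SGPR plus a VGPR or inline
// constant is fine (inline constants do not use the bus; literals do).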

static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
  for (const MachineOperand &MO : MI.implicit_operands()) {
    // We only care about reads.
    if (MO.isDef())
      continue;

    switch (MO.getReg()) {
    case AMDGPU::VCC:
    case AMDGPU::M0:
    case AMDGPU::FLAT_SCR:
      return MO.getReg();

    default:
      break;
    }
  }

  return AMDGPU::NoRegister;
}

bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes
  // all SCC users are in the same blocks as their defs.
  const MachineBasicBlock *MBB = MI->getParent();
  if (MI == &MBB->front()) {
    if (MBB->isLiveIn(AMDGPU::SCC)) {
      ErrInfo = "scc register cannot be live across blocks.";
      return false;
    }
  }

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct.
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).isFPImm()) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
      return false;
    }

    int RegClass = Desc.OpInfo[i].RegClass;

    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER:
      if (MI->getOperand(i).isImm()) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case AMDGPU::OPERAND_REG_IMM32:
      break;
    case AMDGPU::OPERAND_REG_INLINE_C:
      if (isLiteralConstant(MI->getOperand(i),
                            RI.getRegClass(RegClass)->getSize())) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      // Check if this operand is an immediate.
      // FrameIndex operands will be replaced by immediates, so they are
      // allowed.
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify VOP*
  if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
    // Only look at the true operands. Only a real operand can use the constant
    // bus, and we don't want to check pseudo-operands like the source modifier
    // flags.
    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = findImplicitSGPRRead(*MI);
    if (SGPRUsed != AMDGPU::NoRegister)
      ++ConstantBusCount;

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1)
        break;
      const MachineOperand &MO = MI->getOperand(OpIdx);
      if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
        if (MO.isReg()) {
          if (MO.getReg() != SGPRUsed)
            ++ConstantBusCount;
          SGPRUsed = MO.getReg();
        } else {
          ++ConstantBusCount;
        }
      }
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI->getOperand(Src0Idx);
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    const MachineOperand &Src2 = MI->getOperand(Src2Idx);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  // Make sure we aren't losing exec uses in the td files. This mostly requires
  // being careful when using let Uses to try to add other use registers.
  if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
    const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC);
    if (!Exec || !Exec->isImplicit()) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";
      return false;
    }
  }

  return true;
}

unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
  case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1) {
    unsigned Reg = MI.getOperand(OpNo).getReg();

    if (TargetRegisterInfo::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return RI.getPhysRegClass(Reg);
  }

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineBasicBlock *MBB = MI->getParent();
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg())
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = AMDGPU::S_MOV_B32;

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
    VRC = &AMDGPU::VReg_64RegClass;
  else
    VRC = &AMDGPU::VGPR_32RegClass;

  unsigned Reg = MRI.createVirtualRegister(VRC);
  DebugLoc DL = MBB->findDebugLoc(I);
  BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
    .addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}
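
// Sketch of the effect (names illustrative): an operand that is illegal
// where it sits, e.g. the second SGPR source of a VOP3,
//   %vgpr2 = V_MAD_F32 %sgpr0, %sgpr1, %vgpr1
// is rewritten through a fresh virtual register:
//   %vgpr3 = COPY %sgpr1
//   %vgpr2 = V_MAD_F32 %sgpr0, %vgpr3, %vgpr1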

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
    BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
      .addReg(SuperReg.getReg(), 0, SubIdx);
    return SubReg;
  }

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
    .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
    .addReg(NewSuperReg, 0, SubIdx);

  return SubReg;
}
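
// For example, extracting the low 32 bits of a 64-bit operand:
//   unsigned Lo = buildExtractSubReg(MI, MRI, SuperRegOp,
//                                    &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
//                                    &AMDGPU::SGPR_32RegClass);
// emits "%Lo = COPY %SuperReg:sub0" (plus one extra COPY when SuperRegOp
// already carries its own subregister index).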

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
  assert(Inst->getNumExplicitOperands() == 3);
  MachineOperand Op1 = Inst->getOperand(1);
  Inst->RemoveOperand(1);
  Inst->addOperand(Op1);
}
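
// swapOperands() is used by moveToVALU() below when selecting the VI
// "rev" shift forms (e.g. S_LSHL_B32 -> V_LSHLREV_B32_e64), which take
// the shift amount in src0 instead of src1.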

bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
                                    const MCOperandInfo &OpInfo,
                                    const MachineOperand &MO) const {
  if (!MO.isReg())
    return false;

  unsigned Reg = MO.getReg();
  const TargetRegisterClass *RC =
    TargetRegisterInfo::isVirtualRegister(Reg) ?
    MRI.getRegClass(Reg) :
    RI.getPhysRegClass(Reg);

  const SIRegisterInfo *TRI =
    static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
  RC = TRI->getSubRegClass(RC, MO.getSubReg());

  // In order to be legal, the common sub-class must be equal to the
  // class of the current operand. For example:
  //
  // v_mov_b32 s0 ; Operand defined as vsrc_32
  //              ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
  //
  // s_sendmsg 0, s0 ; Operand defined as m0reg
  //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL

  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
}

bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                                     const MCOperandInfo &OpInfo,
                                     const MachineOperand &MO) const {
  if (MO.isReg())
    return isLegalRegOperand(MRI, OpInfo, MO);

  // Handle non-register types that are treated like immediates.
  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
  return true;
}

bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
                                 const MachineOperand *MO) const {
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  const MCInstrDesc &InstDesc = MI->getDesc();
  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
  const TargetRegisterClass *DefinedRC =
    OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
  if (!MO)
    MO = &MI->getOperand(OpIdx);

  if (isVALU(*MI) &&
      usesConstantBus(MRI, *MO, DefinedRC->getSize())) {

    RegSubRegPair SGPRUsed;
    if (MO->isReg())
      SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());

    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      if (i == OpIdx)
        continue;
      const MachineOperand &Op = MI->getOperand(i);
      if (Op.isReg() &&
          (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
          usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
        return false;
      }
    }
  }

  if (MO->isReg()) {
    assert(DefinedRC);
    return isLegalRegOperand(MRI, OpInfo, *MO);
  }

  // Handle non-register types that are treated like immediates.
  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());

  if (!DefinedRC) {
    // This operand expects an immediate.
    return true;
  }

  return isImmOperandLegal(MI, OpIdx, *MO);
}

void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
                                       MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();
  const MCInstrDesc &InstrDesc = get(Opc);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
  // we need to only have one constant bus use.
  //
  // Note we do not need to worry about literal constants here. They are
  // disabled for the operand type for instructions because they will always
  // violate the one constant bus use rule.
  bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
  if (HasImplicitSGPR) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    MachineOperand &Src0 = MI->getOperand(Src0Idx);

    if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
      legalizeOpWithMove(MI, Src0Idx);
  }

  // VOP2 src0 instructions support all operand types, so we don't need to check
  // their legality. If src1 is already legal, we don't need to do anything.
  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
    return;

  // We do not use commuteInstruction here because it is too aggressive and will
  // commute if it is possible. We only want to commute here if it improves
  // legality. This can be called a fairly large number of times so don't waste
  // compile time pointlessly swapping and checking legality again.
  if (HasImplicitSGPR || !MI->isCommutable()) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  MachineOperand &Src0 = MI->getOperand(Src0Idx);

  // If src0 can be used as src1, commuting will make the operands legal.
  // Otherwise we have to give up and insert a move.
  //
  // TODO: Other immediate-like operand kinds could be commuted if there was a
  // MachineOperand::ChangeTo* for them.
  if ((!Src1.isImm() && !Src1.isReg()) ||
      !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  int CommutedOpc = commuteOpcode(*MI);
  if (CommutedOpc == -1) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  MI->setDesc(get(CommutedOpc));

  unsigned Src0Reg = Src0.getReg();
  unsigned Src0SubReg = Src0.getSubReg();
  bool Src0Kill = Src0.isKill();

  if (Src1.isImm())
    Src0.ChangeToImmediate(Src1.getImm());
  else if (Src1.isReg()) {
    Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
    Src0.setSubReg(Src1.getSubReg());
  } else
    llvm_unreachable("Should only have register or immediate operands");

  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
  Src1.setSubReg(Src0SubReg);
}
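
// Sketch of the common case handled above (names illustrative): VOP2
// requires a VGPR in src1, so
//   %vgpr2 = V_ADD_F32_e32 %vgpr1, %sgpr0   ; illegal src1
// is fixed by commuting rather than by inserting a copy:
//   %vgpr2 = V_ADD_F32_e32 %sgpr0, %vgpr1   ; legal: src0 accepts SGPRs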

// Legalize VOP3 operands. Because all operand types are supported for any
// operand, and since literal constants are not allowed and should never be
// seen, we only need to worry about inserting copies if we use multiple SGPR
// operands.
void SIInstrInfo::legalizeOperandsVOP3(
  MachineRegisterInfo &MRI,
  MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();

  int VOP3Idx[3] = {
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
  };

  // Find the one SGPR operand we are allowed to use.
  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = VOP3Idx[i];
    if (Idx == -1)
      break;
    MachineOperand &MO = MI->getOperand(Idx);

    // We should never see a VOP3 instruction with an illegal immediate operand.
    if (!MO.isReg())
      continue;

    if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
      continue; // VGPRs are legal

    if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
      SGPRReg = MO.getReg();
      // We can use one SGPR in each VOP3 instruction.
      continue;
    }

    // If we make it this far, then the operand is not legal and we must
    // legalize it.
    legalizeOpWithMove(MI, Idx);
  }
}
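
// E.g. in %vgpr1 = V_MAD_F32 %sgpr0, %sgpr1, %vgpr0, the first SGPR seen
// (%sgpr0) is kept and %sgpr1 is copied to a VGPR via legalizeOpWithMove(),
// since a VOP3 encoding may read at most one SGPR across its sources.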

unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
                                         MachineRegisterInfo &MRI) const {
  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
  unsigned DstReg = MRI.createVirtualRegister(SRC);
  unsigned SubRegs = VRC->getSize() / 4;

  SmallVector<unsigned, 8> SRegs;
  for (unsigned i = 0; i < SubRegs; ++i) {
    unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
      .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
    SRegs.push_back(SGPR);
  }

  MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
                                    UseMI->getDebugLoc(),
                                    get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addReg(SRegs[i]);
    MIB.addImm(RI.getSubRegFromChannel(i));
  }
  return DstReg;
}
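
// For a 64-bit pointer this expands to one lane read per 32-bit half plus
// a rebuild of the wide value, e.g.:
//   %s0   = V_READFIRSTLANE_B32 %vptr:sub0
//   %s1   = V_READFIRSTLANE_B32 %vptr:sub1
//   %sptr = REG_SEQUENCE %s0, sub0, %s1, sub1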

void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
                                       MachineInstr *MI) const {

  // If the pointer is stored in VGPRs, then we need to move it to
  // SGPRs using v_readfirstlane. This is safe because we only select
  // loads with uniform pointers to SMRD instructions, so we know the
  // pointer value is uniform.
  MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
    SBase->setReg(SGPR);
  }
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  // Legalize VOP2
  if (isVOP2(*MI) || isVOPC(*MI)) {
    legalizeOperandsVOP2(MRI, MI);
    return;
  }

  // Legalize VOP3
  if (isVOP3(*MI)) {
    legalizeOperandsVOP3(MRI, MI);
    return;
  }

  // Legalize SMRD
  if (isSMRD(*MI)) {
    legalizeOperandsSMRD(MRI, MI);
    return;
  }

  // Legalize PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
      MachineOperand &Op = MI->getOperand(I);
      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);

      // MI is a PHI instruction.
      MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
      MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();

      BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
        .addOperand(Op);
      Op.setReg(DstReg);
    }
  }

  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
  // VGPR dest type and SGPR sources, insert copies so all operands are
  // VGPRs. This seems to help operand folding / the register coalescer.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    MachineBasicBlock *MBB = MI->getParent();
    const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
    if (RI.hasVGPRs(DstRC)) {
      // Update all the operands so they are VGPR register classes. These may
      // not be the same register class because REG_SEQUENCE supports mixing
      // subregister index types e.g. sub0_sub1 + sub2 + sub3
      for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
        MachineOperand &Op = MI->getOperand(I);
        if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
          continue;

        const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
        const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
        if (VRC == OpRC)
          continue;

        unsigned DstReg = MRI.createVirtualRegister(VRC);

        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
          .addOperand(Op);

        Op.setReg(DstReg);
        Op.setIsKill();
      }
    }

    return;
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
        .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MIMG
  if (isMIMG(*MI)) {
    MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
      SRsrc->setReg(SGPR);
    }

    MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
      SSamp->setReg(SGPR);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.
  int SRsrcIdx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
  if (SRsrcIdx != -1) {
    // We have an MUBUF instruction
    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
                             RI.getRegClass(SRsrcRC))) {
      // The operands are legal.
      // FIXME: We may need to legalize operands besides srsrc.
      return;
    }

    MachineBasicBlock &MBB = *MI->getParent();

    // Extract the ptr from the resource descriptor.
    unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
      &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

    // Create an empty resource descriptor
    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();

    // Zero64 = 0
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
            Zero64)
      .addImm(0);

    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatLo)
      .addImm(RsrcDataFormat & 0xFFFFFFFF);

    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
            SRsrcFormatHi)
      .addImm(RsrcDataFormat >> 32);

    // NewSRsrc = {Zero64, SRsrcFormat}
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
      .addReg(Zero64)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(SRsrcFormatLo)
      .addImm(AMDGPU::sub2)
      .addReg(SRsrcFormatHi)
      .addImm(AMDGPU::sub3);

    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    if (VAddr) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
      DebugLoc DL = MI->getDebugLoc();
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
        .addReg(SRsrcPtr, 0, AMDGPU::sub0)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub0);

      // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
        .addReg(SRsrcPtr, 0, AMDGPU::sub1)
        .addReg(VAddr->getReg(), 0, AMDGPU::sub1);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
        .addReg(NewVAddrLo)
        .addImm(AMDGPU::sub0)
        .addReg(NewVAddrHi)
        .addImm(AMDGPU::sub1);
    } else {
      // This instruction is the _OFFSET variant, so we need to convert it to
      // ADDR64.
      assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
             < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
             "FIXME: Need to emit flat atomics here");

      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());

      // Atomics with return have an additional tied operand and are
      // missing some of the special bits.
      MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
      MachineInstr *Addr64;

      if (!VDataIn) {
        // Regular buffer load / store.
        MachineInstrBuilder MIB
          = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
            .addOperand(*VData)
            .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                        // This will be replaced later
                                        // with the new value of vaddr.
            .addOperand(*SRsrc)
            .addOperand(*SOffset)
            .addOperand(*Offset);

        // Atomics do not have this operand.
        if (const MachineOperand *GLC
              = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
          MIB.addImm(GLC->getImm());
        }

        MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));

        if (const MachineOperand *TFE
              = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
          MIB.addImm(TFE->getImm());
        }

        MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
        Addr64 = MIB;
      } else {
        // Atomics with return.
        Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
          .addOperand(*VData)
          .addOperand(*VDataIn)
          .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
                                      // This will be replaced later
                                      // with the new value of vaddr.
          .addOperand(*SRsrc)
          .addOperand(*SOffset)
          .addOperand(*Offset)
          .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
          .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
      }

      MI->removeFromParent();
      MI = Addr64;

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
        .addReg(SRsrcPtr, 0, AMDGPU::sub0)
        .addImm(AMDGPU::sub0)
        .addReg(SRsrcPtr, 0, AMDGPU::sub1)
        .addImm(AMDGPU::sub1);

      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
    }

    // Update the instruction to use NewVaddr
    VAddr->setReg(NewVAddr);
    // Update the instruction to use NewSRsrc
    SRsrc->setReg(NewSRsrc);
  }
}

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      break;
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_I64: {
      splitScalar64BitBFE(Worklist, Inst);
      Inst->eraseFromParent();
      continue;
    }

    case AMDGPU::S_LSHL_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHL_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
      break;

    case AMDGPU::S_ABS_I32:
      lowerScalarAbs(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_CBRANCH_SCC0:
    case AMDGPU::S_CBRANCH_SCC1:
      // Clear unused bits of vcc
      BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
        .addReg(AMDGPU::EXEC)
        .addReg(AMDGPU::VCC);
      break;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it, and
    // we're just about to add the implicit use / defs of VCC, and we don't want
    // both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
        Inst->RemoveOperand(i);
        addSCCDefUsersToVALUWorklist(Inst, Worklist);
      }
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f;               // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
      Inst->RemoveOperand(2);                     // Remove old immediate.
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
    }

    bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef();
    unsigned NewDstReg = AMDGPU::NoRegister;
    if (HasDst) {
      // Update the destination register class.
      const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
      if (!NewDstRC)
        continue;

      unsigned DstReg = Inst->getOperand(0).getReg();
      NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);
    }

    // Legalize the operands
    legalizeOperands(Inst);

    if (HasDst)
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VGPR_32RegClass;
}

void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
                                 MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
    .addImm(0)
    .addReg(Src.getReg());

  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
    .addReg(Src.getReg())
    .addReg(TmpReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
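
// I.e. |x| is computed as max(x, 0 - x):
//   %tmp = V_SUB_I32_e32 0, %src
//   %res = V_MAX_I32_e64 %src, %tmp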

void SIInstrInfo::splitScalar64BitUnaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // We don't need to legalizeOperands here because for a single operand, src0
  // will support any kind of input.

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
2555
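// Split a 64-bit scalar binary op into two 32-bit VALU ops applied to the
// sub0 and sub1 halves of both sources, then tie the results back into a
// 64-bit value with REG_SEQUENCE. For example (assuming the usual moveToVALU
// mapping), S_AND_B64 would become a V_AND_B32 for each half.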
void SIInstrInfo::splitScalar64BitBinaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  legalizeOperands(LoHalf);
  legalizeOperands(HiHalf);

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}

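// Expand a 64-bit scalar popcount using the identity
// bcnt64(x) = bcnt32(x.lo) + bcnt32(x.hi). V_BCNT_U32_B32 adds its second
// operand to the population count, so the second instruction both counts the
// high half and accumulates the low-half result.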
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                                       MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  // We don't need to legalize operands here. src0 for either instruction can be
  // an SGPR, and the second input is unused or determined here.
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

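// Expand a 64-bit scalar bitfield extract. The S_BFE immediate packs the
// field offset into bits [5:0] and the width into bits [22:16]; e.g. a
// sign-extend from 16 bits is S_BFE_I64 with an immediate of
// (16 << 16) | 0 = 0x100000.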
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                                      MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  uint32_t Imm = Inst->getOperand(2).getImm();
  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

  (void) Offset;

  // Only sext_inreg cases handled.
  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
         BitWidth <= 32 &&
         Offset == 0 &&
         "Not implemented");

  if (BitWidth < 32) {
    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
      .addImm(0)
      .addImm(BitWidth);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
      .addImm(31)
      .addReg(MidRegLo);

    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
      .addReg(MidRegLo)
      .addImm(AMDGPU::sub0)
      .addReg(MidRegHi)
      .addImm(AMDGPU::sub1);

    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    return;
  }

  MachineOperand &Src = Inst->getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
    .addReg(Src.getReg(), 0, AMDGPU::sub0);

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
    .addImm(AMDGPU::sub0)
    .addReg(TmpReg)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

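// Queue every user of DstReg whose reading operand cannot accept a VGPR;
// once the def has been moved to the VALU, those users must be moved as well.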
void SIInstrInfo::addUsersToMoveToVALUWorklist(
  unsigned DstReg,
  MachineRegisterInfo &MRI,
  SmallVectorImpl<MachineInstr *> &Worklist) const {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
         E = MRI.use_end(); I != E; ++I) {
    MachineInstr &UseMI = *I->getParent();
    if (!canReadVGPR(UseMI, I.getOperandNo())) {
      Worklist.push_back(&UseMI);
    }
  }
}

void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst,
                                               SmallVectorImpl<MachineInstr *> &Worklist) const {
  // This assumes that all the users of SCC are in the same block
  // as the SCC def.
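  // For example, once a scalar compare defining SCC has been moved to the
  // VALU, any select or conditional branch reading that SCC later in the
  // block has to be moved too.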
  for (MachineBasicBlock::iterator I = SCCDefInst,
       E = SCCDefInst->getParent()->end(); I != E; ++I) {

    // Exit if we find another SCC def.
    if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
      return;

    if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
      Worklist.push_back(I);
  }
}

2756
Matt Arsenaultba6aae72015-09-28 20:54:57 +00002757const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
2758 const MachineInstr &Inst) const {
2759 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
2760
2761 switch (Inst.getOpcode()) {
2762 // For target instructions, getOpRegClass just returns the virtual register
2763 // class associated with the operand, so we need to find an equivalent VGPR
2764 // register class in order to move the instruction to the VALU.
2765 case AMDGPU::COPY:
2766 case AMDGPU::PHI:
2767 case AMDGPU::REG_SEQUENCE:
2768 case AMDGPU::INSERT_SUBREG:
2769 if (RI.hasVGPRs(NewDstRC))
2770 return nullptr;
2771
2772 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
2773 if (!NewDstRC)
2774 return nullptr;
2775 return NewDstRC;
2776 default:
2777 return NewDstRC;
2778 }
2779}
2780
Matt Arsenault6c067412015-11-03 22:30:15 +00002781// Find the one SGPR operand we are allowed to use.
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002782unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
2783 int OpIndices[3]) const {
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002784 const MCInstrDesc &Desc = MI->getDesc();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002785
2786 // Find the one SGPR operand we are allowed to use.
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002787 //
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002788 // First we need to consider the instruction's operand requirements before
2789 // legalizing. Some operands are required to be SGPRs, such as implicit uses
2790 // of VCC, but we are still bound by the constant bus requirement to only use
2791 // one.
2792 //
2793 // If the operand's class is an SGPR, we can never move it.
2794
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002795 unsigned SGPRReg = findImplicitSGPRRead(*MI);
2796 if (SGPRReg != AMDGPU::NoRegister)
2797 return SGPRReg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002798
2799 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
2800 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
2801
2802 for (unsigned i = 0; i < 3; ++i) {
2803 int Idx = OpIndices[i];
2804 if (Idx == -1)
2805 break;
2806
2807 const MachineOperand &MO = MI->getOperand(Idx);
Matt Arsenault6c067412015-11-03 22:30:15 +00002808 if (!MO.isReg())
2809 continue;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00002810
Matt Arsenault6c067412015-11-03 22:30:15 +00002811 // Is this operand statically required to be an SGPR based on the operand
2812 // constraints?
2813 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
2814 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
2815 if (IsRequiredSGPR)
2816 return MO.getReg();
2817
2818 // If this could be a VGPR or an SGPR, Check the dynamic register class.
    unsigned Reg = MO.getReg();
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
      UsedSGPRs[i] = Reg;
  }

  // We don't have a required SGPR operand, so we have a bit more freedom in
  // selecting operands to move.

  // Try to select the most used SGPR. If an SGPR is equal to one of the
  // others, we choose that.
  //
  // e.g.
  // V_FMA_F32 v0, s0, s0, s0 -> No moves
  // V_FMA_F32 v0, s0, s1, s0 -> Move s1

  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
  // prefer those.

  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[0];
  }

  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }

  return SGPRReg;
}

void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

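  // Reserve every register tuple that overlaps the indirectly addressed range
  // [Begin, End]: an N-register tuple starting up to N - 1 registers below
  // Begin still covers part of the range, hence the shrinking start indices
  // below.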
  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}

MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                             unsigned OperandName) const {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  if (Idx == -1)
    return nullptr;

  return &MI.getOperand(Idx);
}

uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
  if (ST.isAmdHsaOS()) {
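    // Assumption, not stated in this file: bit 56 is the ATC bit of the
    // resource descriptor, routing accesses through address translation
    // under an HSA OS.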
    RsrcDataFormat |= (1ULL << 56);

    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      // Set MTYPE = 2
      RsrcDataFormat |= (2ULL << 59);
  }

  return RsrcDataFormat;
}

uint64_t SIInstrInfo::getScratchRsrcWords23() const {
  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
                    AMDGPU::RSRC_TID_ENABLE |
                    0xffffffff; // Size

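  // ELEMENT_SIZE is encoded as log2(bytes) - 1: a 4-byte maximum private
  // element size gives Log2_32(4) - 1 = 1, and a 16-byte size gives 3.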
  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;

  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);

  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
  // Clear them unless we want a huge stride.
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;

  return Rsrc23;
}

bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();

  return isSMRD(Opc);
}

bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();

  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
}

ArrayRef<std::pair<int, const char *>>
SIInstrInfo::getSerializableTargetIndices() const {
  static const std::pair<int, const char *> TargetIndices[] = {
      {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
  return makeArrayRef(TargetIndices);
}