Blame - llvm/lib/Target/AMDGPU/SIInstrInfo.cpp - toolchain/llvm-project

blob: 5854c110c128b70a6c217e3efd8b24ca5b471e77 [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief SI Implementation of TargetInstrInfo.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15
				16	#include "SIInstrInfo.h"
				17	#include "AMDGPUTargetMachine.h"
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	18	#include "GCNHazardRecognizer.h"
Tom Stellard	16a9a20	2013-08-14 23:24:17 +0000	[diff] [blame]	19	#include "SIDefines.h"
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	20	#include "SIMachineFunctionInfo.h"
Tom Stellard	c5cf2f0	2014-08-21 20:40:54 +0000	[diff] [blame]	21	#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	22	#include "llvm/CodeGen/MachineInstrBuilder.h"
				23	#include "llvm/CodeGen/MachineRegisterInfo.h"
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	24	#include "llvm/CodeGen/ScheduleDAG.h"
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	25	#include "llvm/IR/Function.h"
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	26	#include "llvm/CodeGen/RegisterScavenging.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	27	#include "llvm/MC/MCInstrDesc.h"
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	28	#include "llvm/Support/Debug.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	29
				30	using namespace llvm;
				31
Tom Stellard	2e59a45	2014-06-13 01:32:00 +0000	[diff] [blame]	32	SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
Eric Christopher	6c5b511	2015-03-11 18:43:21 +0000	[diff] [blame]	33	: AMDGPUInstrInfo(st), RI() {}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	34
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	35	//===----------------------------------------------------------------------===//
				36	// TargetInstrInfo callbacks
				37	//===----------------------------------------------------------------------===//
				38
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	39	static unsigned getNumOperandsNoGlue(SDNode *Node) {
				40	unsigned N = Node->getNumOperands();
				41	while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
				42	--N;
				43	return N;
				44	}
				45
				46	static SDValue findChainOperand(SDNode *Load) {
				47	SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
				48	assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
				49	return LastOp;
				50	}
				51
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	52	/// \brief Returns true if both nodes have the same value for the given
				53	/// operand \p Op, or if both nodes do not have this operand.
				54	static bool nodesHaveSameOperandValue(SDNode N0, SDNode N1, unsigned OpName) {
				55	unsigned Opc0 = N0->getMachineOpcode();
				56	unsigned Opc1 = N1->getMachineOpcode();
				57
				58	int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
				59	int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
				60
				61	if (Op0Idx == -1 && Op1Idx == -1)
				62	return true;
				63
				64
				65	if ((Op0Idx == -1 && Op1Idx != -1) \|\|
				66	(Op1Idx == -1 && Op0Idx != -1))
				67	return false;
				68
				69	// getNamedOperandIdx returns the index for the MachineInstr's operands,
				70	// which includes the result as the first operand. We are indexing into the
				71	// MachineSDNode's operands, so we need to skip the result operand to get
				72	// the real index.
				73	--Op0Idx;
				74	--Op1Idx;
				75
Tom Stellard	b8b8413	2014-09-03 15:22:39 +0000	[diff] [blame]	76	return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	77	}
				78
Matt Arsenault	a48b866	2015-04-23 23:34:48 +0000	[diff] [blame]	79	bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
				80	AliasAnalysis *AA) const {
				81	// TODO: The generic check fails for VALU instructions that should be
				82	// rematerializable due to implicit reads of exec. We really want all of the
				83	// generic logic for this except for this.
				84	switch (MI->getOpcode()) {
				85	case AMDGPU::V_MOV_B32_e32:
				86	case AMDGPU::V_MOV_B32_e64:
Matt Arsenault	80f766a	2015-09-10 01:23:28 +0000	[diff] [blame]	87	case AMDGPU::V_MOV_B64_PSEUDO:
Matt Arsenault	a48b866	2015-04-23 23:34:48 +0000	[diff] [blame]	88	return true;
				89	default:
				90	return false;
				91	}
				92	}
				93
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	94	bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode Load0, SDNode Load1,
				95	int64_t &Offset0,
				96	int64_t &Offset1) const {
				97	if (!Load0->isMachineOpcode() \|\| !Load1->isMachineOpcode())
				98	return false;
				99
				100	unsigned Opc0 = Load0->getMachineOpcode();
				101	unsigned Opc1 = Load1->getMachineOpcode();
				102
				103	// Make sure both are actually loads.
				104	if (!get(Opc0).mayLoad() \|\| !get(Opc1).mayLoad())
				105	return false;
				106
				107	if (isDS(Opc0) && isDS(Opc1)) {
Tom Stellard	20fa0be	2014-10-07 21:09:20 +0000	[diff] [blame]	108
				109	// FIXME: Handle this case:
				110	if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
				111	return false;
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	112
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	113	// Check base reg.
				114	if (Load0->getOperand(1) != Load1->getOperand(1))
				115	return false;
				116
				117	// Check chain.
				118	if (findChainOperand(Load0) != findChainOperand(Load1))
				119	return false;
				120
Matt Arsenault	972c12a	2014-09-17 17:48:32 +0000	[diff] [blame]	121	// Skip read2 / write2 variants for simplicity.
				122	// TODO: We should report true if the used offsets are adjacent (excluded
				123	// st64 versions).
				124	if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 \|\|
				125	AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
				126	return false;
				127
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	128	Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
				129	Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
				130	return true;
				131	}
				132
				133	if (isSMRD(Opc0) && isSMRD(Opc1)) {
				134	assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
				135
				136	// Check base reg.
				137	if (Load0->getOperand(0) != Load1->getOperand(0))
				138	return false;
				139
Tom Stellard	f0a575f	2015-03-23 16:06:01 +0000	[diff] [blame]	140	const ConstantSDNode *Load0Offset =
				141	dyn_cast<ConstantSDNode>(Load0->getOperand(1));
				142	const ConstantSDNode *Load1Offset =
				143	dyn_cast<ConstantSDNode>(Load1->getOperand(1));
				144
				145	if (!Load0Offset \|\| !Load1Offset)
				146	return false;
				147
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	148	// Check chain.
				149	if (findChainOperand(Load0) != findChainOperand(Load1))
				150	return false;
				151
Tom Stellard	f0a575f	2015-03-23 16:06:01 +0000	[diff] [blame]	152	Offset0 = Load0Offset->getZExtValue();
				153	Offset1 = Load1Offset->getZExtValue();
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	154	return true;
				155	}
				156
				157	// MUBUF and MTBUF can access the same addresses.
				158	if ((isMUBUF(Opc0) \|\| isMTBUF(Opc0)) && (isMUBUF(Opc1) \|\| isMTBUF(Opc1))) {
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	159
				160	// MUBUF and MTBUF have vaddr at different indices.
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	161	if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) \|\|
				162	findChainOperand(Load0) != findChainOperand(Load1) \|\|
				163	!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) \|\|
Tom Stellard	b8b8413	2014-09-03 15:22:39 +0000	[diff] [blame]	164	!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	165	return false;
				166
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	167	int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
				168	int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
				169
				170	if (OffIdx0 == -1 \|\| OffIdx1 == -1)
				171	return false;
				172
				173	// getNamedOperandIdx returns the index for MachineInstrs. Since they
				174	// inlcude the output in the operand list, but SDNodes don't, we need to
				175	// subtract the index by one.
				176	--OffIdx0;
				177	--OffIdx1;
				178
				179	SDValue Off0 = Load0->getOperand(OffIdx0);
				180	SDValue Off1 = Load1->getOperand(OffIdx1);
				181
				182	// The offset might be a FrameIndexSDNode.
				183	if (!isa<ConstantSDNode>(Off0) \|\| !isa<ConstantSDNode>(Off1))
				184	return false;
				185
				186	Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
				187	Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	188	return true;
				189	}
				190
				191	return false;
				192	}
				193
Matt Arsenault	2e99112	2014-09-10 23:26:16 +0000	[diff] [blame]	194	static bool isStride64(unsigned Opc) {
				195	switch (Opc) {
				196	case AMDGPU::DS_READ2ST64_B32:
				197	case AMDGPU::DS_READ2ST64_B64:
				198	case AMDGPU::DS_WRITE2ST64_B32:
				199	case AMDGPU::DS_WRITE2ST64_B64:
				200	return true;
				201	default:
				202	return false;
				203	}
				204	}
				205
Sanjoy Das	b666ea3	2015-06-15 18:44:14 +0000	[diff] [blame]	206	bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
Chad Rosier	c27a18f	2016-03-09 16:00:35 +0000	[diff] [blame]	207	int64_t &Offset,
Sanjoy Das	b666ea3	2015-06-15 18:44:14 +0000	[diff] [blame]	208	const TargetRegisterInfo *TRI) const {
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	209	unsigned Opc = LdSt->getOpcode();
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	210
				211	if (isDS(*LdSt)) {
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	212	const MachineOperand OffsetImm = getNamedOperand(LdSt,
				213	AMDGPU::OpName::offset);
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	214	if (OffsetImm) {
				215	// Normal, single offset LDS instruction.
				216	const MachineOperand AddrReg = getNamedOperand(LdSt,
				217	AMDGPU::OpName::addr);
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	218
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	219	BaseReg = AddrReg->getReg();
				220	Offset = OffsetImm->getImm();
				221	return true;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	222	}
				223
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	224	// The 2 offset instructions use offset0 and offset1 instead. We can treat
				225	// these as a load with a single offset if the 2 offsets are consecutive. We
				226	// will use this for some partially aligned loads.
				227	const MachineOperand Offset0Imm = getNamedOperand(LdSt,
				228	AMDGPU::OpName::offset0);
				229	const MachineOperand Offset1Imm = getNamedOperand(LdSt,
				230	AMDGPU::OpName::offset1);
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	231
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	232	uint8_t Offset0 = Offset0Imm->getImm();
				233	uint8_t Offset1 = Offset1Imm->getImm();
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	234
Matt Arsenault	84db5d9	2015-07-14 17:57:36 +0000	[diff] [blame]	235	if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	236	// Each of these offsets is in element sized units, so we need to convert
				237	// to bytes of the individual reads.
				238
				239	unsigned EltSize;
				240	if (LdSt->mayLoad())
				241	EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
				242	else {
				243	assert(LdSt->mayStore());
				244	int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
				245	EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
				246	}
				247
Matt Arsenault	2e99112	2014-09-10 23:26:16 +0000	[diff] [blame]	248	if (isStride64(Opc))
				249	EltSize *= 64;
				250
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	251	const MachineOperand AddrReg = getNamedOperand(LdSt,
				252	AMDGPU::OpName::addr);
				253	BaseReg = AddrReg->getReg();
				254	Offset = EltSize * Offset0;
				255	return true;
				256	}
				257
				258	return false;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	259	}
				260
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	261	if (isMUBUF(LdSt) \|\| isMTBUF(LdSt)) {
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	262	if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
				263	return false;
				264
				265	const MachineOperand AddrReg = getNamedOperand(LdSt,
				266	AMDGPU::OpName::vaddr);
				267	if (!AddrReg)
				268	return false;
				269
				270	const MachineOperand OffsetImm = getNamedOperand(LdSt,
				271	AMDGPU::OpName::offset);
				272	BaseReg = AddrReg->getReg();
				273	Offset = OffsetImm->getImm();
				274	return true;
				275	}
				276
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	277	if (isSMRD(*LdSt)) {
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	278	const MachineOperand OffsetImm = getNamedOperand(LdSt,
				279	AMDGPU::OpName::offset);
				280	if (!OffsetImm)
				281	return false;
				282
				283	const MachineOperand SBaseReg = getNamedOperand(LdSt,
				284	AMDGPU::OpName::sbase);
				285	BaseReg = SBaseReg->getReg();
				286	Offset = OffsetImm->getImm();
				287	return true;
				288	}
				289
				290	return false;
				291	}
				292
Jun Bum Lim	4c5bd58	2016-04-15 14:58:38 +0000	[diff] [blame]	293	bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
				294	MachineInstr *SecondLdSt,
				295	unsigned NumLoads) const {
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	296	const MachineOperand *FirstDst = nullptr;
				297	const MachineOperand *SecondDst = nullptr;
				298
				299	if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
				300	FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst);
				301	SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst);
				302	}
				303
Etienne Bergeron	06c14ec	2016-04-25 15:06:33 +0000	[diff] [blame]	304	if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	305	FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst);
				306	SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst);
				307	}
				308
				309	if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) \|\|
				310	(isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) {
				311	FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata);
				312	SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata);
				313	}
				314
				315	if (!FirstDst \|\| !SecondDst)
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	316	return false;
				317
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	318	// Try to limit clustering based on the total number of bytes loaded
				319	// rather than the number of instructions. This is done to help reduce
				320	// register pressure. The method used is somewhat inexact, though,
				321	// because it assumes that all loads in the cluster will load the
				322	// same number of bytes as FirstLdSt.
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	323
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	324	// The unit of this value is bytes.
				325	// FIXME: This needs finer tuning.
				326	unsigned LoadClusterThreshold = 16;
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	327
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	328	const MachineRegisterInfo &MRI =
				329	FirstLdSt->getParent()->getParent()->getRegInfo();
				330	const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
				331
				332	return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	333	}
				334
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	335	void
				336	SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	337	MachineBasicBlock::iterator MI, DebugLoc DL,
				338	unsigned DestReg, unsigned SrcReg,
				339	bool KillSrc) const {
				340
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	341	// If we are trying to copy to or from SCC, there is a bug somewhere else in
				342	// the backend. While it may be theoretically possible to do this, it should
				343	// never be necessary.
				344	assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
				345
Craig Topper	0afd0ab	2013-07-15 06:39:13 +0000	[diff] [blame]	346	static const int16_t Sub0_15[] = {
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	347	AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
				348	AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
				349	AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	350	AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	351	};
				352
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	353	static const int16_t Sub0_15_64[] = {
				354	AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
				355	AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
				356	AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
				357	AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
				358	};
				359
Craig Topper	0afd0ab	2013-07-15 06:39:13 +0000	[diff] [blame]	360	static const int16_t Sub0_7[] = {
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	361	AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	362	AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	363	};
				364
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	365	static const int16_t Sub0_7_64[] = {
				366	AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
				367	AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
				368	};
				369
Craig Topper	0afd0ab	2013-07-15 06:39:13 +0000	[diff] [blame]	370	static const int16_t Sub0_3[] = {
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	371	AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	372	};
				373
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	374	static const int16_t Sub0_3_64[] = {
				375	AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
				376	};
				377
Craig Topper	0afd0ab	2013-07-15 06:39:13 +0000	[diff] [blame]	378	static const int16_t Sub0_2[] = {
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	379	AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	380	};
				381
Craig Topper	0afd0ab	2013-07-15 06:39:13 +0000	[diff] [blame]	382	static const int16_t Sub0_1[] = {
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	383	AMDGPU::sub0, AMDGPU::sub1,
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	384	};
				385
				386	unsigned Opcode;
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	387	ArrayRef<int16_t> SubIndices;
				388	bool Forward;
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	389
				390	if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
				391	assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
				392	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
				393	.addReg(SrcReg, getKillRegState(KillSrc));
				394	return;
				395
Tom Stellard	aac1889	2013-02-07 19:39:43 +0000	[diff] [blame]	396	} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	397	if (DestReg == AMDGPU::VCC) {
Matt Arsenault	9998168	2015-02-14 02:55:56 +0000	[diff] [blame]	398	if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
				399	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
				400	.addReg(SrcReg, getKillRegState(KillSrc));
				401	} else {
				402	// FIXME: Hack until VReg_1 removed.
				403	assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
Matt Arsenault	4635915	2015-08-08 00:41:48 +0000	[diff] [blame]	404	BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
Matt Arsenault	9998168	2015-02-14 02:55:56 +0000	[diff] [blame]	405	.addImm(0)
				406	.addReg(SrcReg, getKillRegState(KillSrc));
				407	}
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	408
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	409	return;
				410	}
				411
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	412	assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
				413	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
				414	.addReg(SrcReg, getKillRegState(KillSrc));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	415	return;
				416
				417	} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
				418	assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	419	Opcode = AMDGPU::S_MOV_B64;
				420	SubIndices = Sub0_3_64;
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	421
				422	} else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
				423	assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	424	Opcode = AMDGPU::S_MOV_B64;
				425	SubIndices = Sub0_7_64;
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	426
				427	} else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
				428	assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
Nicolai Haehnle	6bcf8b2	2015-12-19 01:36:26 +0000	[diff] [blame]	429	Opcode = AMDGPU::S_MOV_B64;
				430	SubIndices = Sub0_15_64;
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	431
Tom Stellard	45c0b3a	2015-01-07 20:59:25 +0000	[diff] [blame]	432	} else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
				433	assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) \|\|
NAKAMURA Takumi	4bb85f9	2013-10-28 04:07:23 +0000	[diff] [blame]	434	AMDGPU::SReg_32RegClass.contains(SrcReg));
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	435	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
				436	.addReg(SrcReg, getKillRegState(KillSrc));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	437	return;
				438
				439	} else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
				440	assert(AMDGPU::VReg_64RegClass.contains(SrcReg) \|\|
NAKAMURA Takumi	4bb85f9	2013-10-28 04:07:23 +0000	[diff] [blame]	441	AMDGPU::SReg_64RegClass.contains(SrcReg));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	442	Opcode = AMDGPU::V_MOV_B32_e32;
				443	SubIndices = Sub0_1;
				444
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	445	} else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
				446	assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
				447	Opcode = AMDGPU::V_MOV_B32_e32;
				448	SubIndices = Sub0_2;
				449
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	450	} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
				451	assert(AMDGPU::VReg_128RegClass.contains(SrcReg) \|\|
NAKAMURA Takumi	4bb85f9	2013-10-28 04:07:23 +0000	[diff] [blame]	452	AMDGPU::SReg_128RegClass.contains(SrcReg));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	453	Opcode = AMDGPU::V_MOV_B32_e32;
				454	SubIndices = Sub0_3;
				455
				456	} else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
				457	assert(AMDGPU::VReg_256RegClass.contains(SrcReg) \|\|
NAKAMURA Takumi	4bb85f9	2013-10-28 04:07:23 +0000	[diff] [blame]	458	AMDGPU::SReg_256RegClass.contains(SrcReg));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	459	Opcode = AMDGPU::V_MOV_B32_e32;
				460	SubIndices = Sub0_7;
				461
				462	} else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
				463	assert(AMDGPU::VReg_512RegClass.contains(SrcReg) \|\|
NAKAMURA Takumi	4bb85f9	2013-10-28 04:07:23 +0000	[diff] [blame]	464	AMDGPU::SReg_512RegClass.contains(SrcReg));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	465	Opcode = AMDGPU::V_MOV_B32_e32;
				466	SubIndices = Sub0_15;
				467
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	468	} else {
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	469	llvm_unreachable("Can't copy register!");
				470	}
				471
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	472	if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
				473	Forward = true;
				474	else
				475	Forward = false;
				476
				477	for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
				478	unsigned SubIdx;
				479	if (Forward)
				480	SubIdx = SubIndices[Idx];
				481	else
				482	SubIdx = SubIndices[SubIndices.size() - Idx - 1];
				483
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	484	MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
				485	get(Opcode), RI.getSubReg(DestReg, SubIdx));
				486
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	487	Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	488
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	489	if (Idx == SubIndices.size() - 1)
				490	Builder.addReg(SrcReg, RegState::Kill \| RegState::Implicit);
				491
				492	if (Idx == 0)
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	493	Builder.addReg(DestReg, RegState::Define \| RegState::Implicit);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	494	}
				495	}
				496
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	497	int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
Matt Arsenault	f5b2cd8	2015-03-23 18:45:30 +0000	[diff] [blame]	498	const unsigned Opcode = MI.getOpcode();
				499
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	500	int NewOpc;
				501
				502	// Try to map original to commuted opcode
Marek Olsak	191507e	2015-02-03 17:38:12 +0000	[diff] [blame]	503	NewOpc = AMDGPU::getCommuteRev(Opcode);
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	504	if (NewOpc != -1)
				505	// Check if the commuted (REV) opcode exists on the target.
				506	return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	507
				508	// Try to map commuted to original opcode
Marek Olsak	191507e	2015-02-03 17:38:12 +0000	[diff] [blame]	509	NewOpc = AMDGPU::getCommuteOrig(Opcode);
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	510	if (NewOpc != -1)
				511	// Check if the original (non-REV) opcode exists on the target.
				512	return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	513
				514	return Opcode;
				515	}
				516
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	517	unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
				518
				519	if (DstRC->getSize() == 4) {
				520	return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
				521	} else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
				522	return AMDGPU::S_MOV_B64;
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	523	} else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
				524	return AMDGPU::V_MOV_B64_PSEUDO;
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	525	}
				526	return AMDGPU::COPY;
				527	}
				528
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	529	static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
				530	switch (Size) {
				531	case 4:
				532	return AMDGPU::SI_SPILL_S32_SAVE;
				533	case 8:
				534	return AMDGPU::SI_SPILL_S64_SAVE;
				535	case 16:
				536	return AMDGPU::SI_SPILL_S128_SAVE;
				537	case 32:
				538	return AMDGPU::SI_SPILL_S256_SAVE;
				539	case 64:
				540	return AMDGPU::SI_SPILL_S512_SAVE;
				541	default:
				542	llvm_unreachable("unknown register size");
				543	}
				544	}
				545
				546	static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
				547	switch (Size) {
				548	case 4:
				549	return AMDGPU::SI_SPILL_V32_SAVE;
				550	case 8:
				551	return AMDGPU::SI_SPILL_V64_SAVE;
Tom Stellard	703b2ec	2016-04-12 23:57:30 +0000	[diff] [blame]	552	case 12:
				553	return AMDGPU::SI_SPILL_V96_SAVE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	554	case 16:
				555	return AMDGPU::SI_SPILL_V128_SAVE;
				556	case 32:
				557	return AMDGPU::SI_SPILL_V256_SAVE;
				558	case 64:
				559	return AMDGPU::SI_SPILL_V512_SAVE;
				560	default:
				561	llvm_unreachable("unknown register size");
				562	}
				563	}
				564
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	565	void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
				566	MachineBasicBlock::iterator MI,
				567	unsigned SrcReg, bool isKill,
				568	int FrameIndex,
				569	const TargetRegisterClass *RC,
				570	const TargetRegisterInfo *TRI) const {
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	571	MachineFunction *MF = MBB.getParent();
Tom Stellard	42fb60e	2015-01-14 15:42:31 +0000	[diff] [blame]	572	SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Tom Stellard	c5cf2f0	2014-08-21 20:40:54 +0000	[diff] [blame]	573	MachineFrameInfo *FrameInfo = MF->getFrameInfo();
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	574	DebugLoc DL = MBB.findDebugLoc(MI);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	575
				576	unsigned Size = FrameInfo->getObjectSize(FrameIndex);
				577	unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
				578	MachinePointerInfo PtrInfo
				579	= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
				580	MachineMemOperand *MMO
				581	= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
				582	Size, Align);
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	583
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	584	if (RI.isSGPRClass(RC)) {
Matt Arsenault	5b22dfa	2015-11-05 05:27:10 +0000	[diff] [blame]	585	MFI->setHasSpilledSGPRs();
				586
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	587	// We are only allowed to create one new instruction when spilling
Tom Stellard	c5cf2f0	2014-08-21 20:40:54 +0000	[diff] [blame]	588	// registers, so we need to use pseudo instruction for spilling
				589	// SGPRs.
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	590	unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
				591	BuildMI(MBB, MI, DL, get(Opcode))
				592	.addReg(SrcReg) // src
				593	.addFrameIndex(FrameIndex) // frame_idx
				594	.addMemOperand(MMO);
Tom Stellard	42fb60e	2015-01-14 15:42:31 +0000	[diff] [blame]	595
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	596	return;
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	597	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	598
Nicolai Haehnle	df3a20c	2016-04-06 19:40:20 +0000	[diff] [blame]	599	if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	600	LLVMContext &Ctx = MF->getFunction()->getContext();
				601	Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
				602	" spill register");
Tom Stellard	0febe68	2015-01-14 15:42:34 +0000	[diff] [blame]	603	BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	604	.addReg(SrcReg);
				605
				606	return;
				607	}
				608
				609	assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
				610
				611	unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
				612	MFI->setHasSpilledVGPRs();
				613	BuildMI(MBB, MI, DL, get(Opcode))
				614	.addReg(SrcReg) // src
				615	.addFrameIndex(FrameIndex) // frame_idx
Matt Arsenault	26f8f3d	2015-11-30 21:16:03 +0000	[diff] [blame]	616	.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
				617	.addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
Tom Stellard	649b5db	2016-03-04 18:31:18 +0000	[diff] [blame]	618	.addImm(0) // offset
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	619	.addMemOperand(MMO);
				620	}
				621
				622	static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
				623	switch (Size) {
				624	case 4:
				625	return AMDGPU::SI_SPILL_S32_RESTORE;
				626	case 8:
				627	return AMDGPU::SI_SPILL_S64_RESTORE;
				628	case 16:
				629	return AMDGPU::SI_SPILL_S128_RESTORE;
				630	case 32:
				631	return AMDGPU::SI_SPILL_S256_RESTORE;
				632	case 64:
				633	return AMDGPU::SI_SPILL_S512_RESTORE;
				634	default:
				635	llvm_unreachable("unknown register size");
				636	}
				637	}
				638
				639	static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
				640	switch (Size) {
				641	case 4:
				642	return AMDGPU::SI_SPILL_V32_RESTORE;
				643	case 8:
				644	return AMDGPU::SI_SPILL_V64_RESTORE;
Tom Stellard	703b2ec	2016-04-12 23:57:30 +0000	[diff] [blame]	645	case 12:
				646	return AMDGPU::SI_SPILL_V96_RESTORE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	647	case 16:
				648	return AMDGPU::SI_SPILL_V128_RESTORE;
				649	case 32:
				650	return AMDGPU::SI_SPILL_V256_RESTORE;
				651	case 64:
				652	return AMDGPU::SI_SPILL_V512_RESTORE;
				653	default:
				654	llvm_unreachable("unknown register size");
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	655	}
				656	}
				657
				658	void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
				659	MachineBasicBlock::iterator MI,
				660	unsigned DestReg, int FrameIndex,
				661	const TargetRegisterClass *RC,
				662	const TargetRegisterInfo *TRI) const {
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	663	MachineFunction *MF = MBB.getParent();
Tom Stellard	e99fb65	2015-01-20 19:33:04 +0000	[diff] [blame]	664	const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Tom Stellard	c5cf2f0	2014-08-21 20:40:54 +0000	[diff] [blame]	665	MachineFrameInfo *FrameInfo = MF->getFrameInfo();
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	666	DebugLoc DL = MBB.findDebugLoc(MI);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	667	unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
				668	unsigned Size = FrameInfo->getObjectSize(FrameIndex);
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	669
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	670	MachinePointerInfo PtrInfo
				671	= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
				672
				673	MachineMemOperand *MMO = MF->getMachineMemOperand(
				674	PtrInfo, MachineMemOperand::MOLoad, Size, Align);
				675
				676	if (RI.isSGPRClass(RC)) {
				677	// FIXME: Maybe this should not include a memoperand because it will be
				678	// lowered to non-memory instructions.
				679	unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
				680	BuildMI(MBB, MI, DL, get(Opcode), DestReg)
				681	.addFrameIndex(FrameIndex) // frame_idx
				682	.addMemOperand(MMO);
				683
				684	return;
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	685	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	686
Nicolai Haehnle	df3a20c	2016-04-06 19:40:20 +0000	[diff] [blame]	687	if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	688	LLVMContext &Ctx = MF->getFunction()->getContext();
				689	Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
				690	" restore register");
Tom Stellard	0febe68	2015-01-14 15:42:34 +0000	[diff] [blame]	691	BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	692
				693	return;
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	694	}
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	695
				696	assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
				697
				698	unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
				699	BuildMI(MBB, MI, DL, get(Opcode), DestReg)
				700	.addFrameIndex(FrameIndex) // frame_idx
Matt Arsenault	26f8f3d	2015-11-30 21:16:03 +0000	[diff] [blame]	701	.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
				702	.addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
Tom Stellard	649b5db	2016-03-04 18:31:18 +0000	[diff] [blame]	703	.addImm(0) // offset
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	704	.addMemOperand(MMO);
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	705	}
				706
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	707	/// \param @Offset Offset in bytes of the FrameIndex being spilled
				708	unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
				709	MachineBasicBlock::iterator MI,
				710	RegScavenger *RS, unsigned TmpReg,
				711	unsigned FrameOffset,
				712	unsigned Size) const {
				713	MachineFunction *MF = MBB.getParent();
				714	SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Eric Christopher	7792e32	2015-01-30 23:24:40 +0000	[diff] [blame]	715	const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	716	const SIRegisterInfo *TRI =
				717	static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
				718	DebugLoc DL = MBB.findDebugLoc(MI);
				719	unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
				720	unsigned WavefrontSize = ST.getWavefrontSize();
				721
				722	unsigned TIDReg = MFI->getTIDReg();
				723	if (!MFI->hasCalculatedTID()) {
				724	MachineBasicBlock &Entry = MBB.getParent()->front();
				725	MachineBasicBlock::iterator Insert = Entry.front();
				726	DebugLoc DL = Insert->getDebugLoc();
				727
Tom Stellard	42fb60e	2015-01-14 15:42:31 +0000	[diff] [blame]	728	TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	729	if (TIDReg == AMDGPU::NoRegister)
				730	return TIDReg;
				731
				732
Nicolai Haehnle	df3a20c	2016-04-06 19:40:20 +0000	[diff] [blame]	733	if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	734	WorkGroupSize > WavefrontSize) {
				735
Matt Arsenault	ac234b6	2015-11-30 21:15:57 +0000	[diff] [blame]	736	unsigned TIDIGXReg
				737	= TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
				738	unsigned TIDIGYReg
				739	= TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
				740	unsigned TIDIGZReg
				741	= TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	742	unsigned InputPtrReg =
Matt Arsenault	ac234b6	2015-11-30 21:15:57 +0000	[diff] [blame]	743	TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
Benjamin Kramer	7149aab	2015-03-01 18:09:56 +0000	[diff] [blame]	744	for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	745	if (!Entry.isLiveIn(Reg))
				746	Entry.addLiveIn(Reg);
				747	}
				748
Matthias Braun	7dc03f0	2016-04-06 02:47:09 +0000	[diff] [blame]	749	RS->enterBasicBlock(Entry);
Matt Arsenault	0c90e95	2015-11-06 18:17:45 +0000	[diff] [blame]	750	// FIXME: Can we scavenge an SReg_64 and access the subregs?
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	751	unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
				752	unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
				753	BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
				754	.addReg(InputPtrReg)
				755	.addImm(SI::KernelInputOffsets::NGROUPS_Z);
				756	BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
				757	.addReg(InputPtrReg)
				758	.addImm(SI::KernelInputOffsets::NGROUPS_Y);
				759
				760	// NGROUPS.X * NGROUPS.Y
				761	BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
				762	.addReg(STmp1)
				763	.addReg(STmp0);
				764	// (NGROUPS.X * NGROUPS.Y) * TIDIG.X
				765	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
				766	.addReg(STmp1)
				767	.addReg(TIDIGXReg);
				768	// NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
				769	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
				770	.addReg(STmp0)
				771	.addReg(TIDIGYReg)
				772	.addReg(TIDReg);
				773	// (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
				774	BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
				775	.addReg(TIDReg)
				776	.addReg(TIDIGZReg);
				777	} else {
				778	// Get the wave id
				779	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
				780	TIDReg)
				781	.addImm(-1)
				782	.addImm(0);
				783
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	784	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	785	TIDReg)
				786	.addImm(-1)
				787	.addReg(TIDReg);
				788	}
				789
				790	BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
				791	TIDReg)
				792	.addImm(2)
				793	.addReg(TIDReg);
				794	MFI->setTIDReg(TIDReg);
				795	}
				796
				797	// Add FrameIndex to LDS offset
				798	unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
				799	BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
				800	.addImm(LDSOffset)
				801	.addReg(TIDReg);
				802
				803	return TmpReg;
				804	}
				805
Tom Stellard	d37630e	2016-04-07 14:47:07 +0000	[diff] [blame]	806	void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
				807	MachineBasicBlock::iterator MI,
Nicolai Haehnle	87323da	2015-12-17 16:46:42 +0000	[diff] [blame]	808	int Count) const {
Tom Stellard	341e293	2016-05-02 18:02:24 +0000	[diff] [blame]	809	DebugLoc DL = MBB.findDebugLoc(MI);
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	810	while (Count > 0) {
				811	int Arg;
				812	if (Count >= 8)
				813	Arg = 7;
				814	else
				815	Arg = Count - 1;
				816	Count -= 8;
Tom Stellard	341e293	2016-05-02 18:02:24 +0000	[diff] [blame]	817	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	818	.addImm(Arg);
				819	}
				820	}
				821
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	822	void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
				823	MachineBasicBlock::iterator MI) const {
				824	insertWaitStates(MBB, MI, 1);
				825	}
				826
				827	unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
				828	switch (MI.getOpcode()) {
				829	default: return 1; // FIXME: Do wait states equal cycles?
				830
				831	case AMDGPU::S_NOP:
				832	return MI.getOperand(0).getImm() + 1;
				833	}
				834	}
				835
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	836	bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	837	MachineBasicBlock &MBB = *MI->getParent();
				838	DebugLoc DL = MBB.findDebugLoc(MI);
				839	switch (MI->getOpcode()) {
				840	default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
				841
Tom Stellard	60024a0	2014-09-24 01:33:24 +0000	[diff] [blame]	842	case AMDGPU::SGPR_USE:
				843	// This is just a placeholder for register allocation.
				844	MI->eraseFromParent();
				845	break;
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	846
				847	case AMDGPU::V_MOV_B64_PSEUDO: {
				848	unsigned Dst = MI->getOperand(0).getReg();
				849	unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
				850	unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
				851
				852	const MachineOperand &SrcOp = MI->getOperand(1);
				853	// FIXME: Will this work for 64-bit floating point immediates?
				854	assert(!SrcOp.isFPImm());
				855	if (SrcOp.isImm()) {
				856	APInt Imm(64, SrcOp.getImm());
				857	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
				858	.addImm(Imm.getLoBits(32).getZExtValue())
				859	.addReg(Dst, RegState::Implicit);
				860	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
				861	.addImm(Imm.getHiBits(32).getZExtValue())
				862	.addReg(Dst, RegState::Implicit);
				863	} else {
				864	assert(SrcOp.isReg());
				865	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
				866	.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
				867	.addReg(Dst, RegState::Implicit);
				868	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
				869	.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
				870	.addReg(Dst, RegState::Implicit);
				871	}
				872	MI->eraseFromParent();
				873	break;
				874	}
Marek Olsak	7d77728	2015-03-24 13:40:15 +0000	[diff] [blame]	875
				876	case AMDGPU::V_CNDMASK_B64_PSEUDO: {
				877	unsigned Dst = MI->getOperand(0).getReg();
				878	unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
				879	unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
				880	unsigned Src0 = MI->getOperand(1).getReg();
				881	unsigned Src1 = MI->getOperand(2).getReg();
				882	const MachineOperand &SrcCond = MI->getOperand(3);
				883
				884	BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
				885	.addReg(RI.getSubReg(Src0, AMDGPU::sub0))
				886	.addReg(RI.getSubReg(Src1, AMDGPU::sub0))
				887	.addOperand(SrcCond);
				888	BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
				889	.addReg(RI.getSubReg(Src0, AMDGPU::sub1))
				890	.addReg(RI.getSubReg(Src1, AMDGPU::sub1))
				891	.addOperand(SrcCond);
				892	MI->eraseFromParent();
				893	break;
				894	}
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	895
				896	case AMDGPU::SI_CONSTDATA_PTR: {
				897	const SIRegisterInfo *TRI =
				898	static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
				899	MachineFunction &MF = *MBB.getParent();
				900	unsigned Reg = MI->getOperand(0).getReg();
				901	unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
				902	unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
				903
				904	// Create a bundle so these instructions won't be re-ordered by the
				905	// post-RA scheduler.
				906	MIBundleBuilder Bundler(MBB, MI);
				907	Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
				908
				909	// Add 32-bit offset from this instruction to the start of the
				910	// constant data.
				911	Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
				912	.addReg(RegLo)
				913	.addOperand(MI->getOperand(1)));
				914	Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
				915	.addReg(RegHi)
				916	.addImm(0));
				917
				918	llvm::finalizeBundle(MBB, Bundler.begin());
				919
				920	MI->eraseFromParent();
				921	break;
				922	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	923	}
				924	return true;
				925	}
				926
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	927	/// Commutes the operands in the given instruction.
				928	/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
				929	///
				930	/// Do not call this method for a non-commutable instruction or for
				931	/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
				932	/// Even though the instruction is commutable, the method may still
				933	/// fail to commute the operands, null pointer is returned in such cases.
				934	MachineInstr SIInstrInfo::commuteInstructionImpl(MachineInstr MI,
				935	bool NewMI,
				936	unsigned OpIdx0,
				937	unsigned OpIdx1) const {
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	938	int CommutedOpcode = commuteOpcode(*MI);
				939	if (CommutedOpcode == -1)
				940	return nullptr;
				941
Matt Arsenault	aff65fb	2014-09-26 17:54:43 +0000	[diff] [blame]	942	int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
				943	AMDGPU::OpName::src0);
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	944	MachineOperand &Src0 = MI->getOperand(Src0Idx);
				945	if (!Src0.isReg())
Matt Arsenault	aff65fb	2014-09-26 17:54:43 +0000	[diff] [blame]	946	return nullptr;
				947
				948	int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
				949	AMDGPU::OpName::src1);
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	950
				951	if ((OpIdx0 != static_cast<unsigned>(Src0Idx) \|\|
				952	OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
				953	(OpIdx0 != static_cast<unsigned>(Src1Idx) \|\|
				954	OpIdx1 != static_cast<unsigned>(Src0Idx)))
				955	return nullptr;
				956
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	957	MachineOperand &Src1 = MI->getOperand(Src1Idx);
				958
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	959
Nicolai Haehnle	e2dda4f	2016-04-19 21:58:22 +0000	[diff] [blame]	960	if (isVOP2(MI) \|\| isVOPC(MI)) {
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	961	const MCInstrDesc &InstrDesc = MI->getDesc();
Nicolai Haehnle	e2dda4f	2016-04-19 21:58:22 +0000	[diff] [blame]	962	// For VOP2 and VOPC instructions, any operand type is valid to use for
				963	// src0. Make sure we can use the src0 as src1.
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	964	//
				965	// We could be stricter here and only allow commuting if there is a reason
				966	// to do so. i.e. if both operands are VGPRs there is no real benefit,
				967	// although MachineCSE attempts to find matches by commuting.
				968	const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
				969	if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
				970	return nullptr;
Matt Arsenault	3c34ae2	2015-02-18 02:04:31 +0000	[diff] [blame]	971	}
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	972
				973	if (!Src1.isReg()) {
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	974	// Allow commuting instructions with Imm operands.
				975	if (NewMI \|\| !Src1.isImm() \|\|
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	976	(!isVOP2(MI) && !isVOP3(MI))) {
Craig Topper	062a2ba	2014-04-25 05:30:21 +0000	[diff] [blame]	977	return nullptr;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	978	}
Matt Arsenault	d282ada	2014-10-17 18:00:48 +0000	[diff] [blame]	979	// Be sure to copy the source modifiers to the right place.
				980	if (MachineOperand *Src0Mods
				981	= getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
				982	MachineOperand *Src1Mods
				983	= getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
				984
				985	int Src0ModsVal = Src0Mods->getImm();
				986	if (!Src1Mods && Src0ModsVal != 0)
				987	return nullptr;
				988
				989	// XXX - This assert might be a lie. It might be useful to have a neg
				990	// modifier with 0.0.
				991	int Src1ModsVal = Src1Mods->getImm();
				992	assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
				993
				994	Src1Mods->setImm(Src0ModsVal);
				995	Src0Mods->setImm(Src1ModsVal);
				996	}
				997
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	998	unsigned Reg = Src0.getReg();
				999	unsigned SubReg = Src0.getSubReg();
Matt Arsenault	6d3cd54	2014-10-17 18:00:39 +0000	[diff] [blame]	1000	if (Src1.isImm())
				1001	Src0.ChangeToImmediate(Src1.getImm());
Matt Arsenault	6d3cd54	2014-10-17 18:00:39 +0000	[diff] [blame]	1002	else
				1003	llvm_unreachable("Should only have immediates");
				1004
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	1005	Src1.ChangeToRegister(Reg, false);
				1006	Src1.setSubReg(SubReg);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1007	} else {
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1008	MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1009	}
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	1010
				1011	if (MI)
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	1012	MI->setDesc(get(CommutedOpcode));
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	1013
				1014	return MI;
Christian Konig	76edd4f	2013-02-26 17:52:29 +0000	[diff] [blame]	1015	}
				1016
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1017	// This needs to be implemented because the source modifiers may be inserted
				1018	// between the true commutable operands, and the base
				1019	// TargetInstrInfo::commuteInstruction uses it.
				1020	bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1021	unsigned &SrcOpIdx0,
				1022	unsigned &SrcOpIdx1) const {
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1023	const MCInstrDesc &MCID = MI->getDesc();
				1024	if (!MCID.isCommutable())
				1025	return false;
				1026
				1027	unsigned Opc = MI->getOpcode();
				1028	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				1029	if (Src0Idx == -1)
				1030	return false;
				1031
				1032	// FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1033	// immediate. Also, immediate src0 operand is not handled in
				1034	// SIInstrInfo::commuteInstruction();
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1035	if (!MI->getOperand(Src0Idx).isReg())
				1036	return false;
				1037
				1038	int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
				1039	if (Src1Idx == -1)
				1040	return false;
				1041
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1042	MachineOperand &Src1 = MI->getOperand(Src1Idx);
				1043	if (Src1.isImm()) {
				1044	// SIInstrInfo::commuteInstruction() does support commuting the immediate
				1045	// operand src1 in 2 and 3 operand instructions.
				1046	if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
				1047	return false;
				1048	} else if (Src1.isReg()) {
				1049	// If any source modifiers are set, the generic instruction commuting won't
				1050	// understand how to copy the source modifiers.
				1051	if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) \|\|
				1052	hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
				1053	return false;
				1054	} else
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1055	return false;
				1056
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1057	return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1058	}
				1059
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1060	unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
				1061	switch (Cond) {
				1062	case SIInstrInfo::SCC_TRUE:
				1063	return AMDGPU::S_CBRANCH_SCC1;
				1064	case SIInstrInfo::SCC_FALSE:
				1065	return AMDGPU::S_CBRANCH_SCC0;
Matt Arsenault	4945905	2016-05-21 00:29:40 +0000	[diff] [blame^]	1066	case SIInstrInfo::VCCNZ:
				1067	return AMDGPU::S_CBRANCH_VCCNZ;
				1068	case SIInstrInfo::VCCZ:
				1069	return AMDGPU::S_CBRANCH_VCCZ;
				1070	case SIInstrInfo::EXECNZ:
				1071	return AMDGPU::S_CBRANCH_EXECNZ;
				1072	case SIInstrInfo::EXECZ:
				1073	return AMDGPU::S_CBRANCH_EXECZ;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1074	default:
				1075	llvm_unreachable("invalid branch predicate");
				1076	}
				1077	}
				1078
				1079	SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
				1080	switch (Opcode) {
				1081	case AMDGPU::S_CBRANCH_SCC0:
				1082	return SCC_FALSE;
				1083	case AMDGPU::S_CBRANCH_SCC1:
				1084	return SCC_TRUE;
Matt Arsenault	4945905	2016-05-21 00:29:40 +0000	[diff] [blame^]	1085	case AMDGPU::S_CBRANCH_VCCNZ:
				1086	return VCCNZ;
				1087	case AMDGPU::S_CBRANCH_VCCZ:
				1088	return VCCZ;
				1089	case AMDGPU::S_CBRANCH_EXECNZ:
				1090	return EXECNZ;
				1091	case AMDGPU::S_CBRANCH_EXECZ:
				1092	return EXECZ;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1093	default:
				1094	return INVALID_BR;
				1095	}
				1096	}
				1097
				1098	bool SIInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
				1099	MachineBasicBlock *&TBB,
				1100	MachineBasicBlock *&FBB,
				1101	SmallVectorImpl<MachineOperand> &Cond,
				1102	bool AllowModify) const {
				1103	MachineBasicBlock::iterator I = MBB.getFirstTerminator();
				1104
				1105	if (I == MBB.end())
				1106	return false;
				1107
				1108	if (I->getOpcode() == AMDGPU::S_BRANCH) {
				1109	// Unconditional Branch
				1110	TBB = I->getOperand(0).getMBB();
				1111	return false;
				1112	}
				1113
				1114	BranchPredicate Pred = getBranchPredicate(I->getOpcode());
				1115	if (Pred == INVALID_BR)
				1116	return true;
				1117
				1118	MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
				1119	Cond.push_back(MachineOperand::CreateImm(Pred));
				1120
				1121	++I;
				1122
				1123	if (I == MBB.end()) {
				1124	// Conditional branch followed by fall-through.
				1125	TBB = CondBB;
				1126	return false;
				1127	}
				1128
				1129	if (I->getOpcode() == AMDGPU::S_BRANCH) {
				1130	TBB = CondBB;
				1131	FBB = I->getOperand(0).getMBB();
				1132	return false;
				1133	}
				1134
				1135	return true;
				1136	}
				1137
				1138	unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
				1139	MachineBasicBlock::iterator I = MBB.getFirstTerminator();
				1140
				1141	unsigned Count = 0;
				1142	while (I != MBB.end()) {
				1143	MachineBasicBlock::iterator Next = std::next(I);
				1144	I->eraseFromParent();
				1145	++Count;
				1146	I = Next;
				1147	}
				1148
				1149	return Count;
				1150	}
				1151
				1152	unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
				1153	MachineBasicBlock *TBB,
				1154	MachineBasicBlock *FBB,
				1155	ArrayRef<MachineOperand> Cond,
				1156	DebugLoc DL) const {
				1157
				1158	if (!FBB && Cond.empty()) {
				1159	BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
				1160	.addMBB(TBB);
				1161	return 1;
				1162	}
				1163
				1164	assert(TBB && Cond[0].isImm());
				1165
				1166	unsigned Opcode
				1167	= getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
				1168
				1169	if (!FBB) {
				1170	BuildMI(&MBB, DL, get(Opcode))
				1171	.addMBB(TBB);
				1172	return 1;
				1173	}
				1174
				1175	assert(TBB && FBB);
				1176
				1177	BuildMI(&MBB, DL, get(Opcode))
				1178	.addMBB(TBB);
				1179	BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
				1180	.addMBB(FBB);
				1181
				1182	return 2;
				1183	}
				1184
Matt Arsenault	72fcd5f	2016-05-21 00:29:34 +0000	[diff] [blame]	1185	bool SIInstrInfo::ReverseBranchCondition(
				1186	SmallVectorImpl<MachineOperand> &Cond) const {
				1187	assert(Cond.size() == 1);
				1188	Cond[0].setImm(-Cond[0].getImm());
				1189	return false;
				1190	}
				1191
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1192	static void removeModOperands(MachineInstr &MI) {
				1193	unsigned Opc = MI.getOpcode();
				1194	int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				1195	AMDGPU::OpName::src0_modifiers);
				1196	int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				1197	AMDGPU::OpName::src1_modifiers);
				1198	int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				1199	AMDGPU::OpName::src2_modifiers);
				1200
				1201	MI.RemoveOperand(Src2ModIdx);
				1202	MI.RemoveOperand(Src1ModIdx);
				1203	MI.RemoveOperand(Src0ModIdx);
				1204	}
				1205
Matt Arsenault	3d1c1de	2016-04-14 21:58:24 +0000	[diff] [blame]	1206	// TODO: Maybe this should be removed this and custom fold everything in
				1207	// SIFoldOperands?
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1208	bool SIInstrInfo::FoldImmediate(MachineInstr UseMI, MachineInstr DefMI,
				1209	unsigned Reg, MachineRegisterInfo *MRI) const {
				1210	if (!MRI->hasOneNonDBGUse(Reg))
				1211	return false;
				1212
				1213	unsigned Opc = UseMI->getOpcode();
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1214	if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64) {
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1215	// Don't fold if we are using source modifiers. The new VOP2 instructions
				1216	// don't have them.
				1217	if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) \|\|
				1218	hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) \|\|
				1219	hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
				1220	return false;
				1221	}
				1222
Matt Arsenault	3d1c1de	2016-04-14 21:58:24 +0000	[diff] [blame]	1223	const MachineOperand &ImmOp = DefMI->getOperand(1);
				1224
				1225	// If this is a free constant, there's no reason to do this.
				1226	// TODO: We could fold this here instead of letting SIFoldOperands do it
				1227	// later.
				1228	if (isInlineConstant(ImmOp, 4))
				1229	return false;
				1230
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1231	MachineOperand Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
				1232	MachineOperand Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
				1233	MachineOperand Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
				1234
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1235	// Multiplied part is the constant: Use v_madmk_f32
				1236	// We should only expect these to be on src0 due to canonicalizations.
				1237	if (Src0->isReg() && Src0->getReg() == Reg) {
Matt Arsenault	a266bd8	2016-03-02 04:05:14 +0000	[diff] [blame]	1238	if (!Src1->isReg() \|\| RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1239	return false;
				1240
Matt Arsenault	a266bd8	2016-03-02 04:05:14 +0000	[diff] [blame]	1241	if (!Src2->isReg() \|\| RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1242	return false;
				1243
Nikolay Haustov	6560781	2016-03-11 09:27:25 +0000	[diff] [blame]	1244	// We need to swap operands 0 and 1 since madmk constant is at operand 1.
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1245
				1246	const int64_t Imm = DefMI->getOperand(1).getImm();
				1247
				1248	// FIXME: This would be a lot easier if we could return a new instruction
				1249	// instead of having to modify in place.
				1250
				1251	// Remove these first since they are at the end.
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1252	UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1253	AMDGPU::OpName::omod));
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1254	UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1255	AMDGPU::OpName::clamp));
				1256
				1257	unsigned Src1Reg = Src1->getReg();
				1258	unsigned Src1SubReg = Src1->getSubReg();
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1259	Src0->setReg(Src1Reg);
				1260	Src0->setSubReg(Src1SubReg);
Matt Arsenault	5e10016	2015-04-24 01:57:58 +0000	[diff] [blame]	1261	Src0->setIsKill(Src1->isKill());
				1262
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1263	if (Opc == AMDGPU::V_MAC_F32_e64) {
				1264	UseMI->untieRegOperand(
				1265	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
				1266	}
				1267
Nikolay Haustov	6560781	2016-03-11 09:27:25 +0000	[diff] [blame]	1268	Src1->ChangeToImmediate(Imm);
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	1269
				1270	removeModOperands(*UseMI);
				1271	UseMI->setDesc(get(AMDGPU::V_MADMK_F32));
				1272
				1273	bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
				1274	if (DeleteDef)
				1275	DefMI->eraseFromParent();
				1276
				1277	return true;
				1278	}
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1279
				1280	// Added part is the constant: Use v_madak_f32
				1281	if (Src2->isReg() && Src2->getReg() == Reg) {
				1282	// Not allowed to use constant bus for another operand.
				1283	// We can however allow an inline immediate as src0.
				1284	if (!Src0->isImm() &&
				1285	(Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
				1286	return false;
				1287
Matt Arsenault	a266bd8	2016-03-02 04:05:14 +0000	[diff] [blame]	1288	if (!Src1->isReg() \|\| RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1289	return false;
				1290
				1291	const int64_t Imm = DefMI->getOperand(1).getImm();
				1292
				1293	// FIXME: This would be a lot easier if we could return a new instruction
				1294	// instead of having to modify in place.
				1295
				1296	// Remove these first since they are at the end.
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1297	UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1298	AMDGPU::OpName::omod));
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1299	UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1300	AMDGPU::OpName::clamp));
				1301
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1302	if (Opc == AMDGPU::V_MAC_F32_e64) {
				1303	UseMI->untieRegOperand(
				1304	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
				1305	}
				1306
				1307	// ChangingToImmediate adds Src2 back to the instruction.
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	1308	Src2->ChangeToImmediate(Imm);
				1309
				1310	// These come before src2.
				1311	removeModOperands(*UseMI);
				1312	UseMI->setDesc(get(AMDGPU::V_MADAK_F32));
				1313
				1314	bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
				1315	if (DeleteDef)
				1316	DefMI->eraseFromParent();
				1317
				1318	return true;
				1319	}
				1320	}
				1321
				1322	return false;
				1323	}
				1324
/// Return true if the byte ranges [OffsetA, OffsetA + WidthA) and
/// [OffsetB, OffsetB + WidthB) are disjoint.
///
/// All arguments are 64-bit so that the int64_t offsets and unsigned widths
/// handed in by the caller are not narrowed on the way in. With plain int
/// parameters a large offset could wrap and make two distant accesses look
/// overlapping (or an overlapping pair look disjoint).
static bool offsetsDoNotOverlap(int64_t WidthA, int64_t OffsetA,
                                int64_t WidthB, int64_t OffsetB) {
  // Only the access that starts lower can reach into the other one. When the
  // two offsets are equal, A is treated as the lower access.
  const bool AIsLow = OffsetA <= OffsetB;
  const int64_t LowOffset = AIsLow ? OffsetA : OffsetB;
  const int64_t HighOffset = AIsLow ? OffsetB : OffsetA;
  const int64_t LowWidth = AIsLow ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}
				1332
				1333	bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
				1334	MachineInstr *MIb) const {
Chad Rosier	c27a18f	2016-03-09 16:00:35 +0000	[diff] [blame]	1335	unsigned BaseReg0, BaseReg1;
				1336	int64_t Offset0, Offset1;
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1337
Sanjoy Das	b666ea3	2015-06-15 18:44:14 +0000	[diff] [blame]	1338	if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
				1339	getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	1340
				1341	if (!MIa->hasOneMemOperand() \|\| !MIb->hasOneMemOperand()) {
				1342	// FIXME: Handle ds_read2 / ds_write2.
				1343	return false;
				1344	}
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1345	unsigned Width0 = (*MIa->memoperands_begin())->getSize();
				1346	unsigned Width1 = (*MIb->memoperands_begin())->getSize();
				1347	if (BaseReg0 == BaseReg1 &&
				1348	offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
				1349	return true;
				1350	}
				1351	}
				1352
				1353	return false;
				1354	}
				1355
				1356	bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
				1357	MachineInstr *MIb,
				1358	AliasAnalysis *AA) const {
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1359	assert(MIa && (MIa->mayLoad() \|\| MIa->mayStore()) &&
				1360	"MIa must load from or modify a memory location");
				1361	assert(MIb && (MIb->mayLoad() \|\| MIb->mayStore()) &&
				1362	"MIb must load from or modify a memory location");
				1363
				1364	if (MIa->hasUnmodeledSideEffects() \|\| MIb->hasUnmodeledSideEffects())
				1365	return false;
				1366
				1367	// XXX - Can we relax this between address spaces?
				1368	if (MIa->hasOrderedMemoryRef() \|\| MIb->hasOrderedMemoryRef())
				1369	return false;
				1370
				1371	// TODO: Should we check the address space from the MachineMemOperand? That
				1372	// would allow us to distinguish objects we know don't alias based on the
Benjamin Kramer	df005cb	2015-08-08 18:27:36 +0000	[diff] [blame]	1373	// underlying address space, even if it was lowered to a different one,
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1374	// e.g. private accesses lowered to use MUBUF instructions on a scratch
				1375	// buffer.
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1376	if (isDS(*MIa)) {
				1377	if (isDS(*MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1378	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				1379
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1380	return !isFLAT(*MIb);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1381	}
				1382
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1383	if (isMUBUF(MIa) \|\| isMTBUF(MIa)) {
				1384	if (isMUBUF(MIb) \|\| isMTBUF(MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1385	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				1386
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1387	return !isFLAT(MIb) && !isSMRD(MIb);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1388	}
				1389
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1390	if (isSMRD(*MIa)) {
				1391	if (isSMRD(*MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1392	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				1393
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1394	return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(*MIa);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1395	}
				1396
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1397	if (isFLAT(*MIa)) {
				1398	if (isFLAT(*MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	1399	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				1400
				1401	return false;
				1402	}
				1403
				1404	return false;
				1405	}
				1406
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1407	MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
				1408	MachineBasicBlock::iterator &MI,
				1409	LiveVariables *LV) const {
				1410
				1411	switch (MI->getOpcode()) {
				1412	default: return nullptr;
				1413	case AMDGPU::V_MAC_F32_e64: break;
				1414	case AMDGPU::V_MAC_F32_e32: {
				1415	const MachineOperand Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
				1416	if (Src0->isImm() && !isInlineConstant(*Src0, 4))
				1417	return nullptr;
				1418	break;
				1419	}
				1420	}
				1421
Tom Stellard	cc4c871	2016-02-16 18:14:56 +0000	[diff] [blame]	1422	const MachineOperand Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	1423	const MachineOperand Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
				1424	const MachineOperand Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
				1425	const MachineOperand Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
				1426
				1427	return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
				1428	.addOperand(*Dst)
				1429	.addImm(0) // Src0 mods
				1430	.addOperand(*Src0)
				1431	.addImm(0) // Src1 mods
				1432	.addOperand(*Src1)
				1433	.addImm(0) // Src mods
				1434	.addOperand(*Src2)
				1435	.addImm(0) // clamp
				1436	.addImm(0); // omod
				1437	}
				1438
Nicolai Haehnle	213e87f	2016-03-21 20:28:33 +0000	[diff] [blame]	1439	bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
				1440	const MachineBasicBlock *MBB,
				1441	const MachineFunction &MF) const {
				1442	// Target-independent instructions do not have an implicit-use of EXEC, even
				1443	// when they operate on VGPRs. Treating EXEC modifications as scheduling
				1444	// boundaries prevents incorrect movements of such instructions.
				1445	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
				1446	if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
				1447	return true;
				1448
				1449	return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF);
				1450	}
				1451
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1452	bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault	303011a	2014-12-17 21:04:08 +0000	[diff] [blame]	1453	int64_t SVal = Imm.getSExtValue();
				1454	if (SVal >= -16 && SVal <= 64)
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1455	return true;
Tom Stellard	d008446	2014-03-17 17:03:52 +0000	[diff] [blame]	1456
Matt Arsenault	303011a	2014-12-17 21:04:08 +0000	[diff] [blame]	1457	if (Imm.getBitWidth() == 64) {
				1458	uint64_t Val = Imm.getZExtValue();
				1459	return (DoubleToBits(0.0) == Val) \|\|
				1460	(DoubleToBits(1.0) == Val) \|\|
				1461	(DoubleToBits(-1.0) == Val) \|\|
				1462	(DoubleToBits(0.5) == Val) \|\|
				1463	(DoubleToBits(-0.5) == Val) \|\|
				1464	(DoubleToBits(2.0) == Val) \|\|
				1465	(DoubleToBits(-2.0) == Val) \|\|
				1466	(DoubleToBits(4.0) == Val) \|\|
				1467	(DoubleToBits(-4.0) == Val);
				1468	}
				1469
Tom Stellard	d008446	2014-03-17 17:03:52 +0000	[diff] [blame]	1470	// The actual type of the operand does not seem to matter as long
				1471	// as the bits match one of the inline immediate values. For example:
				1472	//
				1473	// -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
				1474	// so it is a legal inline immediate.
				1475	//
				1476	// 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
				1477	// floating-point, so it is a legal inline immediate.
Matt Arsenault	303011a	2014-12-17 21:04:08 +0000	[diff] [blame]	1478	uint32_t Val = Imm.getZExtValue();
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1479
Matt Arsenault	303011a	2014-12-17 21:04:08 +0000	[diff] [blame]	1480	return (FloatToBits(0.0f) == Val) \|\|
				1481	(FloatToBits(1.0f) == Val) \|\|
				1482	(FloatToBits(-1.0f) == Val) \|\|
				1483	(FloatToBits(0.5f) == Val) \|\|
				1484	(FloatToBits(-0.5f) == Val) \|\|
				1485	(FloatToBits(2.0f) == Val) \|\|
				1486	(FloatToBits(-2.0f) == Val) \|\|
				1487	(FloatToBits(4.0f) == Val) \|\|
				1488	(FloatToBits(-4.0f) == Val);
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1489	}
				1490
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1491	bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
				1492	unsigned OpSize) const {
				1493	if (MO.isImm()) {
				1494	// MachineOperand provides no way to tell the true operand size, since it
				1495	// only records a 64-bit value. We need to know the size to determine if a
				1496	// 32-bit floating point immediate bit pattern is legal for an integer
				1497	// immediate. It would be for any 32-bit integer operand, but would not be
				1498	// for a 64-bit one.
				1499
				1500	unsigned BitSize = 8 * OpSize;
				1501	return isInlineConstant(APInt(BitSize, MO.getImm(), true));
				1502	}
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1503
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	1504	return false;
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1505	}
				1506
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1507	bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
				1508	unsigned OpSize) const {
				1509	return MO.isImm() && !isInlineConstant(MO, OpSize);
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1510	}
				1511
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	1512	static bool compareMachineOp(const MachineOperand &Op0,
				1513	const MachineOperand &Op1) {
				1514	if (Op0.getType() != Op1.getType())
				1515	return false;
				1516
				1517	switch (Op0.getType()) {
				1518	case MachineOperand::MO_Register:
				1519	return Op0.getReg() == Op1.getReg();
				1520	case MachineOperand::MO_Immediate:
				1521	return Op0.getImm() == Op1.getImm();
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	1522	default:
				1523	llvm_unreachable("Didn't expect to be comparing these operand types");
				1524	}
				1525	}
				1526
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	1527	bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
				1528	const MachineOperand &MO) const {
				1529	const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
				1530
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	1531	assert(MO.isImm() \|\| MO.isTargetIndex() \|\| MO.isFI());
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	1532
				1533	if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
				1534	return true;
				1535
				1536	if (OpInfo.RegClass < 0)
				1537	return false;
				1538
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1539	unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
				1540	if (isLiteralConstant(MO, OpSize))
Tom Stellard	b655052	2015-01-12 19:33:18 +0000	[diff] [blame]	1541	return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1542
Tom Stellard	b655052	2015-01-12 19:33:18 +0000	[diff] [blame]	1543	return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	1544	}
				1545
Tom Stellard	86d12eb	2014-08-01 00:32:28 +0000	[diff] [blame]	1546	bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsak	a93603d	2015-01-15 18:42:51 +0000	[diff] [blame]	1547	int Op32 = AMDGPU::getVOPe32(Opcode);
				1548	if (Op32 == -1)
				1549	return false;
				1550
				1551	return pseudoToMCOpcode(Op32) != -1;
Tom Stellard	86d12eb	2014-08-01 00:32:28 +0000	[diff] [blame]	1552	}
				1553
Tom Stellard	b4a313a	2014-08-01 00:32:39 +0000	[diff] [blame]	1554	bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
				1555	// The src0_modifier operand is present on all instructions
				1556	// that have modifiers.
				1557
				1558	return AMDGPU::getNamedOperandIdx(Opcode,
				1559	AMDGPU::OpName::src0_modifiers) != -1;
				1560	}
				1561
Matt Arsenault	ace5b76	2014-10-17 18:00:43 +0000	[diff] [blame]	1562	bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
				1563	unsigned OpName) const {
				1564	const MachineOperand *Mods = getNamedOperand(MI, OpName);
				1565	return Mods && Mods->getImm();
				1566	}
				1567
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1568	bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1569	const MachineOperand &MO,
				1570	unsigned OpSize) const {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1571	// Literal constants use the constant bus.
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1572	if (isLiteralConstant(MO, OpSize))
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1573	return true;
				1574
				1575	if (!MO.isReg() \|\| !MO.isUse())
				1576	return false;
				1577
				1578	if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
				1579	return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
				1580
				1581	// FLAT_SCR is just an SGPR pair.
				1582	if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
				1583	return true;
				1584
				1585	// EXEC register uses the constant bus.
				1586	if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
				1587	return true;
				1588
				1589	// SGPRs use the constant bus
Matt Arsenault	8226fc4	2016-03-02 23:00:21 +0000	[diff] [blame]	1590	return (MO.getReg() == AMDGPU::VCC \|\| MO.getReg() == AMDGPU::M0 \|\|
				1591	(!MO.isImplicit() &&
				1592	(AMDGPU::SGPR_32RegClass.contains(MO.getReg()) \|\|
				1593	AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1594	}
				1595
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	1596	static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
				1597	for (const MachineOperand &MO : MI.implicit_operands()) {
				1598	// We only care about reads.
				1599	if (MO.isDef())
				1600	continue;
				1601
				1602	switch (MO.getReg()) {
				1603	case AMDGPU::VCC:
				1604	case AMDGPU::M0:
				1605	case AMDGPU::FLAT_SCR:
				1606	return MO.getReg();
				1607
				1608	default:
				1609	break;
				1610	}
				1611	}
				1612
				1613	return AMDGPU::NoRegister;
				1614	}
				1615
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1616	bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
				1617	StringRef &ErrInfo) const {
				1618	uint16_t Opcode = MI->getOpcode();
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1619	const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1620	int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
				1621	int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
				1622	int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
				1623
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	1624	// Make sure the number of operands is correct.
				1625	const MCInstrDesc &Desc = get(Opcode);
				1626	if (!Desc.isVariadic() &&
				1627	Desc.getNumOperands() != MI->getNumExplicitOperands()) {
				1628	ErrInfo = "Instruction has wrong number of operands.";
				1629	return false;
				1630	}
				1631
Changpeng Fang	c996393	2015-12-18 20:04:28 +0000	[diff] [blame]	1632	// Make sure the register classes are correct.
Tom Stellard	b4a313a	2014-08-01 00:32:39 +0000	[diff] [blame]	1633	for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	1634	if (MI->getOperand(i).isFPImm()) {
				1635	ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
				1636	"all fp values to integers.";
				1637	return false;
				1638	}
				1639
Marek Olsak	8eeebcc	2015-02-18 22:12:41 +0000	[diff] [blame]	1640	int RegClass = Desc.OpInfo[i].RegClass;
				1641
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	1642	switch (Desc.OpInfo[i].OperandType) {
Tom Stellard	1106b1c	2015-01-20 17:49:41 +0000	[diff] [blame]	1643	case MCOI::OPERAND_REGISTER:
Matt Arsenault	63bef0d	2015-02-13 02:47:22 +0000	[diff] [blame]	1644	if (MI->getOperand(i).isImm()) {
Tom Stellard	1106b1c	2015-01-20 17:49:41 +0000	[diff] [blame]	1645	ErrInfo = "Illegal immediate value for operand.";
				1646	return false;
				1647	}
				1648	break;
				1649	case AMDGPU::OPERAND_REG_IMM32:
				1650	break;
				1651	case AMDGPU::OPERAND_REG_INLINE_C:
Marek Olsak	8eeebcc	2015-02-18 22:12:41 +0000	[diff] [blame]	1652	if (isLiteralConstant(MI->getOperand(i),
				1653	RI.getRegClass(RegClass)->getSize())) {
				1654	ErrInfo = "Illegal immediate value for operand.";
				1655	return false;
Tom Stellard	a305f93	2014-07-02 20:53:44 +0000	[diff] [blame]	1656	}
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	1657	break;
				1658	case MCOI::OPERAND_IMMEDIATE:
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	1659	// Check if this operand is an immediate.
				1660	// FrameIndex operands will be replaced by immediates, so they are
				1661	// allowed.
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	1662	if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	1663	ErrInfo = "Expected immediate, but got non-immediate";
				1664	return false;
				1665	}
				1666	// Fall-through
				1667	default:
				1668	continue;
				1669	}
				1670
				1671	if (!MI->getOperand(i).isReg())
				1672	continue;
				1673
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	1674	if (RegClass != -1) {
				1675	unsigned Reg = MI->getOperand(i).getReg();
				1676	if (TargetRegisterInfo::isVirtualRegister(Reg))
				1677	continue;
				1678
				1679	const TargetRegisterClass *RC = RI.getRegClass(RegClass);
				1680	if (!RC->contains(Reg)) {
				1681	ErrInfo = "Operand has incorrect register class.";
				1682	return false;
				1683	}
				1684	}
				1685	}
				1686
				1687
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1688	// Verify VOP*
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1689	if (isVOP1(MI) \|\| isVOP2(MI) \|\| isVOP3(MI) \|\| isVOPC(MI)) {
Matt Arsenault	e368cb3	2014-12-11 23:37:32 +0000	[diff] [blame]	1690	// Only look at the true operands. Only a real operand can use the constant
				1691	// bus, and we don't want to check pseudo-operands like the source modifier
				1692	// flags.
				1693	const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
				1694
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1695	unsigned ConstantBusCount = 0;
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	1696	unsigned SGPRUsed = findImplicitSGPRRead(*MI);
				1697	if (SGPRUsed != AMDGPU::NoRegister)
				1698	++ConstantBusCount;
				1699
Matt Arsenault	e368cb3	2014-12-11 23:37:32 +0000	[diff] [blame]	1700	for (int OpIdx : OpIndices) {
				1701	if (OpIdx == -1)
				1702	break;
Matt Arsenault	e368cb3	2014-12-11 23:37:32 +0000	[diff] [blame]	1703	const MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1704	if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1705	if (MO.isReg()) {
				1706	if (MO.getReg() != SGPRUsed)
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1707	++ConstantBusCount;
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1708	SGPRUsed = MO.getReg();
				1709	} else {
				1710	++ConstantBusCount;
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1711	}
				1712	}
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1713	}
				1714	if (ConstantBusCount > 1) {
				1715	ErrInfo = "VOP* instruction uses the constant bus more than once";
				1716	return false;
				1717	}
				1718	}
				1719
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	1720	// Verify misc. restrictions on specific instructions.
				1721	if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 \|\|
				1722	Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Matt Arsenault	262407b	2014-09-24 02:17:09 +0000	[diff] [blame]	1723	const MachineOperand &Src0 = MI->getOperand(Src0Idx);
				1724	const MachineOperand &Src1 = MI->getOperand(Src1Idx);
				1725	const MachineOperand &Src2 = MI->getOperand(Src2Idx);
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	1726	if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
				1727	if (!compareMachineOp(Src0, Src1) &&
				1728	!compareMachineOp(Src0, Src2)) {
				1729	ErrInfo = "v_div_scale_{f32\|f64} require src0 = src1 or src2";
				1730	return false;
				1731	}
				1732	}
				1733	}
				1734
Matt Arsenault	d092a06	2015-10-02 18:58:37 +0000	[diff] [blame]	1735	// Make sure we aren't losing exec uses in the td files. This mostly requires
				1736	// being careful when using let Uses to try to add other use registers.
				1737	if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
Nicolai Haehnle	b0c9748	2016-04-22 04:04:08 +0000	[diff] [blame]	1738	if (!MI->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
Matt Arsenault	d092a06	2015-10-02 18:58:37 +0000	[diff] [blame]	1739	ErrInfo = "VALU instruction does not implicitly read exec mask";
				1740	return false;
				1741	}
				1742	}
				1743
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	1744	return true;
				1745	}
				1746
Matt Arsenault	f14032a	2013-11-15 22:02:28 +0000	[diff] [blame]	1747	unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1748	switch (MI.getOpcode()) {
				1749	default: return AMDGPU::INSTRUCTION_LIST_END;
				1750	case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
				1751	case AMDGPU::COPY: return AMDGPU::COPY;
				1752	case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard	204e61b	2014-04-07 19:45:45 +0000	[diff] [blame]	1753	case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	1754	case AMDGPU::S_MOV_B32:
				1755	return MI.getOperand(1).isReg() ?
Tom Stellard	8c12fd9	2014-03-24 16:12:34 +0000	[diff] [blame]	1756	AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard	80942a1	2014-09-05 14:07:59 +0000	[diff] [blame]	1757	case AMDGPU::S_ADD_I32:
				1758	case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
Matt Arsenault	43b8e4e	2013-11-18 20:09:29 +0000	[diff] [blame]	1759	case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
Tom Stellard	80942a1	2014-09-05 14:07:59 +0000	[diff] [blame]	1760	case AMDGPU::S_SUB_I32:
				1761	case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
Matt Arsenault	43b8e4e	2013-11-18 20:09:29 +0000	[diff] [blame]	1762	case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault	869cd07	2014-09-03 23:24:35 +0000	[diff] [blame]	1763	case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault	8e2581b	2014-03-21 18:01:18 +0000	[diff] [blame]	1764	case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
				1765	case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
				1766	case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
				1767	case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
				1768	case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
				1769	case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
				1770	case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1771	case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
				1772	case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
				1773	case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
				1774	case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
				1775	case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
				1776	case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	1777	case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
				1778	case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	1779	case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
				1780	case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak	63a7b08	2015-03-24 13:40:21 +0000	[diff] [blame]	1781	case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault	43160e7	2014-06-18 17:13:57 +0000	[diff] [blame]	1782	case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault	2c33562	2014-04-09 07:16:16 +0000	[diff] [blame]	1783	case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	1784	case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault	0cb92e1	2014-04-11 19:25:18 +0000	[diff] [blame]	1785	case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
				1786	case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
				1787	case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
				1788	case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
				1789	case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
				1790	case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	1791	case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
				1792	case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
				1793	case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
				1794	case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
				1795	case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
				1796	case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	1797	case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault	295b86e	2014-06-17 17:36:27 +0000	[diff] [blame]	1798	case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault	8579601	2014-06-17 17:36:24 +0000	[diff] [blame]	1799	case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsak	d2af89d	2015-03-04 17:33:45 +0000	[diff] [blame]	1800	case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	1801	case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
				1802	case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1803	}
				1804	}
				1805
				1806	bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
				1807	return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
				1808	}
				1809
				1810	const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
				1811	unsigned OpNo) const {
				1812	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
				1813	const MCInstrDesc &Desc = get(MI.getOpcode());
				1814	if (MI.isVariadic() \|\| OpNo >= Desc.getNumOperands() \|\|
Matt Arsenault	102a704	2014-12-11 23:37:34 +0000	[diff] [blame]	1815	Desc.OpInfo[OpNo].RegClass == -1) {
				1816	unsigned Reg = MI.getOperand(OpNo).getReg();
				1817
				1818	if (TargetRegisterInfo::isVirtualRegister(Reg))
				1819	return MRI.getRegClass(Reg);
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1820	return RI.getPhysRegClass(Reg);
Matt Arsenault	102a704	2014-12-11 23:37:34 +0000	[diff] [blame]	1821	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1822
				1823	unsigned RCID = Desc.OpInfo[OpNo].RegClass;
				1824	return RI.getRegClass(RCID);
				1825	}
				1826
				1827	bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
				1828	switch (MI.getOpcode()) {
				1829	case AMDGPU::COPY:
				1830	case AMDGPU::REG_SEQUENCE:
Tom Stellard	4f3b04d	2014-04-17 21:00:07 +0000	[diff] [blame]	1831	case AMDGPU::PHI:
Tom Stellard	a568738	2014-05-15 14:41:55 +0000	[diff] [blame]	1832	case AMDGPU::INSERT_SUBREG:
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1833	return RI.hasVGPRs(getOpRegClass(MI, 0));
				1834	default:
				1835	return RI.hasVGPRs(getOpRegClass(MI, OpNo));
				1836	}
				1837	}
				1838
				1839	void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
				1840	MachineBasicBlock::iterator I = MI;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1841	MachineBasicBlock *MBB = MI->getParent();
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1842	MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1843	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1844	unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
				1845	const TargetRegisterClass *RC = RI.getRegClass(RCID);
				1846	unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1847	if (MO.isReg())
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1848	Opcode = AMDGPU::COPY;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1849	else if (RI.isSGPRClass(RC))
Matt Arsenault	671a005	2013-11-14 10:08:50 +0000	[diff] [blame]	1850	Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1851
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1852
Matt Arsenault	3a4d86a	2013-11-18 20:09:55 +0000	[diff] [blame]	1853	const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1854	if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard	0c93c9e	2014-09-05 14:08:01 +0000	[diff] [blame]	1855	VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1856	else
Tom Stellard	45c0b3a	2015-01-07 20:59:25 +0000	[diff] [blame]	1857	VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1858
Matt Arsenault	3a4d86a	2013-11-18 20:09:55 +0000	[diff] [blame]	1859	unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	1860	DebugLoc DL = MBB->findDebugLoc(I);
				1861	BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
				1862	.addOperand(MO);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1863	MO.ChangeToRegister(Reg, false);
				1864	}
				1865
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1866	unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
				1867	MachineRegisterInfo &MRI,
				1868	MachineOperand &SuperReg,
				1869	const TargetRegisterClass *SuperRC,
				1870	unsigned SubIdx,
				1871	const TargetRegisterClass *SubRC)
				1872	const {
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	1873	MachineBasicBlock *MBB = MI->getParent();
				1874	DebugLoc DL = MI->getDebugLoc();
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1875	unsigned SubReg = MRI.createVirtualRegister(SubRC);
				1876
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	1877	if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
				1878	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
				1879	.addReg(SuperReg.getReg(), 0, SubIdx);
				1880	return SubReg;
				1881	}
				1882
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1883	// Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault	08d8494	2014-06-03 23:06:13 +0000	[diff] [blame]	1884	// value so we don't need to worry about merging its subreg index with the
				1885	// SubIdx passed to this function. The register coalescer should be able to
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1886	// eliminate this extra copy.
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	1887	unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1888
Matt Arsenault	7480a0e	2014-11-17 21:11:37 +0000	[diff] [blame]	1889	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
				1890	.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
				1891
				1892	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
				1893	.addReg(NewSuperReg, 0, SubIdx);
				1894
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	1895	return SubReg;
				1896	}
				1897
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	1898	MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
				1899	MachineBasicBlock::iterator MII,
				1900	MachineRegisterInfo &MRI,
				1901	MachineOperand &Op,
				1902	const TargetRegisterClass *SuperRC,
				1903	unsigned SubIdx,
				1904	const TargetRegisterClass *SubRC) const {
				1905	if (Op.isImm()) {
				1906	// XXX - Is there a better way to do this?
				1907	if (SubIdx == AMDGPU::sub0)
				1908	return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
				1909	if (SubIdx == AMDGPU::sub1)
				1910	return MachineOperand::CreateImm(Op.getImm() >> 32);
				1911
				1912	llvm_unreachable("Unhandled register index for immediate");
				1913	}
				1914
				1915	unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
				1916	SubIdx, SubRC);
				1917	return MachineOperand::CreateReg(SubReg, false);
				1918	}
				1919
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	1920	// Change the order of operands from (0, 1, 2) to (0, 2, 1)
				1921	void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
				1922	assert(Inst->getNumExplicitOperands() == 3);
				1923	MachineOperand Op1 = Inst->getOperand(1);
				1924	Inst->RemoveOperand(1);
				1925	Inst->addOperand(Op1);
				1926	}
				1927
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	1928	bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
				1929	const MCOperandInfo &OpInfo,
				1930	const MachineOperand &MO) const {
				1931	if (!MO.isReg())
				1932	return false;
				1933
				1934	unsigned Reg = MO.getReg();
				1935	const TargetRegisterClass *RC =
				1936	TargetRegisterInfo::isVirtualRegister(Reg) ?
				1937	MRI.getRegClass(Reg) :
				1938	RI.getPhysRegClass(Reg);
				1939
Nicolai Haehnle	82fc962	2016-01-07 17:10:29 +0000	[diff] [blame]	1940	const SIRegisterInfo *TRI =
				1941	static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
				1942	RC = TRI->getSubRegClass(RC, MO.getSubReg());
				1943
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	1944	// In order to be legal, the common sub-class must be equal to the
				1945	// class of the current operand. For example:
				1946	//
				1947	// v_mov_b32 s0 ; Operand defined as vsrc_32
				1948	// ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
				1949	//
				1950	// s_sendmsg 0, s0 ; Operand defined as m0reg
				1951	// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
				1952
				1953	return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
				1954	}
				1955
				1956	bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
				1957	const MCOperandInfo &OpInfo,
				1958	const MachineOperand &MO) const {
				1959	if (MO.isReg())
				1960	return isLegalRegOperand(MRI, OpInfo, MO);
				1961
				1962	// Handle non-register types that are treated like immediates.
				1963	assert(MO.isImm() \|\| MO.isTargetIndex() \|\| MO.isFI());
				1964	return true;
				1965	}
				1966
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	1967	bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
				1968	const MachineOperand *MO) const {
				1969	const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	1970	const MCInstrDesc &InstDesc = MI->getDesc();
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	1971	const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
				1972	const TargetRegisterClass *DefinedRC =
				1973	OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
				1974	if (!MO)
				1975	MO = &MI->getOperand(OpIdx);
				1976
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	1977	if (isVALU(*MI) &&
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1978	usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	1979
				1980	RegSubRegPair SGPRUsed;
				1981	if (MO->isReg())
				1982	SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
				1983
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1984	for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
				1985	if (i == OpIdx)
				1986	continue;
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1987	const MachineOperand &Op = MI->getOperand(i);
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	1988	if (Op.isReg() &&
				1989	(Op.getReg() != SGPRUsed.Reg \|\| Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	1990	usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	1991	return false;
				1992	}
				1993	}
				1994	}
				1995
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	1996	if (MO->isReg()) {
				1997	assert(DefinedRC);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	1998	return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	1999	}
				2000
				2001
				2002	// Handle non-register types that are treated like immediates.
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	2003	assert(MO->isImm() \|\| MO->isTargetIndex() \|\| MO->isFI());
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	2004
Matt Arsenault	4364fef	2014-09-23 18:30:57 +0000	[diff] [blame]	2005	if (!DefinedRC) {
				2006	// This operand expects an immediate.
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	2007	return true;
Matt Arsenault	4364fef	2014-09-23 18:30:57 +0000	[diff] [blame]	2008	}
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	2009
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2010	return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	2011	}
				2012
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	2013	void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
				2014	MachineInstr *MI) const {
				2015	unsigned Opc = MI->getOpcode();
				2016	const MCInstrDesc &InstrDesc = get(Opc);
				2017
				2018	int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
				2019	MachineOperand &Src1 = MI->getOperand(Src1Idx);
				2020
				2021	// If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
				2022	// we need to only have one constant bus use.
				2023	//
				2024	// Note we do not need to worry about literal constants here. They are
				2025	// disabled for the operand type for instructions because they will always
				2026	// violate the one constant bus use rule.
				2027	bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
				2028	if (HasImplicitSGPR) {
				2029	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				2030	MachineOperand &Src0 = MI->getOperand(Src0Idx);
				2031
				2032	if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
				2033	legalizeOpWithMove(MI, Src0Idx);
				2034	}
				2035
				2036	// VOP2 src0 instructions support all operand types, so we don't need to check
				2037	// their legality. If src1 is already legal, we don't need to do anything.
				2038	if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
				2039	return;
				2040
				2041	// We do not use commuteInstruction here because it is too aggressive and will
				2042	// commute if it is possible. We only want to commute here if it improves
				2043	// legality. This can be called a fairly large number of times so don't waste
				2044	// compile time pointlessly swapping and checking legality again.
				2045	if (HasImplicitSGPR \|\| !MI->isCommutable()) {
				2046	legalizeOpWithMove(MI, Src1Idx);
				2047	return;
				2048	}
				2049
				2050	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				2051	MachineOperand &Src0 = MI->getOperand(Src0Idx);
				2052
				2053	// If src0 can be used as src1, commuting will make the operands legal.
				2054	// Otherwise we have to give up and insert a move.
				2055	//
				2056	// TODO: Other immediate-like operand kinds could be commuted if there was a
				2057	// MachineOperand::ChangeTo* for them.
				2058	if ((!Src1.isImm() && !Src1.isReg()) \|\|
				2059	!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
				2060	legalizeOpWithMove(MI, Src1Idx);
				2061	return;
				2062	}
				2063
				2064	int CommutedOpc = commuteOpcode(*MI);
				2065	if (CommutedOpc == -1) {
				2066	legalizeOpWithMove(MI, Src1Idx);
				2067	return;
				2068	}
				2069
				2070	MI->setDesc(get(CommutedOpc));
				2071
				2072	unsigned Src0Reg = Src0.getReg();
				2073	unsigned Src0SubReg = Src0.getSubReg();
				2074	bool Src0Kill = Src0.isKill();
				2075
				2076	if (Src1.isImm())
				2077	Src0.ChangeToImmediate(Src1.getImm());
				2078	else if (Src1.isReg()) {
				2079	Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
				2080	Src0.setSubReg(Src1.getSubReg());
				2081	} else
				2082	llvm_unreachable("Should only have register or immediate operands");
				2083
				2084	Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
				2085	Src1.setSubReg(Src0SubReg);
				2086	}
				2087
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	2088	// Legalize VOP3 operands. Because all operand types are supported for any
				2089	// operand, and since literal constants are not allowed and should never be
				2090	// seen, we only need to worry about inserting copies if we use multiple SGPR
				2091	// operands.
				2092	void SIInstrInfo::legalizeOperandsVOP3(
				2093	MachineRegisterInfo &MRI,
				2094	MachineInstr *MI) const {
				2095	unsigned Opc = MI->getOpcode();
				2096
				2097	int VOP3Idx[3] = {
				2098	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
				2099	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
				2100	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
				2101	};
				2102
				2103	// Find the one SGPR operand we are allowed to use.
				2104	unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
				2105
				2106	for (unsigned i = 0; i < 3; ++i) {
				2107	int Idx = VOP3Idx[i];
				2108	if (Idx == -1)
				2109	break;
				2110	MachineOperand &MO = MI->getOperand(Idx);
				2111
				2112	// We should never see a VOP3 instruction with an illegal immediate operand.
				2113	if (!MO.isReg())
				2114	continue;
				2115
				2116	if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
				2117	continue; // VGPRs are legal
				2118
				2119	if (SGPRReg == AMDGPU::NoRegister \|\| SGPRReg == MO.getReg()) {
				2120	SGPRReg = MO.getReg();
				2121	// We can use one SGPR in each VOP3 instruction.
				2122	continue;
				2123	}
				2124
				2125	// If we make it this far, then the operand is not legal and we must
				2126	// legalize it.
				2127	legalizeOpWithMove(MI, Idx);
				2128	}
				2129	}
				2130
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	2131	unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
				2132	MachineRegisterInfo &MRI) const {
				2133	const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
				2134	const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
				2135	unsigned DstReg = MRI.createVirtualRegister(SRC);
				2136	unsigned SubRegs = VRC->getSize() / 4;
				2137
				2138	SmallVector<unsigned, 8> SRegs;
				2139	for (unsigned i = 0; i < SubRegs; ++i) {
				2140	unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				2141	BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
				2142	get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
				2143	.addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
				2144	SRegs.push_back(SGPR);
				2145	}
				2146
				2147	MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
				2148	UseMI->getDebugLoc(),
				2149	get(AMDGPU::REG_SEQUENCE), DstReg);
				2150	for (unsigned i = 0; i < SubRegs; ++i) {
				2151	MIB.addReg(SRegs[i]);
				2152	MIB.addImm(RI.getSubRegFromChannel(i));
				2153	}
				2154	return DstReg;
				2155	}
				2156
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	2157	void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
				2158	MachineInstr *MI) const {
				2159
				2160	// If the pointer is store in VGPRs, then we need to move them to
				2161	// SGPRs using v_readfirstlane. This is safe because we only select
				2162	// loads with uniform pointers to SMRD instruction so we know the
				2163	// pointer value is uniform.
				2164	MachineOperand SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
				2165	if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
				2166	unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
				2167	SBase->setReg(SGPR);
				2168	}
				2169	}
				2170
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2171	void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
				2172	MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2173
				2174	// Legalize VOP2
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2175	if (isVOP2(MI) \|\| isVOPC(MI)) {
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	2176	legalizeOperandsVOP2(MRI, MI);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	2177	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2178	}
				2179
				2180	// Legalize VOP3
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	2181	if (isVOP3(*MI)) {
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	2182	legalizeOperandsVOP3(MRI, MI);
Matt Arsenault	e068f9a	2015-09-24 07:51:28 +0000	[diff] [blame]	2183	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2184	}
				2185
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	2186	// Legalize SMRD
				2187	if (isSMRD(*MI)) {
				2188	legalizeOperandsSMRD(MRI, MI);
				2189	return;
				2190	}
				2191
Tom Stellard	4f3b04d	2014-04-17 21:00:07 +0000	[diff] [blame]	2192	// Legalize REG_SEQUENCE and PHI
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2193	// The register class of the operands much be the same type as the register
				2194	// class of the output.
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	2195	if (MI->getOpcode() == AMDGPU::PHI) {
Craig Topper	062a2ba	2014-04-25 05:30:21 +0000	[diff] [blame]	2196	const TargetRegisterClass RC = nullptr, SRC = nullptr, *VRC = nullptr;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2197	for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
				2198	if (!MI->getOperand(i).isReg() \|\|
				2199	!TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
				2200	continue;
				2201	const TargetRegisterClass *OpRC =
				2202	MRI.getRegClass(MI->getOperand(i).getReg());
				2203	if (RI.hasVGPRs(OpRC)) {
				2204	VRC = OpRC;
				2205	} else {
				2206	SRC = OpRC;
				2207	}
				2208	}
				2209
				2210	// If any of the operands are VGPR registers, then they all most be
				2211	// otherwise we will create illegal VGPR->SGPR copies when legalizing
				2212	// them.
				2213	if (VRC \|\| !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
				2214	if (!VRC) {
				2215	assert(SRC);
				2216	VRC = RI.getEquivalentVGPRClass(SRC);
				2217	}
				2218	RC = VRC;
				2219	} else {
				2220	RC = SRC;
				2221	}
				2222
				2223	// Update all the operands so they have the same type.
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	2224	for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
				2225	MachineOperand &Op = MI->getOperand(I);
				2226	if (!Op.isReg() \|\| !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2227	continue;
				2228	unsigned DstReg = MRI.createVirtualRegister(RC);
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	2229
				2230	// MI is a PHI instruction.
				2231	MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
				2232	MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
				2233
				2234	BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
				2235	.addOperand(Op);
				2236	Op.setReg(DstReg);
				2237	}
				2238	}
				2239
				2240	// REG_SEQUENCE doesn't really require operand legalization, but if one has a
				2241	// VGPR dest type and SGPR sources, insert copies so all operands are
				2242	// VGPRs. This seems to help operand folding / the register coalescer.
				2243	if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
				2244	MachineBasicBlock *MBB = MI->getParent();
				2245	const TargetRegisterClass DstRC = getOpRegClass(MI, 0);
				2246	if (RI.hasVGPRs(DstRC)) {
				2247	// Update all the operands so they are VGPR register classes. These may
				2248	// not be the same register class because REG_SEQUENCE supports mixing
				2249	// subregister index types e.g. sub0_sub1 + sub2 + sub3
				2250	for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
				2251	MachineOperand &Op = MI->getOperand(I);
				2252	if (!Op.isReg() \|\| !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
				2253	continue;
				2254
				2255	const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
				2256	const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
				2257	if (VRC == OpRC)
				2258	continue;
				2259
				2260	unsigned DstReg = MRI.createVirtualRegister(VRC);
				2261
				2262	BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
				2263	.addOperand(Op);
				2264
				2265	Op.setReg(DstReg);
				2266	Op.setIsKill();
Tom Stellard	4f3b04d	2014-04-17 21:00:07 +0000	[diff] [blame]	2267	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2268	}
Matt Arsenault	e068f9a	2015-09-24 07:51:28 +0000	[diff] [blame]	2269
				2270	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2271	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2272
Tom Stellard	a568738	2014-05-15 14:41:55 +0000	[diff] [blame]	2273	// Legalize INSERT_SUBREG
				2274	// src0 must have the same register class as dst
				2275	if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
				2276	unsigned Dst = MI->getOperand(0).getReg();
				2277	unsigned Src0 = MI->getOperand(1).getReg();
				2278	const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
				2279	const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
				2280	if (DstRC != Src0RC) {
				2281	MachineBasicBlock &MBB = *MI->getParent();
				2282	unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
				2283	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
				2284	.addReg(Src0);
				2285	MI->getOperand(1).setReg(NewSrc0);
				2286	}
				2287	return;
				2288	}
				2289
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	2290	// Legalize MIMG
				2291	if (isMIMG(*MI)) {
				2292	MachineOperand SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
				2293	if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
				2294	unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
				2295	SRsrc->setReg(SGPR);
				2296	}
				2297
				2298	MachineOperand SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
				2299	if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
				2300	unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
				2301	SSamp->setReg(SGPR);
				2302	}
				2303	return;
				2304	}
				2305
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2306	// Legalize MUBUF* instructions
				2307	// FIXME: If we start using the non-addr64 instructions for compute, we
				2308	// may need to legalize them here.
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2309	int SRsrcIdx =
				2310	AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
				2311	if (SRsrcIdx != -1) {
				2312	// We have an MUBUF instruction
				2313	MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
				2314	unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
				2315	if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
				2316	RI.getRegClass(SRsrcRC))) {
				2317	// The operands are legal.
				2318	// FIXME: We may need to legalize operands besided srsrc.
				2319	return;
				2320	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2321
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2322	MachineBasicBlock &MBB = *MI->getParent();
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2323
Eric Christopher	572e03a	2015-06-19 01:53:21 +0000	[diff] [blame]	2324	// Extract the ptr from the resource descriptor.
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2325	unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
				2326	&AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2327
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2328	// Create an empty resource descriptor
				2329	unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				2330	unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				2331	unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				2332	unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	2333	uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2334
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2335	// Zero64 = 0
				2336	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
				2337	Zero64)
				2338	.addImm(0);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2339
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2340	// SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
				2341	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
				2342	SRsrcFormatLo)
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	2343	.addImm(RsrcDataFormat & 0xFFFFFFFF);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2344
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2345	// SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
				2346	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
				2347	SRsrcFormatHi)
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	2348	.addImm(RsrcDataFormat >> 32);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2349
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2350	// NewSRsrc = {Zero64, SRsrcFormat}
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2351	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
				2352	.addReg(Zero64)
				2353	.addImm(AMDGPU::sub0_sub1)
				2354	.addReg(SRsrcFormatLo)
				2355	.addImm(AMDGPU::sub2)
				2356	.addReg(SRsrcFormatHi)
				2357	.addImm(AMDGPU::sub3);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2358
				2359	MachineOperand VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
				2360	unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2361	if (VAddr) {
				2362	// This is already an ADDR64 instruction so we need to add the pointer
				2363	// extracted from the resource descriptor to the current value of VAddr.
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2364	unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2365	unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2366
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2367	// NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	2368	DebugLoc DL = MI->getDebugLoc();
				2369	BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2370	.addReg(SRsrcPtr, 0, AMDGPU::sub0)
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	2371	.addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2372
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2373	// NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	2374	BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2375	.addReg(SRsrcPtr, 0, AMDGPU::sub1)
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	2376	.addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2377
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2378	// NewVaddr = {NewVaddrHi, NewVaddrLo}
				2379	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
				2380	.addReg(NewVAddrLo)
				2381	.addImm(AMDGPU::sub0)
				2382	.addReg(NewVAddrHi)
				2383	.addImm(AMDGPU::sub1);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2384	} else {
				2385	// This instructions is the _OFFSET variant, so we need to convert it to
				2386	// ADDR64.
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	2387	assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
				2388	< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
				2389	"FIXME: Need to emit flat atomics here");
				2390
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2391	MachineOperand VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
				2392	MachineOperand Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
				2393	MachineOperand SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2394	unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	2395
				2396	// Atomics rith return have have an additional tied operand and are
				2397	// missing some of the special bits.
				2398	MachineOperand VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
				2399	MachineInstr *Addr64;
				2400
				2401	if (!VDataIn) {
				2402	// Regular buffer load / store.
				2403	MachineInstrBuilder MIB
				2404	= BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
				2405	.addOperand(*VData)
				2406	.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
				2407	// This will be replaced later
				2408	// with the new value of vaddr.
				2409	.addOperand(*SRsrc)
				2410	.addOperand(*SOffset)
				2411	.addOperand(*Offset);
				2412
				2413	// Atomics do not have this operand.
				2414	if (const MachineOperand *GLC
				2415	= getNamedOperand(*MI, AMDGPU::OpName::glc)) {
				2416	MIB.addImm(GLC->getImm());
				2417	}
				2418
				2419	MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
				2420
				2421	if (const MachineOperand *TFE
				2422	= getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
				2423	MIB.addImm(TFE->getImm());
				2424	}
				2425
				2426	MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
				2427	Addr64 = MIB;
				2428	} else {
				2429	// Atomics with return.
				2430	Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
				2431	.addOperand(*VData)
				2432	.addOperand(*VDataIn)
				2433	.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
				2434	// This will be replaced later
				2435	// with the new value of vaddr.
				2436	.addOperand(*SRsrc)
				2437	.addOperand(*SOffset)
				2438	.addOperand(*Offset)
				2439	.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
				2440	.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
				2441	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2442
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2443	MI->removeFromParent();
				2444	MI = Addr64;
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2445
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	2446	// NewVaddr = {NewVaddrHi, NewVaddrLo}
				2447	BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
				2448	.addReg(SRsrcPtr, 0, AMDGPU::sub0)
				2449	.addImm(AMDGPU::sub0)
				2450	.addReg(SRsrcPtr, 0, AMDGPU::sub1)
				2451	.addImm(AMDGPU::sub1);
				2452
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2453	VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
				2454	SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2455	}
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2456
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	2457	// Update the instruction to use NewVaddr
				2458	VAddr->setReg(NewVAddr);
				2459	// Update the instruction to use NewSRsrc
				2460	SRsrc->setReg(NewSRsrc);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2461	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2462	}
				2463
				2464	void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
				2465	SmallVector<MachineInstr *, 128> Worklist;
				2466	Worklist.push_back(&TopInst);
				2467
				2468	while (!Worklist.empty()) {
				2469	MachineInstr *Inst = Worklist.pop_back_val();
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	2470	MachineBasicBlock *MBB = Inst->getParent();
				2471	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				2472
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	2473	unsigned Opcode = Inst->getOpcode();
Tom Stellard	0c354f2	2014-04-30 15:31:29 +0000	[diff] [blame]	2474	unsigned NewOpcode = getVALUOp(*Inst);
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	2475
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	2476	// Handle some special cases
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	2477	switch (Opcode) {
Tom Stellard	0c354f2	2014-04-30 15:31:29 +0000	[diff] [blame]	2478	default:
Tom Stellard	0c354f2	2014-04-30 15:31:29 +0000	[diff] [blame]	2479	break;
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2480	case AMDGPU::S_AND_B64:
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2481	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2482	Inst->eraseFromParent();
				2483	continue;
				2484
				2485	case AMDGPU::S_OR_B64:
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2486	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2487	Inst->eraseFromParent();
				2488	continue;
				2489
				2490	case AMDGPU::S_XOR_B64:
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2491	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2492	Inst->eraseFromParent();
				2493	continue;
				2494
				2495	case AMDGPU::S_NOT_B64:
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2496	splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2497	Inst->eraseFromParent();
				2498	continue;
				2499
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2500	case AMDGPU::S_BCNT1_I32_B64:
				2501	splitScalar64BitBCNT(Worklist, Inst);
				2502	Inst->eraseFromParent();
				2503	continue;
				2504
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	2505	case AMDGPU::S_BFE_I64: {
				2506	splitScalar64BitBFE(Worklist, Inst);
				2507	Inst->eraseFromParent();
				2508	continue;
				2509	}
				2510
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	2511	case AMDGPU::S_LSHL_B32:
				2512	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2513	NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
				2514	swapOperands(Inst);
				2515	}
				2516	break;
				2517	case AMDGPU::S_ASHR_I32:
				2518	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2519	NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
				2520	swapOperands(Inst);
				2521	}
				2522	break;
				2523	case AMDGPU::S_LSHR_B32:
				2524	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2525	NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
				2526	swapOperands(Inst);
				2527	}
				2528	break;
Marek Olsak	707a6d0	2015-02-03 21:53:01 +0000	[diff] [blame]	2529	case AMDGPU::S_LSHL_B64:
				2530	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2531	NewOpcode = AMDGPU::V_LSHLREV_B64;
				2532	swapOperands(Inst);
				2533	}
				2534	break;
				2535	case AMDGPU::S_ASHR_I64:
				2536	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2537	NewOpcode = AMDGPU::V_ASHRREV_I64;
				2538	swapOperands(Inst);
				2539	}
				2540	break;
				2541	case AMDGPU::S_LSHR_B64:
				2542	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
				2543	NewOpcode = AMDGPU::V_LSHRREV_B64;
				2544	swapOperands(Inst);
				2545	}
				2546	break;
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	2547
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	2548	case AMDGPU::S_ABS_I32:
				2549	lowerScalarAbs(Worklist, Inst);
				2550	Inst->eraseFromParent();
				2551	continue;
				2552
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2553	case AMDGPU::S_CBRANCH_SCC0:
				2554	case AMDGPU::S_CBRANCH_SCC1:
				2555	// Clear unused bits of vcc
				2556	BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
				2557	.addReg(AMDGPU::EXEC)
				2558	.addReg(AMDGPU::VCC);
				2559	break;
				2560
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2561	case AMDGPU::S_BFE_U64:
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2562	case AMDGPU::S_BFM_B64:
				2563	llvm_unreachable("Moving this op to VALU not implemented");
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	2564	}
				2565
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2566	if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
				2567	// We cannot move this instruction to the VALU, so we should try to
				2568	// legalize its operands instead.
				2569	legalizeOperands(Inst);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2570	continue;
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	2571	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2572
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2573	// Use the new VALU Opcode.
				2574	const MCInstrDesc &NewDesc = get(NewOpcode);
				2575	Inst->setDesc(NewDesc);
				2576
Matt Arsenault	f0b1e3a	2013-11-18 20:09:21 +0000	[diff] [blame]	2577	// Remove any references to SCC. Vector instructions can't read from it, and
				2578	// We're just about to add the implicit use / defs of VCC, and we don't want
				2579	// both.
				2580	for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
				2581	MachineOperand &Op = Inst->getOperand(i);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2582	if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Matt Arsenault	f0b1e3a	2013-11-18 20:09:21 +0000	[diff] [blame]	2583	Inst->RemoveOperand(i);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2584	addSCCDefUsersToVALUWorklist(Inst, Worklist);
				2585	}
Matt Arsenault	f0b1e3a	2013-11-18 20:09:21 +0000	[diff] [blame]	2586	}
				2587
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	2588	if (Opcode == AMDGPU::S_SEXT_I32_I8 \|\| Opcode == AMDGPU::S_SEXT_I32_I16) {
				2589	// We are converting these to a BFE, so we need to add the missing
				2590	// operands for the size and offset.
				2591	unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
				2592	Inst->addOperand(MachineOperand::CreateImm(0));
				2593	Inst->addOperand(MachineOperand::CreateImm(Size));
				2594
Matt Arsenault	b5b5110	2014-06-10 19:18:21 +0000	[diff] [blame]	2595	} else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
				2596	// The VALU version adds the second operand to the result, so insert an
				2597	// extra 0 operand.
				2598	Inst->addOperand(MachineOperand::CreateImm(0));
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2599	}
				2600
Alex Lorenz	b4d0d6a	2015-07-31 23:30:09 +0000	[diff] [blame]	2601	Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2602
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	2603	if (Opcode == AMDGPU::S_BFE_I32 \|\| Opcode == AMDGPU::S_BFE_U32) {
				2604	const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
				2605	// If we need to move this to VGPRs, we need to unpack the second operand
				2606	// back into the 2 separate ones for bit offset and width.
				2607	assert(OffsetWidthOp.isImm() &&
				2608	"Scalar BFE is only implemented for constant width and offset");
				2609	uint32_t Imm = OffsetWidthOp.getImm();
				2610
				2611	uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
				2612	uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	2613	Inst->RemoveOperand(2); // Remove old immediate.
				2614	Inst->addOperand(MachineOperand::CreateImm(Offset));
Vincent Lejeune	94af31f	2014-05-10 19:18:33 +0000	[diff] [blame]	2615	Inst->addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	2616	}
				2617
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2618	bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef();
				2619	unsigned NewDstReg = AMDGPU::NoRegister;
				2620	if (HasDst) {
				2621	// Update the destination register class.
				2622	const TargetRegisterClass NewDstRC = getDestEquivalentVGPRClass(Inst);
				2623	if (!NewDstRC)
				2624	continue;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2625
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2626	unsigned DstReg = Inst->getOperand(0).getReg();
				2627	NewDstReg = MRI.createVirtualRegister(NewDstRC);
				2628	MRI.replaceRegWith(DstReg, NewDstReg);
				2629	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2630
Tom Stellard	e1a2445	2014-04-17 21:00:01 +0000	[diff] [blame]	2631	// Legalize the operands
				2632	legalizeOperands(Inst);
				2633
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2634	if (HasDst)
				2635	addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	2636	}
				2637	}
				2638
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	2639	//===----------------------------------------------------------------------===//
				2640	// Indirect addressing callbacks
				2641	//===----------------------------------------------------------------------===//
				2642
Tom Stellard	26a3b67	2013-10-22 18:19:10 +0000	[diff] [blame]	2643	const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
Tom Stellard	45c0b3a	2015-01-07 20:59:25 +0000	[diff] [blame]	2644	return &AMDGPU::VGPR_32RegClass;
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	2645	}
				2646
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	2647	void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
				2648	MachineInstr *Inst) const {
				2649	MachineBasicBlock &MBB = *Inst->getParent();
				2650	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				2651	MachineBasicBlock::iterator MII = Inst;
				2652	DebugLoc DL = Inst->getDebugLoc();
				2653
				2654	MachineOperand &Dest = Inst->getOperand(0);
				2655	MachineOperand &Src = Inst->getOperand(1);
				2656	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2657	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2658
				2659	BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
				2660	.addImm(0)
				2661	.addReg(Src.getReg());
				2662
				2663	BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
				2664	.addReg(Src.getReg())
				2665	.addReg(TmpReg);
				2666
				2667	MRI.replaceRegWith(Dest.getReg(), ResultReg);
				2668	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
				2669	}
				2670
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2671	void SIInstrInfo::splitScalar64BitUnaryOp(
				2672	SmallVectorImpl<MachineInstr *> &Worklist,
				2673	MachineInstr *Inst,
				2674	unsigned Opcode) const {
				2675	MachineBasicBlock &MBB = *Inst->getParent();
				2676	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				2677
				2678	MachineOperand &Dest = Inst->getOperand(0);
				2679	MachineOperand &Src0 = Inst->getOperand(1);
				2680	DebugLoc DL = Inst->getDebugLoc();
				2681
				2682	MachineBasicBlock::iterator MII = Inst;
				2683
				2684	const MCInstrDesc &InstDesc = get(Opcode);
				2685	const TargetRegisterClass *Src0RC = Src0.isReg() ?
				2686	MRI.getRegClass(Src0.getReg()) :
				2687	&AMDGPU::SGPR_32RegClass;
				2688
				2689	const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
				2690
				2691	MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				2692	AMDGPU::sub0, Src0SubRC);
				2693
				2694	const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2695	const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
				2696	const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2697
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2698	unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
				2699	BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2700	.addOperand(SrcReg0Sub0);
				2701
				2702	MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				2703	AMDGPU::sub1, Src0SubRC);
				2704
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2705	unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
				2706	BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2707	.addOperand(SrcReg0Sub1);
				2708
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2709	unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2710	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
				2711	.addReg(DestSub0)
				2712	.addImm(AMDGPU::sub0)
				2713	.addReg(DestSub1)
				2714	.addImm(AMDGPU::sub1);
				2715
				2716	MRI.replaceRegWith(Dest.getReg(), FullDestReg);
				2717
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2718	// We don't need to legalizeOperands here because for a single operand, src0
				2719	// will support any kind of input.
				2720
				2721	// Move all users of this moved value.
				2722	addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	2723	}
				2724
				2725	void SIInstrInfo::splitScalar64BitBinaryOp(
				2726	SmallVectorImpl<MachineInstr *> &Worklist,
				2727	MachineInstr *Inst,
				2728	unsigned Opcode) const {
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2729	MachineBasicBlock &MBB = *Inst->getParent();
				2730	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				2731
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2732	MachineOperand &Dest = Inst->getOperand(0);
				2733	MachineOperand &Src0 = Inst->getOperand(1);
				2734	MachineOperand &Src1 = Inst->getOperand(2);
				2735	DebugLoc DL = Inst->getDebugLoc();
				2736
				2737	MachineBasicBlock::iterator MII = Inst;
				2738
				2739	const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	2740	const TargetRegisterClass *Src0RC = Src0.isReg() ?
				2741	MRI.getRegClass(Src0.getReg()) :
				2742	&AMDGPU::SGPR_32RegClass;
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2743
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	2744	const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
				2745	const TargetRegisterClass *Src1RC = Src1.isReg() ?
				2746	MRI.getRegClass(Src1.getReg()) :
				2747	&AMDGPU::SGPR_32RegClass;
				2748
				2749	const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
				2750
				2751	MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				2752	AMDGPU::sub0, Src0SubRC);
				2753	MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				2754	AMDGPU::sub0, Src1SubRC);
				2755
				2756	const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2757	const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
				2758	const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	2759
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2760	unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2761	MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	2762	.addOperand(SrcReg0Sub0)
				2763	.addOperand(SrcReg1Sub0);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2764
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	2765	MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				2766	AMDGPU::sub1, Src0SubRC);
				2767	MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				2768	AMDGPU::sub1, Src1SubRC);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2769
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2770	unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2771	MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	2772	.addOperand(SrcReg0Sub1)
				2773	.addOperand(SrcReg1Sub1);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2774
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2775	unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2776	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
				2777	.addReg(DestSub0)
				2778	.addImm(AMDGPU::sub0)
				2779	.addReg(DestSub1)
				2780	.addImm(AMDGPU::sub1);
				2781
				2782	MRI.replaceRegWith(Dest.getReg(), FullDestReg);
				2783
				2784	// Try to legalize the operands in case we need to swap the order to keep it
				2785	// valid.
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2786	legalizeOperands(LoHalf);
				2787	legalizeOperands(HiHalf);
				2788
				2789	// Move all users of this moved vlaue.
				2790	addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	2791	}
				2792
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2793	void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
				2794	MachineInstr *Inst) const {
				2795	MachineBasicBlock &MBB = *Inst->getParent();
				2796	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				2797
				2798	MachineBasicBlock::iterator MII = Inst;
				2799	DebugLoc DL = Inst->getDebugLoc();
				2800
				2801	MachineOperand &Dest = Inst->getOperand(0);
				2802	MachineOperand &Src = Inst->getOperand(1);
				2803
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	2804	const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2805	const TargetRegisterClass *SrcRC = Src.isReg() ?
				2806	MRI.getRegClass(Src.getReg()) :
				2807	&AMDGPU::SGPR_32RegClass;
				2808
				2809	unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2810	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2811
				2812	const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
				2813
				2814	MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
				2815	AMDGPU::sub0, SrcSubRC);
				2816	MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
				2817	AMDGPU::sub1, SrcSubRC);
				2818
Matt Arsenault	5e7f95e	2015-08-26 20:48:04 +0000	[diff] [blame]	2819	BuildMI(MBB, MII, DL, InstDesc, MidReg)
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2820	.addOperand(SrcRegSub0)
				2821	.addImm(0);
				2822
Matt Arsenault	5e7f95e	2015-08-26 20:48:04 +0000	[diff] [blame]	2823	BuildMI(MBB, MII, DL, InstDesc, ResultReg)
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2824	.addOperand(SrcRegSub1)
				2825	.addReg(MidReg);
				2826
				2827	MRI.replaceRegWith(Dest.getReg(), ResultReg);
				2828
Matt Arsenault	5e7f95e	2015-08-26 20:48:04 +0000	[diff] [blame]	2829	// We don't need to legalize operands here. src0 for etiher instruction can be
				2830	// an SGPR, and the second input is unused or determined here.
				2831	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	2832	}
				2833
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	2834	void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
				2835	MachineInstr *Inst) const {
				2836	MachineBasicBlock &MBB = *Inst->getParent();
				2837	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				2838	MachineBasicBlock::iterator MII = Inst;
				2839	DebugLoc DL = Inst->getDebugLoc();
				2840
				2841	MachineOperand &Dest = Inst->getOperand(0);
				2842	uint32_t Imm = Inst->getOperand(2).getImm();
				2843	uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
				2844	uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
				2845
Matt Arsenault	6ad3426	2014-11-14 18:40:49 +0000	[diff] [blame]	2846	(void) Offset;
				2847
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	2848	// Only sext_inreg cases handled.
				2849	assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
				2850	BitWidth <= 32 &&
				2851	Offset == 0 &&
				2852	"Not implemented");
				2853
				2854	if (BitWidth < 32) {
				2855	unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2856	unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2857	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
				2858
				2859	BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
				2860	.addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
				2861	.addImm(0)
				2862	.addImm(BitWidth);
				2863
				2864	BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
				2865	.addImm(31)
				2866	.addReg(MidRegLo);
				2867
				2868	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
				2869	.addReg(MidRegLo)
				2870	.addImm(AMDGPU::sub0)
				2871	.addReg(MidRegHi)
				2872	.addImm(AMDGPU::sub1);
				2873
				2874	MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault	445833c	2015-08-26 20:47:58 +0000	[diff] [blame]	2875	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	2876	return;
				2877	}
				2878
				2879	MachineOperand &Src = Inst->getOperand(1);
				2880	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				2881	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
				2882
				2883	BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
				2884	.addImm(31)
				2885	.addReg(Src.getReg(), 0, AMDGPU::sub0);
				2886
				2887	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
				2888	.addReg(Src.getReg(), 0, AMDGPU::sub0)
				2889	.addImm(AMDGPU::sub0)
				2890	.addReg(TmpReg)
				2891	.addImm(AMDGPU::sub1);
				2892
				2893	MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault	445833c	2015-08-26 20:47:58 +0000	[diff] [blame]	2894	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	2895	}
				2896
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	2897	void SIInstrInfo::addUsersToMoveToVALUWorklist(
				2898	unsigned DstReg,
				2899	MachineRegisterInfo &MRI,
				2900	SmallVectorImpl<MachineInstr *> &Worklist) const {
				2901	for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
				2902	E = MRI.use_end(); I != E; ++I) {
				2903	MachineInstr &UseMI = *I->getParent();
				2904	if (!canReadVGPR(UseMI, I.getOperandNo())) {
				2905	Worklist.push_back(&UseMI);
				2906	}
				2907	}
				2908	}
				2909
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	2910	void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst,
				2911	SmallVectorImpl<MachineInstr *> &Worklist) const {
				2912	// This assumes that all the users of SCC are in the same block
				2913	// as the SCC def.
				2914	for (MachineBasicBlock::iterator I = SCCDefInst,
				2915	E = SCCDefInst->getParent()->end(); I != E; ++I) {
				2916
				2917	// Exit if we find another SCC def.
				2918	if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
				2919	return;
				2920
				2921	if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
				2922	Worklist.push_back(I);
				2923	}
				2924	}
				2925
Matt Arsenault	ba6aae7	2015-09-28 20:54:57 +0000	[diff] [blame]	2926	const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
				2927	const MachineInstr &Inst) const {
				2928	const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
				2929
				2930	switch (Inst.getOpcode()) {
				2931	// For target instructions, getOpRegClass just returns the virtual register
				2932	// class associated with the operand, so we need to find an equivalent VGPR
				2933	// register class in order to move the instruction to the VALU.
				2934	case AMDGPU::COPY:
				2935	case AMDGPU::PHI:
				2936	case AMDGPU::REG_SEQUENCE:
				2937	case AMDGPU::INSERT_SUBREG:
				2938	if (RI.hasVGPRs(NewDstRC))
				2939	return nullptr;
				2940
				2941	NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
				2942	if (!NewDstRC)
				2943	return nullptr;
				2944	return NewDstRC;
				2945	default:
				2946	return NewDstRC;
				2947	}
				2948	}
				2949
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	2950	// Find the one SGPR operand we are allowed to use.
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2951	unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
				2952	int OpIndices[3]) const {
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	2953	const MCInstrDesc &Desc = MI->getDesc();
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2954
				2955	// Find the one SGPR operand we are allowed to use.
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	2956	//
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2957	// First we need to consider the instruction's operand requirements before
				2958	// legalizing. Some operands are required to be SGPRs, such as implicit uses
				2959	// of VCC, but we are still bound by the constant bus requirement to only use
				2960	// one.
				2961	//
				2962	// If the operand's class is an SGPR, we can never move it.
				2963
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	2964	unsigned SGPRReg = findImplicitSGPRRead(*MI);
				2965	if (SGPRReg != AMDGPU::NoRegister)
				2966	return SGPRReg;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2967
				2968	unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
				2969	const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
				2970
				2971	for (unsigned i = 0; i < 3; ++i) {
				2972	int Idx = OpIndices[i];
				2973	if (Idx == -1)
				2974	break;
				2975
				2976	const MachineOperand &MO = MI->getOperand(Idx);
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	2977	if (!MO.isReg())
				2978	continue;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2979
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	2980	// Is this operand statically required to be an SGPR based on the operand
				2981	// constraints?
				2982	const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
				2983	bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
				2984	if (IsRequiredSGPR)
				2985	return MO.getReg();
				2986
				2987	// If this could be a VGPR or an SGPR, Check the dynamic register class.
				2988	unsigned Reg = MO.getReg();
				2989	const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
				2990	if (RI.isSGPRClass(RegRC))
				2991	UsedSGPRs[i] = Reg;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2992	}
				2993
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	2994	// We don't have a required SGPR operand, so we have a bit more freedom in
				2995	// selecting operands to move.
				2996
				2997	// Try to select the most used SGPR. If an SGPR is equal to one of the
				2998	// others, we choose that.
				2999	//
				3000	// e.g.
				3001	// V_FMA_F32 v0, s0, s0, s0 -> No moves
				3002	// V_FMA_F32 v0, s0, s1, s0 -> Move s1
				3003
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	3004	// TODO: If some of the operands are 64-bit SGPRs and some 32, we should
				3005	// prefer those.
				3006
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	3007	if (UsedSGPRs[0] != AMDGPU::NoRegister) {
				3008	if (UsedSGPRs[0] == UsedSGPRs[1] \|\| UsedSGPRs[0] == UsedSGPRs[2])
				3009	SGPRReg = UsedSGPRs[0];
				3010	}
				3011
				3012	if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
				3013	if (UsedSGPRs[1] == UsedSGPRs[2])
				3014	SGPRReg = UsedSGPRs[1];
				3015	}
				3016
				3017	return SGPRReg;
				3018	}
				3019
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3020	void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
				3021	const MachineFunction &MF) const {
				3022	int End = getIndirectIndexEnd(MF);
				3023	int Begin = getIndirectIndexBegin(MF);
				3024
				3025	if (End == -1)
				3026	return;
				3027
				3028
				3029	for (int Index = Begin; Index <= End; ++Index)
Tom Stellard	45c0b3a	2015-01-07 20:59:25 +0000	[diff] [blame]	3030	Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3031
Tom Stellard	415ef6d	2013-11-13 23:58:51 +0000	[diff] [blame]	3032	for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3033	Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
				3034
Tom Stellard	415ef6d	2013-11-13 23:58:51 +0000	[diff] [blame]	3035	for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3036	Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
				3037
Tom Stellard	415ef6d	2013-11-13 23:58:51 +0000	[diff] [blame]	3038	for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3039	Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
				3040
Tom Stellard	415ef6d	2013-11-13 23:58:51 +0000	[diff] [blame]	3041	for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3042	Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
				3043
Tom Stellard	415ef6d	2013-11-13 23:58:51 +0000	[diff] [blame]	3044	for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Tom Stellard	81d871d	2013-11-13 23:36:50 +0000	[diff] [blame]	3045	Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	3046	}
Tom Stellard	1aaad69	2014-07-21 16:55:33 +0000	[diff] [blame]	3047
Tom Stellard	6407e1e	2014-08-01 00:32:33 +0000	[diff] [blame]	3048	MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenault	ace5b76	2014-10-17 18:00:43 +0000	[diff] [blame]	3049	unsigned OperandName) const {
Tom Stellard	1aaad69	2014-07-21 16:55:33 +0000	[diff] [blame]	3050	int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
				3051	if (Idx == -1)
				3052	return nullptr;
				3053
				3054	return &MI.getOperand(Idx);
				3055	}
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	3056
				3057	uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
				3058	uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard	4694ed0	2015-06-26 21:58:42 +0000	[diff] [blame]	3059	if (ST.isAmdHsaOS()) {
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	3060	RsrcDataFormat \|= (1ULL << 56);
				3061
Michel Danzer	beb79ce	2016-03-16 09:10:35 +0000	[diff] [blame]	3062	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
				3063	// Set MTYPE = 2
				3064	RsrcDataFormat \|= (2ULL << 59);
Tom Stellard	4694ed0	2015-06-26 21:58:42 +0000	[diff] [blame]	3065	}
				3066
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	3067	return RsrcDataFormat;
				3068	}
Marek Olsak	d1a69a2	2015-09-29 23:37:32 +0000	[diff] [blame]	3069
				3070	uint64_t SIInstrInfo::getScratchRsrcWords23() const {
				3071	uint64_t Rsrc23 = getDefaultRsrcDataFormat() \|
				3072	AMDGPU::RSRC_TID_ENABLE \|
				3073	0xffffffff; // Size;
				3074
Matt Arsenault	24ee078	2016-02-12 02:40:47 +0000	[diff] [blame]	3075	uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
				3076
				3077	Rsrc23 \|= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
				3078
Marek Olsak	d1a69a2	2015-09-29 23:37:32 +0000	[diff] [blame]	3079	// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
				3080	// Clear them unless we want a huge stride.
				3081	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
				3082	Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
				3083
				3084	return Rsrc23;
				3085	}
Nicolai Haehnle	02c3291	2016-01-13 16:10:10 +0000	[diff] [blame]	3086
				3087	bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
				3088	unsigned Opc = MI->getOpcode();
				3089
				3090	return isSMRD(Opc);
				3091	}
				3092
				3093	bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
				3094	unsigned Opc = MI->getOpcode();
				3095
				3096	return isMUBUF(Opc) \|\| isMTBUF(Opc) \|\| isMIMG(Opc);
				3097	}
Tom Stellard	2ff7262	2016-01-28 16:04:37 +0000	[diff] [blame]	3098
				3099	ArrayRef<std::pair<int, const char *>>
				3100	SIInstrInfo::getSerializableTargetIndices() const {
				3101	static const std::pair<int, const char *> TargetIndices[] = {
				3102	{AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
				3103	{AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
				3104	{AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
				3105	{AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
				3106	{AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
				3107	return makeArrayRef(TargetIndices);
				3108	}
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	3109
				3110	/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
				3111	/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
				3112	ScheduleHazardRecognizer *
				3113	SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
				3114	const ScheduleDAG *DAG) const {
				3115	return new GCNHazardRecognizer(DAG->MF);
				3116	}
				3117
				3118	/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
				3119	/// pass.
				3120	ScheduleHazardRecognizer *
				3121	SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
				3122	return new GCNHazardRecognizer(MF);
				3123	}