//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUInstrInfo(st), RI() {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

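/// Returns the number of operands of \p Node, excluding any glue operands at
/// the end of its operand list.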
static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

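/// Returns the chain operand of the load node \p Load, which is expected to
/// be its last non-glue operand.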
static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// \brief Returns true if both nodes have the same value for the given
/// operand \p OpName, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for the exec read check.
  switch (MI->getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {
    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

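/// Returns true for the st64 variants of the DS read2/write2 instructions,
/// whose offset operands are scaled by an extra factor of 64.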
static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                        unsigned &Offset,
                                        const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt->getOpcode();

  if (isDS(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);

      BaseReg = AddrReg->getReg();
      Offset = OffsetImm->getImm();
      return true;
    }

    // The two-offset instructions use offset0 and offset1 instead. We can
    // treat these as a load with a single offset if the two offsets are
    // consecutive. We will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
                                                       AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to
      // convert to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt->mayLoad()) {
        // A read2 loads both elements into a single destination register, so
        // each element is half the size of the destination register class.
        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
      } else {
        assert(LdSt->mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
      }

      if (isStride64(Opc))
        EltSize *= 64;

      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::addr);
      BaseReg = AddrReg->getReg();
      Offset = EltSize * Offset0;
      return true;
    }

    return false;
  }

  if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
    if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
      return false;

    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
                                                    AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    BaseReg = AddrReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  if (isSMRD(*LdSt)) {
    const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
                                                      AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
                                                     AMDGPU::OpName::sbase);
    BaseReg = SBaseReg->getReg();
    Offset = OffsetImm->getImm();
    return true;
  }

  return false;
}

bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                     MachineInstr *SecondLdSt,
                                     unsigned NumLoads) const {
  // TODO: This needs finer tuning.
  if (NumLoads > 4)
    return false;

  if (isDS(*FirstLdSt) && isDS(*SecondLdSt))
    return true;

  if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt))
    return true;

  if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) &&
      (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt)))
    return true;

  return false;
}

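// Copies wider than 32 bits have no single machine instruction, so
// copyPhysReg decomposes them into a sequence of subregister moves:
// S_MOV_B64 per 64-bit subregister for SGPRs, and V_MOV_B32_e32 per 32-bit
// subregister for VGPRs. The iteration direction is chosen so that an
// overlapping source register is read before it is clobbered.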
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI, DebugLoc DL,
                              unsigned DestReg, unsigned SrcReg,
                              bool KillSrc) const {
  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
  };

  static const int16_t Sub0_7_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  };

  static const int16_t Sub0_3_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1,
  };

  unsigned Opcode;
  ArrayRef<int16_t> SubIndices;

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_3_64;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_7_64;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B64;
    SubIndices = Sub0_15_64;

  } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == SubIndices.size() - 1)
      Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();

  int NewOpc;

  // Try to map original to commuted opcode.
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode.
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  if (DstRC->getSize() == 4) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

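// The SI_SPILL_* pseudos encode the spill size in their names; these helpers
// map a register size in bytes to the matching save pseudo opcode.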
static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo->getObjectSize(FrameIndex);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling SGPRs.
    unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode))
      .addReg(SrcReg)            // src
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(MFI)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
                  " spill register");
    BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
      .addReg(SrcReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg)                         // src
    .addFrameIndex(FrameIndex)              // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo->getObjectSize(FrameIndex);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
      .addFrameIndex(FrameIndex) // frame_idx
      .addMemOperand(MMO);

    return;
  }

  if (!ST.isVGPRSpillingEnabled(MFI)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
                  " restore register");
    BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)              // frame_idx
    .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
    .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
    .addMemOperand(MMO);
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled.
unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                               RegScavenger *RS, unsigned TmpReg,
                                               unsigned FrameOffset,
                                               unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    DebugLoc DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (MFI->getShaderType() == ShaderType::COMPUTE &&
        WorkGroupSize > WavefrontSize) {
      unsigned TIDIGXReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
          TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(&Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id.
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset.
  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

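// Each S_NOP covers up to eight wait states; its immediate holds the count
// minus one, so the loop below emits one S_NOP for every eight requested
// wait states.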
void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
                                   int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  case AMDGPU::SGPR_USE:
    // This is just a placeholder for register allocation.
    MI->eraseFromParent();
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI->getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addImm(Imm.getLoBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addImm(Imm.getHiBits(32).getZExtValue())
        .addReg(Dst, RegState::Implicit);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
        .addReg(Dst, RegState::Implicit);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
        .addReg(Dst, RegState::Implicit);
    }
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    unsigned Src0 = MI->getOperand(1).getReg();
    unsigned Src1 = MI->getOperand(2).getReg();
    const MachineOperand &SrcCond = MI->getOperand(3);

    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCond);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
    MI->eraseFromParent();
    break;
  }

  case AMDGPU::SI_CONSTDATA_PTR: {
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI->getOperand(0).getReg();
    unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                   .addReg(RegLo)
                   .addOperand(MI->getOperand(1)));
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                   .addReg(RegHi)
                   .addImm(0));

    llvm::finalizeBundle(MBB, Bundler.begin());

    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for a
/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still fail to
/// commute the operands; a null pointer is returned in such cases.
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
                                                  bool NewMI,
                                                  unsigned OpIdx0,
                                                  unsigned OpIdx1) const {
  int CommutedOpcode = commuteOpcode(*MI);
  if (CommutedOpcode == -1)
    return nullptr;

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  MachineOperand &Src0 = MI->getOperand(Src0Idx);
  if (!Src0.isReg())
    return nullptr;

  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);

  if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
       OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
      (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
       OpIdx1 != static_cast<unsigned>(Src0Idx)))
    return nullptr;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);

  if (isVOP2(*MI)) {
    const MCInstrDesc &InstrDesc = MI->getDesc();
    // For VOP2 instructions, any operand type is valid to use for src0. Make
    // sure we can use the src1 as src0.
    //
    // We could be stricter here and only allow commuting if there is a reason
    // to do so, i.e. if both operands are VGPRs there is no real benefit,
    // although MachineCSE attempts to find matches by commuting.
    const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
    if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
      return nullptr;
  }

  if (!Src1.isReg()) {
    // Allow commuting instructions with Imm operands.
    if (NewMI || !Src1.isImm() ||
        (!isVOP2(*MI) && !isVOP3(*MI))) {
      return nullptr;
    }
    // Be sure to copy the source modifiers to the right place.
    if (MachineOperand *Src0Mods
          = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      MachineOperand *Src1Mods
        = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);

      int Src0ModsVal = Src0Mods->getImm();
      if (!Src1Mods && Src0ModsVal != 0)
        return nullptr;

      // XXX - This assert might be a lie. It might be useful to have a neg
      // modifier with 0.0.
      int Src1ModsVal = Src1Mods->getImm();
      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

      Src1Mods->setImm(Src0ModsVal);
      Src0Mods->setImm(Src1ModsVal);
    }

    unsigned Reg = Src0.getReg();
    unsigned SubReg = Src0.getSubReg();
    if (Src1.isImm())
      Src0.ChangeToImmediate(Src1.getImm());
    else
      llvm_unreachable("Should only have immediates");

    Src1.ChangeToRegister(Reg, false);
    Src1.setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
  }

  if (MI)
    MI->setDesc(get(CommutedOpcode));

  return MI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isCommutable())
    return false;

  unsigned Opc = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
  // immediate. Also, an immediate src0 operand is not handled in
  // SIInstrInfo::commuteInstruction().
  if (!MI->getOperand(Src0Idx).isReg())
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  MachineOperand &Src1 = MI->getOperand(Src1Idx);
  if (Src1.isImm()) {
    // SIInstrInfo::commuteInstruction() does support commuting the immediate
    // operand src1 in 2 and 3 operand instructions.
    if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
      return false;
  } else if (Src1.isReg()) {
    // If any source modifiers are set, the generic instruction commuting won't
    // understand how to copy the source modifiers.
    if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
      return false;
  } else {
    return false;
  }

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

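/// Strip the src0/src1/src2 modifier operands from \p MI, removing the
/// highest index first so the remaining indices stay valid. Used when
/// rewriting a MAD into the madmk/madak forms, which have no modifier
/// operands.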
static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

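// Fold an immediate def into a single MAD/MAC use by rewriting the user to
// V_MADMK_F32 (when the multiplied operand is the constant) or V_MADAK_F32
// (when the added operand is the constant), both of which take the constant
// as a literal.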
bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned Opc = UseMI->getOpcode();
  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
    // Don't fold if we are using source modifiers. The new VOP2 instructions
    // don't have them.
    if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) ||
        hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) {
      return false;
    }

    MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_f32.
    // We should only expect these to be on src0 due to canonicalizations.
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() ||
          RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      if (!Src2->isReg() ||
          RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
        return false;

      // We need to do some weird looking operand shuffling since the madmk
      // operands are out of the normal expected order with the multiplied
      // constant as the last operand.
      //
      // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1
      // src0 -> src2 K
      // src1 -> src0
      // src2 -> src1

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      unsigned Src2Reg = Src2->getReg();
      unsigned Src2SubReg = Src2->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      Src1->setReg(Src2Reg);
      Src1->setSubReg(Src2SubReg);
      Src1->setIsKill(Src2->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      Src2->ChangeToImmediate(Imm);

      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADMK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }

    // Added part is the constant: Use v_madak_f32.
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      if (!Src0->isImm() &&
          (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
        return false;

      if (!Src1->isReg() ||
          RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      const int64_t Imm = DefMI->getOperand(1).getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::omod));
      UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
                                                      AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64) {
        UseMI->untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      }

      // ChangeToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(*UseMI);
      UseMI->setDesc(get(AMDGPU::V_MADAK_F32));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI->eraseFromParent();

      return true;
    }
  }

  return false;
}

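/// Returns true if the intervals [OffsetA, OffsetA + WidthA) and
/// [OffsetB, OffsetB + WidthB) do not overlap.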
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                               MachineInstr *MIb) const {
  unsigned BaseReg0, Offset0;
  unsigned BaseReg1, Offset1;

  if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
      getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
           "read2 / write2 not expected here yet");
    unsigned Width0 = (*MIa->memoperands_begin())->getSize();
    unsigned Width1 = (*MIb->memoperands_begin())->getSize();
    if (BaseReg0 == BaseReg1 &&
        offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

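// LDS (DS) accesses can never alias anything except FLAT, which can address
// any memory; buffer (MUBUF/MTBUF) and scalar memory (SMRD) accesses may
// alias each other but never LDS. Accesses within the same class fall back
// to a base register and offset range comparison.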
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must load from or modify a memory location");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(*MIa)) {
    if (isDS(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb);
  }

  if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
    if (isMUBUF(*MIb) || isMTBUF(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isSMRD(*MIb);
  }

  if (isSMRD(*MIa)) {
    if (isSMRD(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(*MIb) && !isMUBUF(*MIb) && !isMTBUF(*MIb);
  }

  if (isFLAT(*MIa)) {
    if (isFLAT(*MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

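// Convert the two-address V_MAC_F32, whose dst is tied to src2, into the
// three-address V_MAD_F32 with explicit modifier, clamp, and omod operands.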
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineBasicBlock::iterator &MI,
                                                 LiveVariables *LV) const {
  switch (MI->getOpcode()) {
  default: return nullptr;
  case AMDGPU::V_MAC_F32_e64: break;
  case AMDGPU::V_MAC_F32_e32: {
    const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
    if (Src0->isImm() && !isInlineConstant(*Src0, 4))
      return nullptr;
    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst);
  const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
  const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
  const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);

  return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
    .addOperand(*Dst)
    .addImm(0) // Src0 mods
    .addOperand(*Src0)
    .addImm(0) // Src1 mods
    .addOperand(*Src1)
    .addImm(0) // Src2 mods
    .addOperand(*Src2)
    .addImm(0)  // clamp
    .addImm(0); // omod
}

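// Inline constants are encoded directly in the instruction rather than as a
// trailing literal dword: integers from -16 to 64, plus a small set of
// floating-point values (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0) matched by
// bit pattern.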
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int64_t SVal = Imm.getSExtValue();
  if (SVal >= -16 && SVal <= 64)
    return true;

  if (Imm.getBitWidth() == 64) {
    uint64_t Val = Imm.getZExtValue();
    return (DoubleToBits(0.0) == Val) ||
           (DoubleToBits(1.0) == Val) ||
           (DoubleToBits(-1.0) == Val) ||
           (DoubleToBits(0.5) == Val) ||
           (DoubleToBits(-0.5) == Val) ||
           (DoubleToBits(2.0) == Val) ||
           (DoubleToBits(-2.0) == Val) ||
           (DoubleToBits(4.0) == Val) ||
           (DoubleToBits(-4.0) == Val);
  }

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
  uint32_t Val = Imm.getZExtValue();

  return (FloatToBits(0.0f) == Val) ||
         (FloatToBits(1.0f) == Val) ||
         (FloatToBits(-1.0f) == Val) ||
         (FloatToBits(0.5f) == Val) ||
         (FloatToBits(-0.5f) == Val) ||
         (FloatToBits(2.0f) == Val) ||
         (FloatToBits(-2.0f) == Val) ||
         (FloatToBits(4.0f) == Val) ||
         (FloatToBits(-4.0f) == Val);
}

Matt Arsenault11a4d672015-02-13 19:05:03 +00001303bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
1304 unsigned OpSize) const {
1305 if (MO.isImm()) {
1306 // MachineOperand provides no way to tell the true operand size, since it
1307 // only records a 64-bit value. We need to know the size to determine if a
1308 // 32-bit floating point immediate bit pattern is legal for an integer
1309 // immediate. It would be for any 32-bit integer operand, but would not be
1310 // for a 64-bit one.
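  // For example (illustrative): the bit pattern 0x3f800000 (1.0f) is an
  // inline constant for a 32-bit operand, but as a 64-bit immediate it
  // matches none of the 64-bit inline values checked below.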
1311
1312 unsigned BitSize = 8 * OpSize;
1313 return isInlineConstant(APInt(BitSize, MO.getImm(), true));
1314 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001315
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00001316 return false;
Tom Stellard93fabce2013-10-10 17:11:55 +00001317}
1318
Matt Arsenault11a4d672015-02-13 19:05:03 +00001319bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
1320 unsigned OpSize) const {
1321 return MO.isImm() && !isInlineConstant(MO, OpSize);
Tom Stellard93fabce2013-10-10 17:11:55 +00001322}
1323
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001324static bool compareMachineOp(const MachineOperand &Op0,
1325 const MachineOperand &Op1) {
1326 if (Op0.getType() != Op1.getType())
1327 return false;
1328
1329 switch (Op0.getType()) {
1330 case MachineOperand::MO_Register:
1331 return Op0.getReg() == Op1.getReg();
1332 case MachineOperand::MO_Immediate:
1333 return Op0.getImm() == Op1.getImm();
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001334 default:
1335 llvm_unreachable("Didn't expect to be comparing these operand types");
1336 }
1337}
1338
Tom Stellardb02094e2014-07-21 15:45:01 +00001339bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
1340 const MachineOperand &MO) const {
1341 const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
1342
Tom Stellardfb77f002015-01-13 22:59:41 +00001343 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
Tom Stellardb02094e2014-07-21 15:45:01 +00001344
1345 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
1346 return true;
1347
1348 if (OpInfo.RegClass < 0)
1349 return false;
1350
Matt Arsenault11a4d672015-02-13 19:05:03 +00001351 unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
1352 if (isLiteralConstant(MO, OpSize))
Tom Stellardb6550522015-01-12 19:33:18 +00001353 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001354
Tom Stellardb6550522015-01-12 19:33:18 +00001355 return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellardb02094e2014-07-21 15:45:01 +00001356}
1357
Tom Stellard86d12eb2014-08-01 00:32:28 +00001358bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsaka93603d2015-01-15 18:42:51 +00001359 int Op32 = AMDGPU::getVOPe32(Opcode);
1360 if (Op32 == -1)
1361 return false;
1362
1363 return pseudoToMCOpcode(Op32) != -1;
Tom Stellard86d12eb2014-08-01 00:32:28 +00001364}
1365
Tom Stellardb4a313a2014-08-01 00:32:39 +00001366bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
1367 // The src0_modifier operand is present on all instructions
1368 // that have modifiers.
1369
1370 return AMDGPU::getNamedOperandIdx(Opcode,
1371 AMDGPU::OpName::src0_modifiers) != -1;
1372}
1373
Matt Arsenaultace5b762014-10-17 18:00:43 +00001374bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
1375 unsigned OpName) const {
1376 const MachineOperand *Mods = getNamedOperand(MI, OpName);
1377 return Mods && Mods->getImm();
1378}
1379
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001380bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault11a4d672015-02-13 19:05:03 +00001381 const MachineOperand &MO,
1382 unsigned OpSize) const {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001383 // Literal constants use the constant bus.
Matt Arsenault11a4d672015-02-13 19:05:03 +00001384 if (isLiteralConstant(MO, OpSize))
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001385 return true;
1386
1387 if (!MO.isReg() || !MO.isUse())
1388 return false;
1389
1390 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
1391 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
1392
1393 // FLAT_SCR is just an SGPR pair.
1394 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
1395 return true;
1396
1397 // EXEC register uses the constant bus.
1398 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
1399 return true;
1400
1401 // SGPRs use the constant bus
1402 if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
1403 (!MO.isImplicit() &&
1404 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
1405 AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
1406 return true;
1407 }
1408
1409 return false;
1410}
1411
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00001412static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
1413 for (const MachineOperand &MO : MI.implicit_operands()) {
1414 // We only care about reads.
1415 if (MO.isDef())
1416 continue;
1417
1418 switch (MO.getReg()) {
1419 case AMDGPU::VCC:
1420 case AMDGPU::M0:
1421 case AMDGPU::FLAT_SCR:
1422 return MO.getReg();
1423
1424 default:
1425 break;
1426 }
1427 }
1428
1429 return AMDGPU::NoRegister;
1430}
1431
Tom Stellard93fabce2013-10-10 17:11:55 +00001432bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
1433 StringRef &ErrInfo) const {
1434 uint16_t Opcode = MI->getOpcode();
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001435 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard93fabce2013-10-10 17:11:55 +00001436 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
1437 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
1438 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
1439
Tom Stellardbc4497b2016-02-12 23:45:29 +00001440 // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes
1441 // all SCC users are in the same blocks as their defs.
1442 const MachineBasicBlock *MBB = MI->getParent();
1443 if (MI == &MBB->front()) {
1444 if (MBB->isLiveIn(AMDGPU::SCC)) {
1445 ErrInfo = "scc register cannot be live across blocks.";
1446 return false;
1447 }
1448 }
1449
Tom Stellardca700e42014-03-17 17:03:49 +00001450 // Make sure the number of operands is correct.
1451 const MCInstrDesc &Desc = get(Opcode);
1452 if (!Desc.isVariadic() &&
1453 Desc.getNumOperands() != MI->getNumExplicitOperands()) {
1454 ErrInfo = "Instruction has wrong number of operands.";
1455 return false;
1456 }
1457
Changpeng Fangc9963932015-12-18 20:04:28 +00001458 // Make sure the register classes are correct.
Tom Stellardb4a313a2014-08-01 00:32:39 +00001459 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Tom Stellardfb77f002015-01-13 22:59:41 +00001460 if (MI->getOperand(i).isFPImm()) {
1461 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
1462 "all fp values to integers.";
1463 return false;
1464 }
1465
Marek Olsak8eeebcc2015-02-18 22:12:41 +00001466 int RegClass = Desc.OpInfo[i].RegClass;
1467
Tom Stellardca700e42014-03-17 17:03:49 +00001468 switch (Desc.OpInfo[i].OperandType) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00001469 case MCOI::OPERAND_REGISTER:
Matt Arsenault63bef0d2015-02-13 02:47:22 +00001470 if (MI->getOperand(i).isImm()) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00001471 ErrInfo = "Illegal immediate value for operand.";
1472 return false;
1473 }
1474 break;
1475 case AMDGPU::OPERAND_REG_IMM32:
1476 break;
1477 case AMDGPU::OPERAND_REG_INLINE_C:
Marek Olsak8eeebcc2015-02-18 22:12:41 +00001478 if (isLiteralConstant(MI->getOperand(i),
1479 RI.getRegClass(RegClass)->getSize())) {
1480 ErrInfo = "Illegal immediate value for operand.";
1481 return false;
Tom Stellarda305f932014-07-02 20:53:44 +00001482 }
Tom Stellardca700e42014-03-17 17:03:49 +00001483 break;
1484 case MCOI::OPERAND_IMMEDIATE:
Tom Stellardb02094e2014-07-21 15:45:01 +00001485 // Check if this operand is an immediate.
1486 // FrameIndex operands will be replaced by immediates, so they are
1487 // allowed.
Tom Stellardfb77f002015-01-13 22:59:41 +00001488 if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) {
Tom Stellardca700e42014-03-17 17:03:49 +00001489 ErrInfo = "Expected immediate, but got non-immediate";
1490 return false;
1491 }
1492 // Fall-through
1493 default:
1494 continue;
1495 }
1496
1497 if (!MI->getOperand(i).isReg())
1498 continue;
1499
Tom Stellardca700e42014-03-17 17:03:49 +00001500 if (RegClass != -1) {
1501 unsigned Reg = MI->getOperand(i).getReg();
1502 if (TargetRegisterInfo::isVirtualRegister(Reg))
1503 continue;
1504
1505 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
1506 if (!RC->contains(Reg)) {
1507 ErrInfo = "Operand has incorrect register class.";
1508 return false;
1509 }
1510 }
1511 }
1512
1513
Tom Stellard93fabce2013-10-10 17:11:55 +00001514 // Verify VOP*
Matt Arsenault3add6432015-10-20 04:35:43 +00001515 if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001516 // Only look at the true operands. Only a real operand can use the constant
1517 // bus, and we don't want to check pseudo-operands like the source modifier
1518 // flags.
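    // e.g. (illustrative) a VOP3 such as V_MAD_F32 with two different SGPR
    // sources would count two constant bus uses and be rejected below.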
1519 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
1520
Tom Stellard93fabce2013-10-10 17:11:55 +00001521 unsigned ConstantBusCount = 0;
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00001522 unsigned SGPRUsed = findImplicitSGPRRead(*MI);
1523 if (SGPRUsed != AMDGPU::NoRegister)
1524 ++ConstantBusCount;
1525
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001526 for (int OpIdx : OpIndices) {
1527 if (OpIdx == -1)
1528 break;
Matt Arsenaulte368cb32014-12-11 23:37:32 +00001529 const MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault11a4d672015-02-13 19:05:03 +00001530 if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001531 if (MO.isReg()) {
1532 if (MO.getReg() != SGPRUsed)
Tom Stellard93fabce2013-10-10 17:11:55 +00001533 ++ConstantBusCount;
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001534 SGPRUsed = MO.getReg();
1535 } else {
1536 ++ConstantBusCount;
Tom Stellard93fabce2013-10-10 17:11:55 +00001537 }
1538 }
Tom Stellard93fabce2013-10-10 17:11:55 +00001539 }
1540 if (ConstantBusCount > 1) {
1541 ErrInfo = "VOP* instruction uses the constant bus more than once";
1542 return false;
1543 }
1544 }
1545
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001546 // Verify misc. restrictions on specific instructions.
1547 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
1548 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Matt Arsenault262407b2014-09-24 02:17:09 +00001549 const MachineOperand &Src0 = MI->getOperand(Src0Idx);
1550 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
1551 const MachineOperand &Src2 = MI->getOperand(Src2Idx);
Matt Arsenaultbecb1402014-06-23 18:28:31 +00001552 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
1553 if (!compareMachineOp(Src0, Src1) &&
1554 !compareMachineOp(Src0, Src2)) {
1555 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
1556 return false;
1557 }
1558 }
1559 }
1560
Matt Arsenaultd092a062015-10-02 18:58:37 +00001561 // Make sure we aren't losing exec uses in the td files. This mostly requires
1562 // being careful when using 'let Uses' to add other use registers.
1563 if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
1564 const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC);
1565 if (!Exec || !Exec->isImplicit()) {
1566 ErrInfo = "VALU instruction does not implicitly read exec mask";
1567 return false;
1568 }
1569 }
1570
Tom Stellard93fabce2013-10-10 17:11:55 +00001571 return true;
1572}
1573
Matt Arsenaultf14032a2013-11-15 22:02:28 +00001574unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
Tom Stellard82166022013-11-13 23:36:37 +00001575 switch (MI.getOpcode()) {
1576 default: return AMDGPU::INSTRUCTION_LIST_END;
1577 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
1578 case AMDGPU::COPY: return AMDGPU::COPY;
1579 case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard204e61b2014-04-07 19:45:45 +00001580 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Tom Stellarde0387202014-03-21 15:51:54 +00001581 case AMDGPU::S_MOV_B32:
1582 return MI.getOperand(1).isReg() ?
Tom Stellard8c12fd92014-03-24 16:12:34 +00001583 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00001584 case AMDGPU::S_ADD_I32:
1585 case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00001586 case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00001587 case AMDGPU::S_SUB_I32:
1588 case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00001589 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault869cd072014-09-03 23:24:35 +00001590 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault8e2581b2014-03-21 18:01:18 +00001591 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
1592 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
1593 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
1594 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
1595 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
1596 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
1597 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
Tom Stellard82166022013-11-13 23:36:37 +00001598 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
1599 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
1600 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
1601 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
1602 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
1603 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault27cc9582014-04-18 01:53:18 +00001604 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
1605 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault78b86702014-04-18 05:19:26 +00001606 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
1607 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak63a7b082015-03-24 13:40:21 +00001608 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault43160e72014-06-18 17:13:57 +00001609 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault2c335622014-04-09 07:16:16 +00001610 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault689f3252014-06-09 16:36:31 +00001611 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault0cb92e12014-04-11 19:25:18 +00001612 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
1613 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
1614 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
1615 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
1616 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
1617 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellardbc4497b2016-02-12 23:45:29 +00001618 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
1619 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
1620 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
1621 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
1622 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
1623 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Marek Olsakc5368502015-01-15 18:43:01 +00001624 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault295b86e2014-06-17 17:36:27 +00001625 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault85796012014-06-17 17:36:24 +00001626 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsakd2af89d2015-03-04 17:33:45 +00001627 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellardbc4497b2016-02-12 23:45:29 +00001628 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
1629 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard82166022013-11-13 23:36:37 +00001630 }
1631}
1632
1633bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
1634 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
1635}
1636
1637const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
1638 unsigned OpNo) const {
1639 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1640 const MCInstrDesc &Desc = get(MI.getOpcode());
1641 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Matt Arsenault102a7042014-12-11 23:37:34 +00001642 Desc.OpInfo[OpNo].RegClass == -1) {
1643 unsigned Reg = MI.getOperand(OpNo).getReg();
1644
1645 if (TargetRegisterInfo::isVirtualRegister(Reg))
1646 return MRI.getRegClass(Reg);
Matt Arsenault11a4d672015-02-13 19:05:03 +00001647 return RI.getPhysRegClass(Reg);
Matt Arsenault102a7042014-12-11 23:37:34 +00001648 }
Tom Stellard82166022013-11-13 23:36:37 +00001649
1650 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1651 return RI.getRegClass(RCID);
1652}
1653
1654bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
1655 switch (MI.getOpcode()) {
1656 case AMDGPU::COPY:
1657 case AMDGPU::REG_SEQUENCE:
Tom Stellard4f3b04d2014-04-17 21:00:07 +00001658 case AMDGPU::PHI:
Tom Stellarda5687382014-05-15 14:41:55 +00001659 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00001660 return RI.hasVGPRs(getOpRegClass(MI, 0));
1661 default:
1662 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
1663 }
1664}
1665
1666void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
1667 MachineBasicBlock::iterator I = MI;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001668 MachineBasicBlock *MBB = MI->getParent();
Tom Stellard82166022013-11-13 23:36:37 +00001669 MachineOperand &MO = MI->getOperand(OpIdx);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001670 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00001671 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
1672 const TargetRegisterClass *RC = RI.getRegClass(RCID);
1673 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001674 if (MO.isReg())
Tom Stellard82166022013-11-13 23:36:37 +00001675 Opcode = AMDGPU::COPY;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001676 else if (RI.isSGPRClass(RC))
Matt Arsenault671a0052013-11-14 10:08:50 +00001677 Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001678
Tom Stellard82166022013-11-13 23:36:37 +00001679
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00001680 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001681 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard0c93c9e2014-09-05 14:08:01 +00001682 VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001683 else
Tom Stellard45c0b3a2015-01-07 20:59:25 +00001684 VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001685
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00001686 unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00001687 DebugLoc DL = MBB->findDebugLoc(I);
1688 BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
1689 .addOperand(MO);
Tom Stellard82166022013-11-13 23:36:37 +00001690 MO.ChangeToRegister(Reg, false);
1691}
1692
Tom Stellard15834092014-03-21 15:51:57 +00001693unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
1694 MachineRegisterInfo &MRI,
1695 MachineOperand &SuperReg,
1696 const TargetRegisterClass *SuperRC,
1697 unsigned SubIdx,
1698 const TargetRegisterClass *SubRC)
1699 const {
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001700 MachineBasicBlock *MBB = MI->getParent();
1701 DebugLoc DL = MI->getDebugLoc();
Tom Stellard15834092014-03-21 15:51:57 +00001702 unsigned SubReg = MRI.createVirtualRegister(SubRC);
1703
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001704 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
1705 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1706 .addReg(SuperReg.getReg(), 0, SubIdx);
1707 return SubReg;
1708 }
1709
Tom Stellard15834092014-03-21 15:51:57 +00001710 // Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault08d84942014-06-03 23:06:13 +00001711 // value so we don't need to worry about merging its subreg index with the
1712 // SubIdx passed to this function. The register coalescer should be able to
Tom Stellard15834092014-03-21 15:51:57 +00001713 // eliminate this extra copy.
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00001714 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard15834092014-03-21 15:51:57 +00001715
Matt Arsenault7480a0e2014-11-17 21:11:37 +00001716 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
1717 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
1718
1719 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
1720 .addReg(NewSuperReg, 0, SubIdx);
1721
Tom Stellard15834092014-03-21 15:51:57 +00001722 return SubReg;
1723}
1724
Matt Arsenault248b7b62014-03-24 20:08:09 +00001725MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
1726 MachineBasicBlock::iterator MII,
1727 MachineRegisterInfo &MRI,
1728 MachineOperand &Op,
1729 const TargetRegisterClass *SuperRC,
1730 unsigned SubIdx,
1731 const TargetRegisterClass *SubRC) const {
1732 if (Op.isImm()) {
1733 // XXX - Is there a better way to do this?
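    // e.g. (illustrative) Op.getImm() == 0x0000000100000002 yields
    // 0x00000002 for sub0 and 0x00000001 for sub1.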
1734 if (SubIdx == AMDGPU::sub0)
1735 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
1736 if (SubIdx == AMDGPU::sub1)
1737 return MachineOperand::CreateImm(Op.getImm() >> 32);
1738
1739 llvm_unreachable("Unhandled register index for immediate");
1740 }
1741
1742 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
1743 SubIdx, SubRC);
1744 return MachineOperand::CreateReg(SubReg, false);
1745}
1746
Marek Olsakbe047802014-12-07 12:19:03 +00001747// Change the order of operands from (0, 1, 2) to (0, 2, 1)
1748void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
1749 assert(Inst->getNumExplicitOperands() == 3);
1750 MachineOperand Op1 = Inst->getOperand(1);
1751 Inst->RemoveOperand(1);
1752 Inst->addOperand(Op1);
1753}
1754
Matt Arsenault856d1922015-12-01 19:57:17 +00001755bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
1756 const MCOperandInfo &OpInfo,
1757 const MachineOperand &MO) const {
1758 if (!MO.isReg())
1759 return false;
1760
1761 unsigned Reg = MO.getReg();
1762 const TargetRegisterClass *RC =
1763 TargetRegisterInfo::isVirtualRegister(Reg) ?
1764 MRI.getRegClass(Reg) :
1765 RI.getPhysRegClass(Reg);
1766
Nicolai Haehnle82fc9622016-01-07 17:10:29 +00001767 const SIRegisterInfo *TRI =
1768 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1769 RC = TRI->getSubRegClass(RC, MO.getSubReg());
1770
Matt Arsenault856d1922015-12-01 19:57:17 +00001771 // In order to be legal, the common sub-class must be equal to the
1772 // class of the current operand. For example:
1773 //
1774 // v_mov_b32 s0 ; Operand defined as vsrc_32
1775 // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
1776 //
1777 // s_sendmsg 0, s0 ; Operand defined as m0reg
1778 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
1779
1780 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
1781}
1782
1783bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1784 const MCOperandInfo &OpInfo,
1785 const MachineOperand &MO) const {
1786 if (MO.isReg())
1787 return isLegalRegOperand(MRI, OpInfo, MO);
1788
1789 // Handle non-register types that are treated like immediates.
1790 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
1791 return true;
1792}
1793
Tom Stellard0e975cf2014-08-01 00:32:35 +00001794bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
1795 const MachineOperand *MO) const {
1796 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001797 const MCInstrDesc &InstDesc = MI->getDesc();
Tom Stellard0e975cf2014-08-01 00:32:35 +00001798 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
1799 const TargetRegisterClass *DefinedRC =
1800 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
1801 if (!MO)
1802 MO = &MI->getOperand(OpIdx);
1803
Matt Arsenault3add6432015-10-20 04:35:43 +00001804 if (isVALU(*MI) &&
Matt Arsenault11a4d672015-02-13 19:05:03 +00001805 usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001806
1807 RegSubRegPair SGPRUsed;
1808 if (MO->isReg())
1809 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
1810
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001811 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1812 if (i == OpIdx)
1813 continue;
Matt Arsenault11a4d672015-02-13 19:05:03 +00001814 const MachineOperand &Op = MI->getOperand(i);
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00001815 if (Op.isReg() &&
1816 (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault11a4d672015-02-13 19:05:03 +00001817 usesConstantBus(MRI, Op, getOpSize(*MI, i))) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001818 return false;
1819 }
1820 }
1821 }
1822
Tom Stellard0e975cf2014-08-01 00:32:35 +00001823 if (MO->isReg()) {
1824 assert(DefinedRC);
Matt Arsenault856d1922015-12-01 19:57:17 +00001825 return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00001826 }
1827
1828
1829 // Handle non-register types that are treated like immediates.
Tom Stellardfb77f002015-01-13 22:59:41 +00001830 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
Tom Stellard0e975cf2014-08-01 00:32:35 +00001831
Matt Arsenault4364fef2014-09-23 18:30:57 +00001832 if (!DefinedRC) {
1833 // This operand expects an immediate.
Tom Stellard0e975cf2014-08-01 00:32:35 +00001834 return true;
Matt Arsenault4364fef2014-09-23 18:30:57 +00001835 }
Tom Stellard0e975cf2014-08-01 00:32:35 +00001836
Tom Stellard73ae1cb2014-09-23 21:26:25 +00001837 return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00001838}
1839
Matt Arsenault856d1922015-12-01 19:57:17 +00001840void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
1841 MachineInstr *MI) const {
1842 unsigned Opc = MI->getOpcode();
1843 const MCInstrDesc &InstrDesc = get(Opc);
1844
1845 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1846 MachineOperand &Src1 = MI->getOperand(Src1Idx);
1847
1848 // If there is an implicit SGPR use such as the VCC use of v_addc_u32/v_subb_u32,
1849 // we need to make sure we have only one constant bus use in total.
1850 //
1851 // Note we do not need to worry about literal constants here. They are
1852 // disabled for the operand type for instructions because they will always
1853 // violate the one constant bus use rule.
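  // e.g. (illustrative) V_ADDC_U32_e32 implicitly reads VCC as its carry-in,
  // so an SGPR in src0 would be a second constant bus use.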
1854 bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
1855 if (HasImplicitSGPR) {
1856 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1857 MachineOperand &Src0 = MI->getOperand(Src0Idx);
1858
1859 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
1860 legalizeOpWithMove(MI, Src0Idx);
1861 }
1862
1863 // VOP2 instructions accept any operand type in src0, so we don't need to
1864 // check its legality. If src1 is already legal, we don't need to do anything.
1865 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
1866 return;
1867
1868 // We do not use commuteInstruction here because it is too aggressive and will
1869 // commute if it is possible. We only want to commute here if it improves
1870 // legality. This can be called a fairly large number of times so don't waste
1871 // compile time pointlessly swapping and checking legality again.
1872 if (HasImplicitSGPR || !MI->isCommutable()) {
1873 legalizeOpWithMove(MI, Src1Idx);
1874 return;
1875 }
1876
1877 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1878 MachineOperand &Src0 = MI->getOperand(Src0Idx);
1879
1880 // If src0 can be used as src1, commuting will make the operands legal.
1881 // Otherwise we have to give up and insert a move.
1882 //
1883 // TODO: Other immediate-like operand kinds could be commuted if there was a
1884 // MachineOperand::ChangeTo* for them.
1885 if ((!Src1.isImm() && !Src1.isReg()) ||
1886 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
1887 legalizeOpWithMove(MI, Src1Idx);
1888 return;
1889 }
1890
1891 int CommutedOpc = commuteOpcode(*MI);
1892 if (CommutedOpc == -1) {
1893 legalizeOpWithMove(MI, Src1Idx);
1894 return;
1895 }
1896
1897 MI->setDesc(get(CommutedOpc));
1898
1899 unsigned Src0Reg = Src0.getReg();
1900 unsigned Src0SubReg = Src0.getSubReg();
1901 bool Src0Kill = Src0.isKill();
1902
1903 if (Src1.isImm())
1904 Src0.ChangeToImmediate(Src1.getImm());
1905 else if (Src1.isReg()) {
1906 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
1907 Src0.setSubReg(Src1.getSubReg());
1908 } else
1909 llvm_unreachable("Should only have register or immediate operands");
1910
1911 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
1912 Src1.setSubReg(Src0SubReg);
1913}
1914
Matt Arsenault6005fcb2015-10-21 21:51:02 +00001915// Legalize VOP3 operands. Because all operand types are supported for any
1916// operand, and since literal constants are not allowed and should never be
1917// seen, we only need to worry about inserting copies if we use multiple SGPR
1918// operands.
1919void SIInstrInfo::legalizeOperandsVOP3(
1920 MachineRegisterInfo &MRI,
1921 MachineInstr *MI) const {
1922 unsigned Opc = MI->getOpcode();
1923
1924 int VOP3Idx[3] = {
1925 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
1926 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
1927 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
1928 };
1929
1930 // Find the one SGPR operand we are allowed to use.
1931 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
1932
1933 for (unsigned i = 0; i < 3; ++i) {
1934 int Idx = VOP3Idx[i];
1935 if (Idx == -1)
1936 break;
1937 MachineOperand &MO = MI->getOperand(Idx);
1938
1939 // We should never see a VOP3 instruction with an illegal immediate operand.
1940 if (!MO.isReg())
1941 continue;
1942
1943 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
1944 continue; // VGPRs are legal
1945
1946 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
1947 SGPRReg = MO.getReg();
1948 // We can use one SGPR in each VOP3 instruction.
1949 continue;
1950 }
1951
1952 // If we make it this far, then the operand is not legal and we must
1953 // legalize it.
1954 legalizeOpWithMove(MI, Idx);
1955 }
1956}
1957
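// Build an SGPR copy of a VGPR value by reading each 32-bit sub-register with
// V_READFIRSTLANE_B32 and reassembling the pieces with a REG_SEQUENCE, and
// return the new SGPR.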
Tom Stellard1397d492016-02-11 21:45:07 +00001958unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI,
1959 MachineRegisterInfo &MRI) const {
1960 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
1961 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
1962 unsigned DstReg = MRI.createVirtualRegister(SRC);
1963 unsigned SubRegs = VRC->getSize() / 4;
1964
1965 SmallVector<unsigned, 8> SRegs;
1966 for (unsigned i = 0; i < SubRegs; ++i) {
1967 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1968 BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
1969 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
1970 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
1971 SRegs.push_back(SGPR);
1972 }
1973
1974 MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI,
1975 UseMI->getDebugLoc(),
1976 get(AMDGPU::REG_SEQUENCE), DstReg);
1977 for (unsigned i = 0; i < SubRegs; ++i) {
1978 MIB.addReg(SRegs[i]);
1979 MIB.addImm(RI.getSubRegFromChannel(i));
1980 }
1981 return DstReg;
1982}
1983
Tom Stellard467b5b92016-02-20 00:37:25 +00001984void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
1985 MachineInstr *MI) const {
1986
1987 // If the pointer is stored in VGPRs, then we need to move it to
1988 // SGPRs using v_readfirstlane. This is safe because we only select
1989 // loads with uniform pointers to SMRD instructions, so we know the
1990 // pointer value is uniform.
1991 MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
1992 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
1993 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
1994 SBase->setReg(SGPR);
1995 }
1996}
1997
Tom Stellard82166022013-11-13 23:36:37 +00001998void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
1999 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00002000
2001 // Legalize VOP2
Tom Stellardbc4497b2016-02-12 23:45:29 +00002002 if (isVOP2(*MI) || isVOPC(*MI)) {
Matt Arsenault856d1922015-12-01 19:57:17 +00002003 legalizeOperandsVOP2(MRI, MI);
Tom Stellard0e975cf2014-08-01 00:32:35 +00002004 return;
Tom Stellard82166022013-11-13 23:36:37 +00002005 }
2006
2007 // Legalize VOP3
Matt Arsenault3add6432015-10-20 04:35:43 +00002008 if (isVOP3(*MI)) {
Matt Arsenault6005fcb2015-10-21 21:51:02 +00002009 legalizeOperandsVOP3(MRI, MI);
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00002010 return;
Tom Stellard82166022013-11-13 23:36:37 +00002011 }
2012
Tom Stellard467b5b92016-02-20 00:37:25 +00002013 // Legalize SMRD
2014 if (isSMRD(*MI)) {
2015 legalizeOperandsSMRD(MRI, MI);
2016 return;
2017 }
2018
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002019 // Legalize REG_SEQUENCE and PHI
Tom Stellard82166022013-11-13 23:36:37 +00002020 // The register class of the operands must be the same type as the register
2021 // class of the output.
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002022 if (MI->getOpcode() == AMDGPU::PHI) {
Craig Topper062a2ba2014-04-25 05:30:21 +00002023 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
Tom Stellard82166022013-11-13 23:36:37 +00002024 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
2025 if (!MI->getOperand(i).isReg() ||
2026 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
2027 continue;
2028 const TargetRegisterClass *OpRC =
2029 MRI.getRegClass(MI->getOperand(i).getReg());
2030 if (RI.hasVGPRs(OpRC)) {
2031 VRC = OpRC;
2032 } else {
2033 SRC = OpRC;
2034 }
2035 }
2036
2037 // If any of the operands are VGPR registers, then they must all be
2038 // VGPRs, otherwise we will create illegal VGPR->SGPR copies when
2039 // legalizing them.
2040 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
2041 if (!VRC) {
2042 assert(SRC);
2043 VRC = RI.getEquivalentVGPRClass(SRC);
2044 }
2045 RC = VRC;
2046 } else {
2047 RC = SRC;
2048 }
2049
2050 // Update all the operands so they have the same type.
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002051 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2052 MachineOperand &Op = MI->getOperand(I);
2053 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00002054 continue;
2055 unsigned DstReg = MRI.createVirtualRegister(RC);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00002056
2057 // MI is a PHI instruction.
2058 MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
2059 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
2060
2061 BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2062 .addOperand(Op);
2063 Op.setReg(DstReg);
2064 }
2065 }
2066
2067 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
2068 // VGPR dest type and SGPR sources, insert copies so all operands are
2069 // VGPRs. This seems to help operand folding / the register coalescer.
2070 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
2071 MachineBasicBlock *MBB = MI->getParent();
2072 const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
2073 if (RI.hasVGPRs(DstRC)) {
2074 // Update all the operands so they are VGPR register classes. These may
2075 // not be the same register class because REG_SEQUENCE supports mixing
2076 // subregister index types e.g. sub0_sub1 + sub2 + sub3
2077 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
2078 MachineOperand &Op = MI->getOperand(I);
2079 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
2080 continue;
2081
2082 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
2083 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
2084 if (VRC == OpRC)
2085 continue;
2086
2087 unsigned DstReg = MRI.createVirtualRegister(VRC);
2088
2089 BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
2090 .addOperand(Op);
2091
2092 Op.setReg(DstReg);
2093 Op.setIsKill();
Tom Stellard4f3b04d2014-04-17 21:00:07 +00002094 }
Tom Stellard82166022013-11-13 23:36:37 +00002095 }
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00002096
2097 return;
Tom Stellard82166022013-11-13 23:36:37 +00002098 }
Tom Stellard15834092014-03-21 15:51:57 +00002099
Tom Stellarda5687382014-05-15 14:41:55 +00002100 // Legalize INSERT_SUBREG
2101 // src0 must have the same register class as dst
2102 if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
2103 unsigned Dst = MI->getOperand(0).getReg();
2104 unsigned Src0 = MI->getOperand(1).getReg();
2105 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
2106 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
2107 if (DstRC != Src0RC) {
2108 MachineBasicBlock &MBB = *MI->getParent();
2109 unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
2110 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
2111 .addReg(Src0);
2112 MI->getOperand(1).setReg(NewSrc0);
2113 }
2114 return;
2115 }
2116
Tom Stellard1397d492016-02-11 21:45:07 +00002117 // Legalize MIMG
2118 if (isMIMG(*MI)) {
2119 MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
2120 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
2121 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
2122 SRsrc->setReg(SGPR);
2123 }
2124
2125 MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp);
2126 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
2127 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
2128 SSamp->setReg(SGPR);
2129 }
2130 return;
2131 }
2132
Tom Stellard15834092014-03-21 15:51:57 +00002133 // Legalize MUBUF* instructions
2134 // FIXME: If we start using the non-addr64 instructions for compute, we
2135 // may need to legalize them here.
Tom Stellard155bbb72014-08-11 22:18:17 +00002136 int SRsrcIdx =
2137 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
2138 if (SRsrcIdx != -1) {
2139 // We have an MUBUF instruction
2140 MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
2141 unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
2142 if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
2143 RI.getRegClass(SRsrcRC))) {
2144 // The operands are legal.
2145 // FIXME: We may need to legalize operands besides srsrc.
2146 return;
2147 }
Tom Stellard15834092014-03-21 15:51:57 +00002148
Tom Stellard155bbb72014-08-11 22:18:17 +00002149 MachineBasicBlock &MBB = *MI->getParent();
Matt Arsenaultef67d762015-09-09 17:03:29 +00002150
Eric Christopher572e03a2015-06-19 01:53:21 +00002151 // Extract the ptr from the resource descriptor.
Matt Arsenaultef67d762015-09-09 17:03:29 +00002152 unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
2153 &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
Tom Stellard15834092014-03-21 15:51:57 +00002154
Tom Stellard155bbb72014-08-11 22:18:17 +00002155 // Create an empty resource descriptor
2156 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
2157 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2158 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
2159 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
Tom Stellard794c8c02014-12-02 17:05:41 +00002160 uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
Tom Stellard15834092014-03-21 15:51:57 +00002161
Tom Stellard155bbb72014-08-11 22:18:17 +00002162 // Zero64 = 0
2163 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
2164 Zero64)
2165 .addImm(0);
Tom Stellard15834092014-03-21 15:51:57 +00002166
Tom Stellard155bbb72014-08-11 22:18:17 +00002167 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
2168 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2169 SRsrcFormatLo)
Tom Stellard794c8c02014-12-02 17:05:41 +00002170 .addImm(RsrcDataFormat & 0xFFFFFFFF);
Tom Stellard15834092014-03-21 15:51:57 +00002171
Tom Stellard155bbb72014-08-11 22:18:17 +00002172 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
2173 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
2174 SRsrcFormatHi)
Tom Stellard794c8c02014-12-02 17:05:41 +00002175 .addImm(RsrcDataFormat >> 32);
Tom Stellard15834092014-03-21 15:51:57 +00002176
Tom Stellard155bbb72014-08-11 22:18:17 +00002177 // NewSRsrc = {Zero64, SRsrcFormat}
Matt Arsenaultef67d762015-09-09 17:03:29 +00002178 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
2179 .addReg(Zero64)
2180 .addImm(AMDGPU::sub0_sub1)
2181 .addReg(SRsrcFormatLo)
2182 .addImm(AMDGPU::sub2)
2183 .addReg(SRsrcFormatHi)
2184 .addImm(AMDGPU::sub3);
Tom Stellard155bbb72014-08-11 22:18:17 +00002185
2186 MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2187 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00002188 if (VAddr) {
2189 // This is already an ADDR64 instruction so we need to add the pointer
2190 // extracted from the resource descriptor to the current value of VAddr.
Matt Arsenaultef67d762015-09-09 17:03:29 +00002191 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2192 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00002193
Matt Arsenaultef67d762015-09-09 17:03:29 +00002194 // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002195 DebugLoc DL = MI->getDebugLoc();
2196 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Matt Arsenaultef67d762015-09-09 17:03:29 +00002197 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002198 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard15834092014-03-21 15:51:57 +00002199
Matt Arsenaultef67d762015-09-09 17:03:29 +00002200 // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002201 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Matt Arsenaultef67d762015-09-09 17:03:29 +00002202 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00002203 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard15834092014-03-21 15:51:57 +00002204
Matt Arsenaultef67d762015-09-09 17:03:29 +00002205 // NewVaddr = {NewVaddrHi, NewVaddrLo}
2206 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2207 .addReg(NewVAddrLo)
2208 .addImm(AMDGPU::sub0)
2209 .addReg(NewVAddrHi)
2210 .addImm(AMDGPU::sub1);
Tom Stellard155bbb72014-08-11 22:18:17 +00002211 } else {
2212 // This instruction is the _OFFSET variant, so we need to convert it to
2213 // ADDR64.
Matt Arsenaulta40450c2015-11-05 02:46:56 +00002214 assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
2215 < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
2216 "FIXME: Need to emit flat atomics here");
2217
Tom Stellard155bbb72014-08-11 22:18:17 +00002218 MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
2219 MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
2220 MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
Tom Stellard155bbb72014-08-11 22:18:17 +00002221 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00002222
2223 // Atomics with return have an additional tied operand and are
2224 // missing some of the special bits.
2225 MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
2226 MachineInstr *Addr64;
2227
2228 if (!VDataIn) {
2229 // Regular buffer load / store.
2230 MachineInstrBuilder MIB
2231 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2232 .addOperand(*VData)
2233 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2234 // This will be replaced later
2235 // with the new value of vaddr.
2236 .addOperand(*SRsrc)
2237 .addOperand(*SOffset)
2238 .addOperand(*Offset);
2239
2240 // Atomics do not have this operand.
2241 if (const MachineOperand *GLC
2242 = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
2243 MIB.addImm(GLC->getImm());
2244 }
2245
2246 MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
2247
2248 if (const MachineOperand *TFE
2249 = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
2250 MIB.addImm(TFE->getImm());
2251 }
2252
2253 MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2254 Addr64 = MIB;
2255 } else {
2256 // Atomics with return.
2257 Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
2258 .addOperand(*VData)
2259 .addOperand(*VDataIn)
2260 .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
2261 // This will be replaced later
2262 // with the new value of vaddr.
2263 .addOperand(*SRsrc)
2264 .addOperand(*SOffset)
2265 .addOperand(*Offset)
2266 .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
2267 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
2268 }
Tom Stellard15834092014-03-21 15:51:57 +00002269
Tom Stellard155bbb72014-08-11 22:18:17 +00002270 MI->removeFromParent();
2271 MI = Addr64;
Tom Stellard15834092014-03-21 15:51:57 +00002272
Matt Arsenaultef67d762015-09-09 17:03:29 +00002273 // NewVaddr = {SRsrcPtr:sub0, SRsrcPtr:sub1}
2274 BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
2275 .addReg(SRsrcPtr, 0, AMDGPU::sub0)
2276 .addImm(AMDGPU::sub0)
2277 .addReg(SRsrcPtr, 0, AMDGPU::sub1)
2278 .addImm(AMDGPU::sub1);
2279
Tom Stellard155bbb72014-08-11 22:18:17 +00002280 VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
2281 SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
Tom Stellard15834092014-03-21 15:51:57 +00002282 }
Tom Stellard155bbb72014-08-11 22:18:17 +00002283
Tom Stellard155bbb72014-08-11 22:18:17 +00002284 // Update the instruction to use NewVaddr
2285 VAddr->setReg(NewVAddr);
2286 // Update the instruction to use NewSRsrc
2287 SRsrc->setReg(NewSRsrc);
Tom Stellard15834092014-03-21 15:51:57 +00002288 }
Tom Stellard82166022013-11-13 23:36:37 +00002289}
2290
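// Replace an SALU instruction with its VALU equivalent, driven by a worklist
// so the users of each rewritten result are visited and converted as well.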
2291void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
2292 SmallVector<MachineInstr *, 128> Worklist;
2293 Worklist.push_back(&TopInst);
2294
2295 while (!Worklist.empty()) {
2296 MachineInstr *Inst = Worklist.pop_back_val();
Tom Stellarde0387202014-03-21 15:51:54 +00002297 MachineBasicBlock *MBB = Inst->getParent();
2298 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
2299
Matt Arsenault27cc9582014-04-18 01:53:18 +00002300 unsigned Opcode = Inst->getOpcode();
Tom Stellard0c354f22014-04-30 15:31:29 +00002301 unsigned NewOpcode = getVALUOp(*Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00002302
Tom Stellarde0387202014-03-21 15:51:54 +00002303 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00002304 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00002305 default:
Tom Stellard0c354f22014-04-30 15:31:29 +00002306 break;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002307 case AMDGPU::S_AND_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002308 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002309 Inst->eraseFromParent();
2310 continue;
2311
2312 case AMDGPU::S_OR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002313 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002314 Inst->eraseFromParent();
2315 continue;
2316
2317 case AMDGPU::S_XOR_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002318 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002319 Inst->eraseFromParent();
2320 continue;
2321
2322 case AMDGPU::S_NOT_B64:
Matt Arsenaultf003c382015-08-26 20:47:50 +00002323 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002324 Inst->eraseFromParent();
2325 continue;
2326
Matt Arsenault8333e432014-06-10 19:18:24 +00002327 case AMDGPU::S_BCNT1_I32_B64:
2328 splitScalar64BitBCNT(Worklist, Inst);
2329 Inst->eraseFromParent();
2330 continue;
2331
Matt Arsenault94812212014-11-14 18:18:16 +00002332 case AMDGPU::S_BFE_I64: {
2333 splitScalar64BitBFE(Worklist, Inst);
2334 Inst->eraseFromParent();
2335 continue;
2336 }
2337
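    // On VI (VOLCANIC_ISLANDS) only the *REV forms of these shifts exist;
    // they take the shift amount in src0, so the operands must be swapped.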
Marek Olsakbe047802014-12-07 12:19:03 +00002338 case AMDGPU::S_LSHL_B32:
2339 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2340 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
2341 swapOperands(Inst);
2342 }
2343 break;
2344 case AMDGPU::S_ASHR_I32:
2345 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2346 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
2347 swapOperands(Inst);
2348 }
2349 break;
2350 case AMDGPU::S_LSHR_B32:
2351 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2352 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
2353 swapOperands(Inst);
2354 }
2355 break;
Marek Olsak707a6d02015-02-03 21:53:01 +00002356 case AMDGPU::S_LSHL_B64:
2357 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2358 NewOpcode = AMDGPU::V_LSHLREV_B64;
2359 swapOperands(Inst);
2360 }
2361 break;
2362 case AMDGPU::S_ASHR_I64:
2363 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2364 NewOpcode = AMDGPU::V_ASHRREV_I64;
2365 swapOperands(Inst);
2366 }
2367 break;
2368 case AMDGPU::S_LSHR_B64:
2369 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
2370 NewOpcode = AMDGPU::V_LSHRREV_B64;
2371 swapOperands(Inst);
2372 }
2373 break;
Marek Olsakbe047802014-12-07 12:19:03 +00002374
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00002375 case AMDGPU::S_ABS_I32:
2376 lowerScalarAbs(Worklist, Inst);
2377 Inst->eraseFromParent();
2378 continue;
2379
Tom Stellardbc4497b2016-02-12 23:45:29 +00002380 case AMDGPU::S_CBRANCH_SCC0:
2381 case AMDGPU::S_CBRANCH_SCC1:
2382 // Clear unused bits of vcc
2383 BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC)
2384 .addReg(AMDGPU::EXEC)
2385 .addReg(AMDGPU::VCC);
2386 break;
2387
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002388 case AMDGPU::S_BFE_U64:
Matt Arsenaultf35182c2014-03-24 20:08:05 +00002389 case AMDGPU::S_BFM_B64:
2390 llvm_unreachable("Moving this op to VALU not implemented");
Tom Stellarde0387202014-03-21 15:51:54 +00002391 }
2392
Tom Stellard15834092014-03-21 15:51:57 +00002393 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
2394 // We cannot move this instruction to the VALU, so we should try to
2395 // legalize its operands instead.
2396 legalizeOperands(Inst);
Tom Stellard82166022013-11-13 23:36:37 +00002397 continue;
Tom Stellard15834092014-03-21 15:51:57 +00002398 }
Tom Stellard82166022013-11-13 23:36:37 +00002399
Tom Stellard82166022013-11-13 23:36:37 +00002400 // Use the new VALU Opcode.
2401 const MCInstrDesc &NewDesc = get(NewOpcode);
2402 Inst->setDesc(NewDesc);
2403
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002404 // Remove any references to SCC. Vector instructions can't read from it, and
2405 // we're just about to add the implicit use / defs of VCC; we don't want
2406 // both.
2407 for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
2408 MachineOperand &Op = Inst->getOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00002409 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002410 Inst->RemoveOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00002411 addSCCDefUsersToVALUWorklist(Inst, Worklist);
2412 }
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00002413 }
2414
Matt Arsenault27cc9582014-04-18 01:53:18 +00002415 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
2416 // We are converting these to a BFE, so we need to add the missing
2417 // operands for the size and offset.
2418 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
2419 Inst->addOperand(MachineOperand::CreateImm(0));
2420 Inst->addOperand(MachineOperand::CreateImm(Size));
2421
Matt Arsenaultb5b51102014-06-10 19:18:21 +00002422 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
2423 // The VALU version adds the second operand to the result, so insert an
2424 // extra 0 operand.
2425 Inst->addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00002426 }
2427
Alex Lorenzb4d0d6a2015-07-31 23:30:09 +00002428 Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
Tom Stellard82166022013-11-13 23:36:37 +00002429
Matt Arsenault78b86702014-04-18 05:19:26 +00002430 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
2431 const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
2432 // If we need to move this to VGPRs, we need to unpack the second operand
2433 // back into the 2 separate ones for bit offset and width.
2434 assert(OffsetWidthOp.isImm() &&
2435 "Scalar BFE is only implemented for constant width and offset");
2436 uint32_t Imm = OffsetWidthOp.getImm();
2437
2438 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
2439 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
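      // e.g. (illustrative) Imm == 0x00100008 gives Offset == 8 and
      // BitWidth == 16.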
Matt Arsenault78b86702014-04-18 05:19:26 +00002440 Inst->RemoveOperand(2); // Remove old immediate.
2441 Inst->addOperand(MachineOperand::CreateImm(Offset));
Vincent Lejeune94af31f2014-05-10 19:18:33 +00002442 Inst->addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00002443 }
2444
Tom Stellardbc4497b2016-02-12 23:45:29 +00002445 bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef();
2446 unsigned NewDstReg = AMDGPU::NoRegister;
2447 if (HasDst) {
2448 // Update the destination register class.
2449 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
2450 if (!NewDstRC)
2451 continue;
Tom Stellard82166022013-11-13 23:36:37 +00002452
Tom Stellardbc4497b2016-02-12 23:45:29 +00002453 unsigned DstReg = Inst->getOperand(0).getReg();
2454 NewDstReg = MRI.createVirtualRegister(NewDstRC);
2455 MRI.replaceRegWith(DstReg, NewDstReg);
2456 }
Tom Stellard82166022013-11-13 23:36:37 +00002457
Tom Stellarde1a24452014-04-17 21:00:01 +00002458 // Legalize the operands
2459 legalizeOperands(Inst);
2460
Tom Stellardbc4497b2016-02-12 23:45:29 +00002461 if (HasDst)
2462 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard82166022013-11-13 23:36:37 +00002463 }
2464}
2465
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00002466//===----------------------------------------------------------------------===//
2467// Indirect addressing callbacks
2468//===----------------------------------------------------------------------===//
2469
Tom Stellard26a3b672013-10-22 18:19:10 +00002470const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
Tom Stellard45c0b3a2015-01-07 20:59:25 +00002471 return &AMDGPU::VGPR_32RegClass;
Tom Stellardf3b2a1e2013-02-06 17:32:29 +00002472}
2473
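// Lower S_ABS_I32 for the VALU as abs(x) = max(x, 0 - x): negate with
// V_SUB_I32, then take the signed maximum with V_MAX_I32.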
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00002474void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
2475 MachineInstr *Inst) const {
2476 MachineBasicBlock &MBB = *Inst->getParent();
2477 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2478 MachineBasicBlock::iterator MII = Inst;
2479 DebugLoc DL = Inst->getDebugLoc();
2480
2481 MachineOperand &Dest = Inst->getOperand(0);
2482 MachineOperand &Src = Inst->getOperand(1);
2483 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2484 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2485
2486 BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
2487 .addImm(0)
2488 .addReg(Src.getReg());
2489
2490 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
2491 .addReg(Src.getReg())
2492 .addReg(TmpReg);
2493
2494 MRI.replaceRegWith(Dest.getReg(), ResultReg);
2495 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
2496}
2497
void SIInstrInfo::splitScalar64BitUnaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // We don't need to legalizeOperands here because for a single operand, src0
  // will support any kind of input.

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}

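// Split a 64-bit scalar binary operation into two 32-bit VALU instructions,
// one per half, and recombine the results; e.g. S_AND_B64 becomes a pair of
// V_AND_B32s feeding a REG_SEQUENCE.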
void SIInstrInfo::splitScalar64BitBinaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  legalizeOperands(LoHalf);
  legalizeOperands(HiHalf);

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}

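// Lower a 64-bit scalar bit count to the VALU. V_BCNT_U32_B32 computes
// bcnt(src0) + src1, so the count of the low half is fed into the count of
// the high half as an accumulator.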
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                                       MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  // We don't need to legalize operands here. src0 for either instruction can
  // be an SGPR, and the second input is unused or determined here.
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

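// Lower S_BFE_I64 to the VALU. Only the sext_inreg pattern (offset 0, width
// at most 32) is implemented; anything else trips the assert below.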
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                                      MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  uint32_t Imm = Inst->getOperand(2).getImm();
  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

  (void) Offset;

  // Only sext_inreg cases handled.
  assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
         BitWidth <= 32 &&
         Offset == 0 &&
         "Not implemented");

  if (BitWidth < 32) {
    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
      .addImm(0)
      .addImm(BitWidth);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
      .addImm(31)
      .addReg(MidRegLo);

    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
      .addReg(MidRegLo)
      .addImm(AMDGPU::sub0)
      .addReg(MidRegHi)
      .addImm(AMDGPU::sub1);

    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    return;
  }

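  // BitWidth is exactly 32, i.e. a sign extension of the low half: reuse the
  // low half unchanged and fill the high half with its sign bit, ashr(lo, 31).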
  MachineOperand &Src = Inst->getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
    .addReg(Src.getReg(), 0, AMDGPU::sub0);

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
    .addImm(AMDGPU::sub0)
    .addReg(TmpReg)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

void SIInstrInfo::addUsersToMoveToVALUWorklist(
  unsigned DstReg,
  MachineRegisterInfo &MRI,
  SmallVectorImpl<MachineInstr *> &Worklist) const {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
         E = MRI.use_end(); I != E; ++I) {
    MachineInstr &UseMI = *I->getParent();
    if (!canReadVGPR(UseMI, I.getOperandNo())) {
      Worklist.push_back(&UseMI);
    }
  }
}

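// When an instruction defining SCC is moved to the VALU, its replacement
// defines VCC instead of SCC, so any readers of SCC have to be moved to the
// VALU as well.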
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst,
                                               SmallVectorImpl<MachineInstr *> &Worklist) const {
  // This assumes that all the users of SCC are in the same block
  // as the SCC def.
  for (MachineBasicBlock::iterator I = SCCDefInst,
       E = SCCDefInst->getParent()->end(); I != E; ++I) {

    // Exit if we find another SCC def.
    if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
      return;

    if (I->findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
      Worklist.push_back(I);
  }
}

const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
  const MachineInstr &Inst) const {
  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);

  switch (Inst.getOpcode()) {
  // For target instructions, getOpRegClass just returns the virtual register
  // class associated with the operand, so we need to find an equivalent VGPR
  // register class in order to move the instruction to the VALU.
  case AMDGPU::COPY:
  case AMDGPU::PHI:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
    if (RI.hasVGPRs(NewDstRC))
      return nullptr;

    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
    if (!NewDstRC)
      return nullptr;
    return NewDstRC;
  default:
    return NewDstRC;
  }
}

// Find the one SGPR operand we are allowed to use.
unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
                                   int OpIndices[3]) const {
  const MCInstrDesc &Desc = MI->getDesc();

  // First we need to consider the instruction's operand requirements before
  // legalizing. Some operands are required to be SGPRs, such as implicit uses
  // of VCC, but we are still bound by the constant bus requirement to only use
  // one.
  //
  // If the operand's class is an SGPR, we can never move it.

  unsigned SGPRReg = findImplicitSGPRRead(*MI);
  if (SGPRReg != AMDGPU::NoRegister)
    return SGPRReg;

  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    if (Idx == -1)
      break;

    const MachineOperand &MO = MI->getOperand(Idx);
    if (!MO.isReg())
      continue;

    // Is this operand statically required to be an SGPR based on the operand
    // constraints?
    const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
    if (IsRequiredSGPR)
      return MO.getReg();

2814 // If this could be a VGPR or an SGPR, Check the dynamic register class.
    unsigned Reg = MO.getReg();
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
      UsedSGPRs[i] = Reg;
  }

  // We don't have a required SGPR operand, so we have a bit more freedom in
  // selecting operands to move.

  // Try to select the most used SGPR. If an SGPR is equal to one of the
  // others, we choose that.
  //
  // e.g.
  // V_FMA_F32 v0, s0, s0, s0 -> No moves
  // V_FMA_F32 v0, s0, s1, s0 -> Move s1

  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
  // prefer those.

  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[0];
  }

  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }

  return SGPRReg;
}

void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

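  // Reserve every VGPR tuple that overlaps [Begin, End]. A tuple of N 32-bit
  // registers starting up to N - 1 registers below Begin still overlaps the
  // range, which is why each wider class starts further back.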
  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VGPR_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}

MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                             unsigned OperandName) const {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  if (Idx == -1)
    return nullptr;

  return &MI.getOperand(Idx);
}

uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
  if (ST.isAmdHsaOS()) {
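    // Set ATC = 1 (bit 56 of the resource descriptor).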
    RsrcDataFormat |= (1ULL << 56);

    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      // Set MTYPE = 2
      RsrcDataFormat |= (2ULL << 59);
  }

  return RsrcDataFormat;
}

uint64_t SIInstrInfo::getScratchRsrcWords23() const {
  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
                    AMDGPU::RSRC_TID_ENABLE |
                    0xffffffff; // Size

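  // ELEMENT_SIZE is encoded as log2(bytes) - 1, so e.g. a maximum private
  // element size of 4 bytes is encoded as 1 and 16 bytes as 3.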
  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;

  Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);

  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
  // Clear them unless we want a huge stride.
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;

  return Rsrc23;
}

bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();

  return isSMRD(Opc);
}

bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
  unsigned Opc = MI->getOpcode();

  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
}

ArrayRef<std::pair<int, const char *>>
SIInstrInfo::getSerializableTargetIndices() const {
  static const std::pair<int, const char *> TargetIndices[] = {
      {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
  return makeArrayRef(TargetIndices);
}