//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests for
// long branches.
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    RI(ST), ST(ST) {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

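// Returns the number of operands on \p Node, not counting any trailing glue
// operands.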
static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

static SDValue findChainOperand(SDNode *Load) {
  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
  return LastOp;
}

/// Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic for this except for this.
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    return true;
  default:
    return false;
  }
}

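// TargetInstrInfo hook used by the SelectionDAG scheduler: returns true if the
// two machine-opcode load nodes read from the same base address, and if so
// reports their constant offsets in \p Offset0 and \p Offset1.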
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {

    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(1) != Load1->getOperand(1))
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
      return false;

    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    // Skip time and cache invalidation instructions.
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
      return false;

    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    // Check chain.
    if (findChainOperand(Load0) != findChainOperand(Load1))
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {

    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        findChainOperand(Load0) != findChainOperand(Load1) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract one from the index.
    --OffIdx0;
    --OffIdx1;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

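// Returns true for the DS read2st64 / write2st64 opcodes, whose offset0 and
// offset1 operands are scaled by 64 elements instead of 1.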
static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

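// TargetInstrInfo hook: report the base operand and byte offset of a memory
// access so the scheduler can compare addresses. Handles DS, MUBUF/MTBUF,
// SMRD and FLAT forms; returns false for anything that cannot be decomposed
// into a single register base plus an immediate offset.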
bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
                                          MachineOperand *&BaseOp,
                                          int64_t &Offset,
                                          const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt.getOpcode();

  if (isDS(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
      Offset = OffsetImm->getImm();
      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                                "operands of type register.");
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive. We
    // will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to convert
      // to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt.mayLoad())
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
      }

      if (isStride64(Opc))
        EltSize *= 64;

      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
      Offset = EltSize * Offset0;
      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                                "operands of type register.");
      return true;
    }

    return false;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset && SOffset->isReg())
      return false;

    MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    BaseOp = AddrReg;
    Offset = OffsetImm->getImm();

    if (SOffset) // soffset can be an inline immediate.
      Offset += SOffset->getImm();

    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  if (isSMRD(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    BaseOp = SBaseReg;
    Offset = OffsetImm->getImm();
    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  if (isFLAT(LdSt)) {
    MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (VAddr) {
      // Can't analyze 2 offsets.
      if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
        return false;

      BaseOp = VAddr;
    } else {
      // scratch instructions have either vaddr or saddr.
      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
    }

    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  return false;
}

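// Returns true if the two memory operations are known to use the same base
// pointer: either the base operands are identical, or both instructions have a
// single memoperand whose IR values strip back to the same underlying object.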
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
                                  const MachineOperand &BaseOp1,
                                  const MachineInstr &MI2,
                                  const MachineOperand &BaseOp2) {
  // Support only base operands with base registers.
  // Note: this could be extended to support FI operands.
  if (!BaseOp1.isReg() || !BaseOp2.isReg())
    return false;

  if (BaseOp1.isIdenticalTo(BaseOp2))
    return true;

  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
    return false;

  auto MO1 = *MI1.memoperands_begin();
  auto MO2 = *MI2.memoperands_begin();
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;
  const MachineFunction &MF = *MI1.getParent()->getParent();
  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
  Base1 = GetUnderlyingObject(Base1, DL);
  Base2 = GetUnderlyingObject(Base2, DL);

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
    return false;

  return Base1 == Base2;
}

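// TargetInstrInfo hook: decide whether the machine scheduler should keep these
// two memory operations next to each other. Clustering is limited by the
// estimated number of bytes loaded to avoid creating excessive register
// pressure.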
bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
                                      MachineOperand &BaseOp2,
                                      unsigned NumLoads) const {
  MachineInstr &FirstLdSt = *BaseOp1.getParent();
  MachineInstr &SecondLdSt = *BaseOp2.getParent();

  if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
    return false;

  const MachineOperand *FirstDst = nullptr;
  const MachineOperand *SecondDst = nullptr;

  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
      (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
      (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
    const unsigned MaxGlobalLoadCluster = 6;
    if (NumLoads > MaxGlobalLoadCluster)
      return false;

    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
    if (!FirstDst)
      FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
    if (!SecondDst)
      SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  }

  if (!FirstDst || !SecondDst)
    return false;

  // Try to limit clustering based on the total number of bytes loaded
  // rather than the number of instructions. This is done to help reduce
  // register pressure. The method used is somewhat inexact, though,
  // because it assumes that all loads in the cluster will load the
  // same number of bytes as FirstLdSt.

  // The unit of this value is bytes.
  // FIXME: This needs finer tuning.
  unsigned LoadClusterThreshold = 16;

  const MachineRegisterInfo &MRI =
      FirstLdSt.getParent()->getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());

  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16
// stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we end up saying that loads from different
// address spaces should be clustered, as well as ones which might cause bank
// conflicts.
//
// This might be deprecated, so it might not be worth that much effort to fix.
bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have less than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

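// Emits a DiagnosticInfoUnsupported error and an SI_ILLEGAL_COPY pseudo when
// copyPhysReg is asked for a copy that has no legal lowering (e.g. a VGPR to
// SGPR copy).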
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) {
  MachineFunction *MF = MBB.getParent();
  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
                                        "illegal SGPR to VGPR copy",
                                        DL, DS_Error);
  LLVMContext &C = MF->getFunction().getContext();
  C.diagnose(IllegalCopy);

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
}

void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
          .addImm(-1)
          .addImm(0);
      return;
    }

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RC)) {
    if (RI.getRegSizeInBits(*RC) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }

    if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);

    bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
    Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
  }
}

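// Maps an opcode to its commuted (REV) variant, or from a REV variant back to
// the original. Returns -1 if the mapped opcode does not exist on the target,
// and the input opcode unchanged if no mapping exists at all.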
int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

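// Materializes the immediate \p Value into \p DestReg, using a single
// S_MOV/V_MOV when the register class allows it and a per-subregister move
// sequence for wider register classes.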
void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       const DebugLoc &DL, unsigned DestReg,
                                       int64_t Value) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addImm(Value);
    return;
  }
  if (RegClass == &AMDGPU::VReg_64RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
      .addImm(Value);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RegClass)) {
    if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, Idx));
    Builder.addImm(IdxValue);
  }
}

const TargetRegisterClass *
SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
  return &AMDGPU::VGPR_32RegClass;
}

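// Lowers a select into a V_CNDMASK_B32, first materializing the condition
// (SCC, VCC or EXEC based) into an SReg_64 mask when necessary.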
void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     const DebugLoc &DL, unsigned DstReg,
                                     ArrayRef<MachineOperand> Cond,
                                     unsigned TrueReg,
                                     unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
    BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
      .add(Cond[0]);
    BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg)
      .addReg(SReg);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::SCC_FALSE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::VCCNZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::VCCZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(TrueReg)
        .addReg(FalseReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::EXECNZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::EXECZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addReg(FalseReg)
        .addReg(TrueReg)
        .addReg(SReg);
      llvm_unreachable("Unhandled branch predicate EXECZ");
      break;
    }
    default:
      llvm_unreachable("invalid branch predicate");
    }
  } else {
    llvm_unreachable("Can only handle Cond size 1 or 2");
  }
}

unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

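// Returns the opcode that should be used to copy into a register of class
// \p DstRC: S_MOV_B32/S_MOV_B64 for SGPRs, V_MOV_B32 / V_MOV_B64_PSEUDO for
// VGPRs, and a generic COPY for anything wider.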
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

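// The spill save/restore helpers below map a spill size in bytes to the
// matching SI_SPILL_* pseudo instruction.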
static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const DebugLoc &DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use pseudo instruction for spilling SGPRs.
    const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));

    // The SGPR spill/restore instructions only work on numbered SGPRs, so we
    // need to make sure we are using the correct register class.
    if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
      .addReg(SrcReg, getKillRegState(isKill)) // data
      .addFrameIndex(FrameIndex)               // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
    // Add the scratch resource registers as implicit uses because we may end up
    // needing them, and need to ensure that the reserved registers are
    // correctly handled.

    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg, getKillRegState(isKill)) // data
    .addFrameIndex(FrameIndex)               // addr
    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
    .addImm(0)                               // offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const DebugLoc &DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
    if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);

    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)        // vaddr
    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
    .addImm(0)                        // offset
    .addMemOperand(MMO);
}

/// \param @Offset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(
    MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
    unsigned FrameOffset, unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const DebugLoc &DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    const DebugLoc &DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
                                   *MF);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
        WorkGroupSize > WavefrontSize) {
      unsigned TIDIGXReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      getAddNoCarry(Entry, Insert, DL, TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg);
    } else {
      // Get the wave id
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset
  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
  getAddNoCarry(MBB, MI, DL, TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg);

  return TmpReg;
}

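// Inserts enough S_NOP instructions to cover \p Count wait states; a single
// S_NOP can encode at most 8 wait states.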
void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
      .addImm(Arg);
  }
}

void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
  auto MF = MBB.getParent();
  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator)
      BuildMI(MBB, MBB.end(), DebugLoc(),
              get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
  }
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
    return MI.getOperand(0).getImm() + 1;
  }
}

Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001166bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1167 MachineBasicBlock &MBB = *MI.getParent();
Tom Stellardeba61072014-05-02 15:41:42 +00001168 DebugLoc DL = MBB.findDebugLoc(MI);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001169 switch (MI.getOpcode()) {
Tom Stellardc5a154d2018-06-28 23:47:12 +00001170 default: return TargetInstrInfo::expandPostRAPseudo(MI);
Eugene Zelenko59e12822017-08-08 00:47:13 +00001171 case AMDGPU::S_MOV_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001172 // This is only a terminator to get the correct spill code placement during
1173 // register allocation.
1174 MI.setDesc(get(AMDGPU::S_MOV_B64));
1175 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001176
1177 case AMDGPU::S_XOR_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001178 // This is only a terminator to get the correct spill code placement during
1179 // register allocation.
1180 MI.setDesc(get(AMDGPU::S_XOR_B64));
1181 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001182
1183 case AMDGPU::S_ANDN2_B64_term:
Matt Arsenaulte6740752016-09-29 01:44:16 +00001184 // This is only a terminator to get the correct spill code placement during
1185 // register allocation.
1186 MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1187 break;
Eugene Zelenko59e12822017-08-08 00:47:13 +00001188
Tom Stellard4842c052015-01-07 20:27:25 +00001189 case AMDGPU::V_MOV_B64_PSEUDO: {
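    // Expand into two 32-bit moves of the sub0 and sub1 halves of the destination.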
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001190 unsigned Dst = MI.getOperand(0).getReg();
Tom Stellard4842c052015-01-07 20:27:25 +00001191 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1192 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1193
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001194 const MachineOperand &SrcOp = MI.getOperand(1);
Tom Stellard4842c052015-01-07 20:27:25 +00001195 // FIXME: Will this work for 64-bit floating point immediates?
1196 assert(!SrcOp.isFPImm());
1197 if (SrcOp.isImm()) {
1198 APInt Imm(64, SrcOp.getImm());
1199 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001200 .addImm(Imm.getLoBits(32).getZExtValue())
1201 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001202 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001203 .addImm(Imm.getHiBits(32).getZExtValue())
1204 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001205 } else {
1206 assert(SrcOp.isReg());
1207 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001208 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
1209 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001210 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault80bc3552016-06-13 15:53:52 +00001211 .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
1212 .addReg(Dst, RegState::Implicit | RegState::Define);
Tom Stellard4842c052015-01-07 20:27:25 +00001213 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001214 MI.eraseFromParent();
Tom Stellard4842c052015-01-07 20:27:25 +00001215 break;
1216 }
Connor Abbott66b9bd62017-08-04 18:36:54 +00001217 case AMDGPU::V_SET_INACTIVE_B32: {
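    // Write the source only to the currently inactive lanes: invert EXEC, do the
    // move, then invert EXEC back.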
1218 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1219 .addReg(AMDGPU::EXEC);
1220 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
1221 .add(MI.getOperand(2));
1222 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1223 .addReg(AMDGPU::EXEC);
1224 MI.eraseFromParent();
1225 break;
1226 }
1227 case AMDGPU::V_SET_INACTIVE_B64: {
1228 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1229 .addReg(AMDGPU::EXEC);
1230 MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
1231 MI.getOperand(0).getReg())
1232 .add(MI.getOperand(2));
1233 expandPostRAPseudo(*Copy);
1234 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1235 .addReg(AMDGPU::EXEC);
1236 MI.eraseFromParent();
1237 break;
1238 }
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001239 case AMDGPU::V_MOVRELD_B32_V1:
1240 case AMDGPU::V_MOVRELD_B32_V2:
1241 case AMDGPU::V_MOVRELD_B32_V4:
1242 case AMDGPU::V_MOVRELD_B32_V8:
1243 case AMDGPU::V_MOVRELD_B32_V16: {
1244 const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1245 unsigned VecReg = MI.getOperand(0).getReg();
1246 bool IsUndef = MI.getOperand(1).isUndef();
1247 unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1248 assert(VecReg == MI.getOperand(1).getReg());
1249
1250 MachineInstr *MovRel =
1251 BuildMI(MBB, MI, DL, MovRelDesc)
1252 .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
Diana Picus116bbab2017-01-13 09:58:52 +00001253 .add(MI.getOperand(2))
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001254 .addReg(VecReg, RegState::ImplicitDefine)
Diana Picus116bbab2017-01-13 09:58:52 +00001255 .addReg(VecReg,
1256 RegState::Implicit | (IsUndef ? RegState::Undef : 0));
Nicolai Haehnlea7852092016-10-24 14:56:02 +00001257
1258 const int ImpDefIdx =
1259 MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1260 const int ImpUseIdx = ImpDefIdx + 1;
1261 MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1262
1263 MI.eraseFromParent();
1264 break;
1265 }
Tom Stellardbf3e6e52016-06-14 20:29:59 +00001266 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
Tom Stellardc93fc112015-12-10 02:13:01 +00001267 MachineFunction &MF = *MBB.getParent();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001268 unsigned Reg = MI.getOperand(0).getReg();
Matt Arsenault11587d92016-08-10 19:11:45 +00001269 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1270 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
Tom Stellardc93fc112015-12-10 02:13:01 +00001271
1272 // Create a bundle so these instructions won't be re-ordered by the
1273 // post-RA scheduler.
1274 MIBundleBuilder Bundler(MBB, MI);
1275 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1276
1277 // Add 32-bit offset from this instruction to the start of the
1278 // constant data.
1279 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001280 .addReg(RegLo)
Diana Picus116bbab2017-01-13 09:58:52 +00001281 .add(MI.getOperand(1)));
Tom Stellardc93fc112015-12-10 02:13:01 +00001282
Konstantin Zhuravlyovc96b5d72016-10-14 04:37:34 +00001283 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
1284 .addReg(RegHi);
1285 if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
1286 MIB.addImm(0);
1287 else
Diana Picus116bbab2017-01-13 09:58:52 +00001288 MIB.add(MI.getOperand(2));
Konstantin Zhuravlyovc96b5d72016-10-14 04:37:34 +00001289
1290 Bundler.append(MIB);
Eugene Zelenko59e12822017-08-08 00:47:13 +00001291 finalizeBundle(MBB, Bundler.begin());
Tom Stellardc93fc112015-12-10 02:13:01 +00001292
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001293 MI.eraseFromParent();
Tom Stellardc93fc112015-12-10 02:13:01 +00001294 break;
1295 }
Connor Abbott92638ab2017-08-04 18:36:52 +00001296 case AMDGPU::EXIT_WWM: {
1297 // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
1298 // is exited.
1299 MI.setDesc(get(AMDGPU::S_MOV_B64));
1300 break;
1301 }
Stanislav Mekhanoshin739174c2018-05-31 20:13:51 +00001302 case TargetOpcode::BUNDLE: {
1303 if (!MI.mayLoad())
1304 return false;
1305
 1306    // If it is a load, it must be a memory clause.
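    // Dissolve the bundle: unbundle each instruction and clear the internal-read
    // flags so the register operands become ordinary uses again.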
1307 for (MachineBasicBlock::instr_iterator I = MI.getIterator();
1308 I->isBundledWithSucc(); ++I) {
1309 I->unbundleFromSucc();
1310 for (MachineOperand &MO : I->operands())
1311 if (MO.isReg())
1312 MO.setIsInternalRead(false);
1313 }
1314
1315 MI.eraseFromParent();
1316 break;
1317 }
Tom Stellardeba61072014-05-02 15:41:42 +00001318 }
1319 return true;
1320}
1321
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001322bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
1323 MachineOperand &Src0,
1324 unsigned Src0OpName,
1325 MachineOperand &Src1,
1326 unsigned Src1OpName) const {
1327 MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
1328 if (!Src0Mods)
1329 return false;
1330
1331 MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
1332 assert(Src1Mods &&
1333 "All commutable instructions have both src0 and src1 modifiers");
1334
1335 int Src0ModsVal = Src0Mods->getImm();
1336 int Src1ModsVal = Src1Mods->getImm();
1337
1338 Src1Mods->setImm(Src0ModsVal);
1339 Src0Mods->setImm(Src1ModsVal);
1340 return true;
1341}
1342
1343static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
1344 MachineOperand &RegOp,
Matt Arsenault25dba302016-09-13 19:03:12 +00001345 MachineOperand &NonRegOp) {
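  // Swap a register operand with an immediate or frame-index operand in place,
  // carrying the register flags over to whichever operand ends up as the register.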
1346 unsigned Reg = RegOp.getReg();
1347 unsigned SubReg = RegOp.getSubReg();
1348 bool IsKill = RegOp.isKill();
1349 bool IsDead = RegOp.isDead();
1350 bool IsUndef = RegOp.isUndef();
1351 bool IsDebug = RegOp.isDebug();
1352
1353 if (NonRegOp.isImm())
1354 RegOp.ChangeToImmediate(NonRegOp.getImm());
1355 else if (NonRegOp.isFI())
1356 RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
1357 else
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001358 return nullptr;
1359
Matt Arsenault25dba302016-09-13 19:03:12 +00001360 NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1361 NonRegOp.setSubReg(SubReg);
1362
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001363 return &MI;
1364}
1365
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001366MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001367 unsigned Src0Idx,
1368 unsigned Src1Idx) const {
1369 assert(!NewMI && "this should never be used");
1370
1371 unsigned Opc = MI.getOpcode();
1372 int CommutedOpcode = commuteOpcode(Opc);
Marek Olsakcfbdba22015-06-26 20:29:10 +00001373 if (CommutedOpcode == -1)
1374 return nullptr;
1375
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001376 assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
1377 static_cast<int>(Src0Idx) &&
1378 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
1379 static_cast<int>(Src1Idx) &&
1380 "inconsistency with findCommutedOpIndices");
1381
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001382 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001383 MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenaultaa5ccfb2014-10-17 18:00:37 +00001384
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001385 MachineInstr *CommutedMI = nullptr;
1386 if (Src0.isReg() && Src1.isReg()) {
1387 if (isOperandLegal(MI, Src1Idx, &Src0)) {
1388 // Be sure to copy the source modifiers to the right place.
1389 CommutedMI
1390 = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
Matt Arsenaultd282ada2014-10-17 18:00:48 +00001391 }
1392
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001393 } else if (Src0.isReg() && !Src1.isReg()) {
1394 // src0 should always be able to support any operand type, so no need to
1395 // check operand legality.
1396 CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
1397 } else if (!Src0.isReg() && Src1.isReg()) {
1398 if (isOperandLegal(MI, Src1Idx, &Src0))
1399 CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
Tom Stellard82166022013-11-13 23:36:37 +00001400 } else {
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001401    // FIXME: Found two non-register operands to commute. This does happen.
1402 return nullptr;
Tom Stellard82166022013-11-13 23:36:37 +00001403 }
Christian Konig3c145802013-03-27 09:12:59 +00001404
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001405 if (CommutedMI) {
1406 swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
1407 Src1, AMDGPU::OpName::src1_modifiers);
1408
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001409 CommutedMI->setDesc(get(CommutedOpcode));
Matt Arsenaultbbb47da2016-09-08 17:19:29 +00001410 }
Christian Konig3c145802013-03-27 09:12:59 +00001411
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001412 return CommutedMI;
Christian Konig76edd4f2013-02-26 17:52:29 +00001413}
1414
Matt Arsenault92befe72014-09-26 17:54:54 +00001415// This needs to be implemented because the source modifiers may be inserted
1416// between the true commutable operands, and the base
1417// TargetInstrInfo::commuteInstruction uses it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001418bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001419 unsigned &SrcOpIdx1) const {
Alexander Timofeevdb7ee762018-09-11 11:56:50 +00001420 return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
1421}
1422
1423bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
1424 unsigned &SrcOpIdx1) const {
1425 if (!Desc.isCommutable())
Matt Arsenault92befe72014-09-26 17:54:54 +00001426 return false;
1427
Alexander Timofeevdb7ee762018-09-11 11:56:50 +00001428 unsigned Opc = Desc.getOpcode();
Matt Arsenault92befe72014-09-26 17:54:54 +00001429 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
1430 if (Src0Idx == -1)
1431 return false;
1432
Matt Arsenault92befe72014-09-26 17:54:54 +00001433 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
1434 if (Src1Idx == -1)
1435 return false;
1436
Andrew Kaylor16c4da02015-09-28 20:33:22 +00001437 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
Matt Arsenault92befe72014-09-26 17:54:54 +00001438}
1439
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001440bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1441 int64_t BrOffset) const {
1442 // BranchRelaxation should never have to check s_setpc_b64 because its dest
1443 // block is unanalyzable.
1444 assert(BranchOp != AMDGPU::S_SETPC_B64);
1445
1446 // Convert to dwords.
1447 BrOffset /= 4;
1448
1449 // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
1450 // from the next instruction.
1451 BrOffset -= 1;
1452
1453 return isIntN(BranchOffsetBits, BrOffset);
1454}
1455
1456MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
1457 const MachineInstr &MI) const {
1458 if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
 1459    // This would be a difficult analysis to perform, but it is always legal, so
1460 // there's no need to analyze it.
1461 return nullptr;
1462 }
1463
1464 return MI.getOperand(0).getMBB();
1465}
1466
1467unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1468 MachineBasicBlock &DestBB,
1469 const DebugLoc &DL,
1470 int64_t BrOffset,
1471 RegScavenger *RS) const {
1472 assert(RS && "RegScavenger required for long branching");
1473 assert(MBB.empty() &&
1474 "new block should be inserted for expanding unconditional branch");
1475 assert(MBB.pred_size() == 1);
1476
1477 MachineFunction *MF = MBB.getParent();
1478 MachineRegisterInfo &MRI = MF->getRegInfo();
1479
1480 // FIXME: Virtual register workaround for RegScavenger not working with empty
1481 // blocks.
1482 unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1483
1484 auto I = MBB.end();
1485
1486 // We need to compute the offset relative to the instruction immediately after
1487 // s_getpc_b64. Insert pc arithmetic code before last terminator.
1488 MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
1489
1490 // TODO: Handle > 32-bit block address.
1491 if (BrOffset >= 0) {
1492 BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1493 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1494 .addReg(PCReg, 0, AMDGPU::sub0)
1495 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
1496 BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1497 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1498 .addReg(PCReg, 0, AMDGPU::sub1)
1499 .addImm(0);
1500 } else {
1501 // Backwards branch.
1502 BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1503 .addReg(PCReg, RegState::Define, AMDGPU::sub0)
1504 .addReg(PCReg, 0, AMDGPU::sub0)
1505 .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
1506 BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1507 .addReg(PCReg, RegState::Define, AMDGPU::sub1)
1508 .addReg(PCReg, 0, AMDGPU::sub1)
1509 .addImm(0);
1510 }
1511
1512 // Insert the indirect branch after the other terminator.
1513 BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1514 .addReg(PCReg);
1515
1516 // FIXME: If spilling is necessary, this will fail because this scavenger has
1517 // no emergency stack slots. It is non-trivial to spill in this situation,
1518 // because the restore code needs to be specially placed after the
1519 // jump. BranchRelaxation then needs to be made aware of the newly inserted
1520 // block.
1521 //
1522 // If a spill is needed for the pc register pair, we need to insert a spill
1523 // restore block right before the destination block, and insert a short branch
1524 // into the old destination block's fallthrough predecessor.
1525 // e.g.:
1526 //
1527 // s_cbranch_scc0 skip_long_branch:
1528 //
1529 // long_branch_bb:
1530 // spill s[8:9]
1531 // s_getpc_b64 s[8:9]
1532 // s_add_u32 s8, s8, restore_bb
1533 // s_addc_u32 s9, s9, 0
1534 // s_setpc_b64 s[8:9]
1535 //
1536 // skip_long_branch:
1537 // foo;
1538 //
1539 // .....
1540 //
1541 // dest_bb_fallthrough_predecessor:
1542 // bar;
1543 // s_branch dest_bb
1544 //
1545 // restore_bb:
1546 // restore s[8:9]
1547 // fallthrough dest_bb
 1548  //
1549 // dest_bb:
1550 // buzz;
1551
1552 RS->enterBasicBlockEnd(MBB);
Matt Arsenaultb0b741e2018-10-30 01:33:14 +00001553 unsigned Scav = RS->scavengeRegisterBackwards(
1554 AMDGPU::SReg_64RegClass,
1555 MachineBasicBlock::iterator(GetPC), false, 0);
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001556 MRI.replaceRegWith(PCReg, Scav);
1557 MRI.clearVirtRegs();
1558 RS->setRegUsed(Scav);
1559
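  // The return value (4 + 8 + 4 + 4) corresponds to s_getpc_b64, s_add_u32 with a
  // 32-bit literal, s_addc_u32, and s_setpc_b64.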
1560 return 4 + 8 + 4 + 4;
1561}
1562
Matt Arsenault6d093802016-05-21 00:29:27 +00001563unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1564 switch (Cond) {
1565 case SIInstrInfo::SCC_TRUE:
1566 return AMDGPU::S_CBRANCH_SCC1;
1567 case SIInstrInfo::SCC_FALSE:
1568 return AMDGPU::S_CBRANCH_SCC0;
Matt Arsenault49459052016-05-21 00:29:40 +00001569 case SIInstrInfo::VCCNZ:
1570 return AMDGPU::S_CBRANCH_VCCNZ;
1571 case SIInstrInfo::VCCZ:
1572 return AMDGPU::S_CBRANCH_VCCZ;
1573 case SIInstrInfo::EXECNZ:
1574 return AMDGPU::S_CBRANCH_EXECNZ;
1575 case SIInstrInfo::EXECZ:
1576 return AMDGPU::S_CBRANCH_EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001577 default:
1578 llvm_unreachable("invalid branch predicate");
1579 }
1580}
1581
1582SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1583 switch (Opcode) {
1584 case AMDGPU::S_CBRANCH_SCC0:
1585 return SCC_FALSE;
1586 case AMDGPU::S_CBRANCH_SCC1:
1587 return SCC_TRUE;
Matt Arsenault49459052016-05-21 00:29:40 +00001588 case AMDGPU::S_CBRANCH_VCCNZ:
1589 return VCCNZ;
1590 case AMDGPU::S_CBRANCH_VCCZ:
1591 return VCCZ;
1592 case AMDGPU::S_CBRANCH_EXECNZ:
1593 return EXECNZ;
1594 case AMDGPU::S_CBRANCH_EXECZ:
1595 return EXECZ;
Matt Arsenault6d093802016-05-21 00:29:27 +00001596 default:
1597 return INVALID_BR;
1598 }
1599}
1600
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001601bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
1602 MachineBasicBlock::iterator I,
1603 MachineBasicBlock *&TBB,
1604 MachineBasicBlock *&FBB,
1605 SmallVectorImpl<MachineOperand> &Cond,
1606 bool AllowModify) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001607 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1608 // Unconditional Branch
1609 TBB = I->getOperand(0).getMBB();
1610 return false;
1611 }
1612
Jan Sjodina06bfe02017-05-15 20:18:37 +00001613 MachineBasicBlock *CondBB = nullptr;
Matt Arsenault6d093802016-05-21 00:29:27 +00001614
Jan Sjodina06bfe02017-05-15 20:18:37 +00001615 if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1616 CondBB = I->getOperand(1).getMBB();
1617 Cond.push_back(I->getOperand(0));
1618 } else {
1619 BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1620 if (Pred == INVALID_BR)
1621 return true;
Matt Arsenault6d093802016-05-21 00:29:27 +00001622
Jan Sjodina06bfe02017-05-15 20:18:37 +00001623 CondBB = I->getOperand(0).getMBB();
1624 Cond.push_back(MachineOperand::CreateImm(Pred));
1625 Cond.push_back(I->getOperand(1)); // Save the branch register.
1626 }
Matt Arsenault6d093802016-05-21 00:29:27 +00001627 ++I;
1628
1629 if (I == MBB.end()) {
1630 // Conditional branch followed by fall-through.
1631 TBB = CondBB;
1632 return false;
1633 }
1634
1635 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1636 TBB = CondBB;
1637 FBB = I->getOperand(0).getMBB();
1638 return false;
1639 }
1640
1641 return true;
1642}
1643
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001644bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
1645 MachineBasicBlock *&FBB,
1646 SmallVectorImpl<MachineOperand> &Cond,
1647 bool AllowModify) const {
1648 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
Matt Arsenaulteabb8dd2018-11-16 05:03:02 +00001649 auto E = MBB.end();
1650 if (I == E)
1651 return false;
1652
1653 // Skip over the instructions that are artificially terminators for special
1654 // exec management.
1655 while (I != E && !I->isBranch() && !I->isReturn() &&
1656 I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
1657 switch (I->getOpcode()) {
1658 case AMDGPU::SI_MASK_BRANCH:
1659 case AMDGPU::S_MOV_B64_term:
1660 case AMDGPU::S_XOR_B64_term:
1661 case AMDGPU::S_ANDN2_B64_term:
1662 break;
1663 case AMDGPU::SI_IF:
1664 case AMDGPU::SI_ELSE:
1665 case AMDGPU::SI_KILL_I1_TERMINATOR:
1666 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1667 // FIXME: It's messy that these need to be considered here at all.
1668 return true;
1669 default:
1670 llvm_unreachable("unexpected non-branch terminator inst");
1671 }
1672
1673 ++I;
1674 }
1675
1676 if (I == E)
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001677 return false;
1678
1679 if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1680 return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
1681
1682 ++I;
1683
1684 // TODO: Should be able to treat as fallthrough?
1685 if (I == MBB.end())
1686 return true;
1687
1688 if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
1689 return true;
1690
1691 MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
1692
1693 // Specifically handle the case where the conditional branch is to the same
1694 // destination as the mask branch. e.g.
1695 //
1696 // si_mask_branch BB8
1697 // s_cbranch_execz BB8
1698 // s_cbranch BB9
1699 //
1700 // This is required to understand divergent loops which may need the branches
1701 // to be relaxed.
1702 if (TBB != MaskBrDest || Cond.empty())
1703 return true;
1704
1705 auto Pred = Cond[0].getImm();
1706 return (Pred != EXECZ && Pred != EXECNZ);
1707}
1708
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001709unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001710 int *BytesRemoved) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001711 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
1712
1713 unsigned Count = 0;
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001714 unsigned RemovedSize = 0;
Matt Arsenault6d093802016-05-21 00:29:27 +00001715 while (I != MBB.end()) {
1716 MachineBasicBlock::iterator Next = std::next(I);
Matt Arsenault6bc43d82016-10-06 16:20:41 +00001717 if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1718 I = Next;
1719 continue;
1720 }
1721
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001722 RemovedSize += getInstSizeInBytes(*I);
Matt Arsenault6d093802016-05-21 00:29:27 +00001723 I->eraseFromParent();
1724 ++Count;
1725 I = Next;
1726 }
1727
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001728 if (BytesRemoved)
1729 *BytesRemoved = RemovedSize;
1730
Matt Arsenault6d093802016-05-21 00:29:27 +00001731 return Count;
1732}
1733
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001734// Copy the flags onto the implicit condition register operand.
1735static void preserveCondRegFlags(MachineOperand &CondReg,
1736 const MachineOperand &OrigCond) {
1737 CondReg.setIsUndef(OrigCond.isUndef());
1738 CondReg.setIsKill(OrigCond.isKill());
1739}
1740
Matt Arsenaulte8e0f5c2016-09-14 17:24:15 +00001741unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
Matt Arsenault6d093802016-05-21 00:29:27 +00001742 MachineBasicBlock *TBB,
1743 MachineBasicBlock *FBB,
1744 ArrayRef<MachineOperand> Cond,
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001745 const DebugLoc &DL,
1746 int *BytesAdded) const {
Matt Arsenault6d093802016-05-21 00:29:27 +00001747 if (!FBB && Cond.empty()) {
1748 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1749 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001750 if (BytesAdded)
1751 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001752 return 1;
1753 }
1754
Jan Sjodina06bfe02017-05-15 20:18:37 +00001755  if (Cond.size() == 1 && Cond[0].isReg()) {
1756 BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1757 .add(Cond[0])
1758 .addMBB(TBB);
1759 return 1;
1760 }
1761
Matt Arsenault6d093802016-05-21 00:29:27 +00001762 assert(TBB && Cond[0].isImm());
1763
1764 unsigned Opcode
1765 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1766
1767 if (!FBB) {
1769 MachineInstr *CondBr =
1770 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001771 .addMBB(TBB);
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001772
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001773 // Copy the flags onto the implicit condition register operand.
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001774 preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001775
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001776 if (BytesAdded)
1777 *BytesAdded = 4;
Matt Arsenault6d093802016-05-21 00:29:27 +00001778 return 1;
1779 }
1780
1781 assert(TBB && FBB);
1782
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001783 MachineInstr *CondBr =
1784 BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault6d093802016-05-21 00:29:27 +00001785 .addMBB(TBB);
1786 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1787 .addMBB(FBB);
1788
Matt Arsenault52f14ec2016-11-07 19:09:27 +00001789 MachineOperand &CondReg = CondBr->getOperand(1);
1790 CondReg.setIsUndef(Cond[1].isUndef());
1791 CondReg.setIsKill(Cond[1].isKill());
1792
Matt Arsenaulta2b036e2016-09-14 17:23:48 +00001793 if (BytesAdded)
1794 *BytesAdded = 8;
1795
Matt Arsenault6d093802016-05-21 00:29:27 +00001796 return 2;
1797}
1798
Matt Arsenault1b9fc8e2016-09-14 20:43:16 +00001799bool SIInstrInfo::reverseBranchCondition(
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001800 SmallVectorImpl<MachineOperand> &Cond) const {
Jan Sjodina06bfe02017-05-15 20:18:37 +00001801 if (Cond.size() != 2) {
1802 return true;
1803 }
1804
1805 if (Cond[0].isImm()) {
1806 Cond[0].setImm(-Cond[0].getImm());
1807 return false;
1808 }
1809
1810 return true;
Matt Arsenault72fcd5f2016-05-21 00:29:34 +00001811}
1812
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001813bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1814 ArrayRef<MachineOperand> Cond,
1815 unsigned TrueReg, unsigned FalseReg,
1816 int &CondCycles,
1817 int &TrueCycles, int &FalseCycles) const {
1818 switch (Cond[0].getImm()) {
1819 case VCCNZ:
1820 case VCCZ: {
1821 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1822 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1823 assert(MRI.getRegClass(FalseReg) == RC);
1824
1825 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1826 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1827
1828 // Limit to equal cost for branch vs. N v_cndmask_b32s.
1829 return !RI.isSGPRClass(RC) && NumInsts <= 6;
1830 }
1831 case SCC_TRUE:
1832 case SCC_FALSE: {
1833 // FIXME: We could insert for VGPRs if we could replace the original compare
1834 // with a vector one.
1835 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1836 const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
1837 assert(MRI.getRegClass(FalseReg) == RC);
1838
1839 int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
1840
 1841    // Widths that are a multiple of 64 bits can use s_cselect_b64, halving the count.
1842 if (NumInsts % 2 == 0)
1843 NumInsts /= 2;
1844
1845 CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
1846 return RI.isSGPRClass(RC);
1847 }
1848 default:
1849 return false;
1850 }
1851}
1852
1853void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
1854 MachineBasicBlock::iterator I, const DebugLoc &DL,
1855 unsigned DstReg, ArrayRef<MachineOperand> Cond,
1856 unsigned TrueReg, unsigned FalseReg) const {
1857 BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1858 if (Pred == VCCZ || Pred == SCC_FALSE) {
1859 Pred = static_cast<BranchPredicate>(-Pred);
1860 std::swap(TrueReg, FalseReg);
1861 }
1862
1863 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1864 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001865 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001866
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001867 if (DstSize == 32) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001868 unsigned SelOp = Pred == SCC_TRUE ?
1869 AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1870
 1871    // V_CNDMASK/S_CSELECT take the false value first and the true value second.
1872 MachineInstr *Select =
1873 BuildMI(MBB, I, DL, get(SelOp), DstReg)
1874 .addReg(FalseReg)
1875 .addReg(TrueReg);
1876
1877 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1878 return;
1879 }
1880
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001881 if (DstSize == 64 && Pred == SCC_TRUE) {
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001882 MachineInstr *Select =
1883 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1884 .addReg(FalseReg)
1885 .addReg(TrueReg);
1886
1887 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1888 return;
1889 }
1890
1891 static const int16_t Sub0_15[] = {
1892 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1893 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1894 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1895 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1896 };
1897
1898 static const int16_t Sub0_15_64[] = {
1899 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1900 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1901 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1902 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1903 };
1904
1905 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1906 const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
1907 const int16_t *SubIndices = Sub0_15;
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001908 int NElts = DstSize / 32;
Matt Arsenault9f5e0ef2017-01-25 04:25:02 +00001909
 1910  // 64-bit select is only available for SALU.
1911 if (Pred == SCC_TRUE) {
1912 SelOp = AMDGPU::S_CSELECT_B64;
1913 EltRC = &AMDGPU::SGPR_64RegClass;
1914 SubIndices = Sub0_15_64;
1915
1916 assert(NElts % 2 == 0);
1917 NElts /= 2;
1918 }
1919
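  // Build each piece with a per-element select and assemble the wide result with a
  // REG_SEQUENCE.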
1920 MachineInstrBuilder MIB = BuildMI(
1921 MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1922
1923 I = MIB->getIterator();
1924
1925 SmallVector<unsigned, 8> Regs;
1926 for (int Idx = 0; Idx != NElts; ++Idx) {
1927 unsigned DstElt = MRI.createVirtualRegister(EltRC);
1928 Regs.push_back(DstElt);
1929
1930 unsigned SubIdx = SubIndices[Idx];
1931
1932 MachineInstr *Select =
1933 BuildMI(MBB, I, DL, get(SelOp), DstElt)
1934 .addReg(FalseReg, 0, SubIdx)
1935 .addReg(TrueReg, 0, SubIdx);
1936 preserveCondRegFlags(Select->getOperand(3), Cond[1]);
1937
1938 MIB.addReg(DstElt)
1939 .addImm(SubIdx);
1940 }
1941}
1942
Sam Kolton27e0f8b2017-03-31 11:42:43 +00001943bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
1944 switch (MI.getOpcode()) {
1945 case AMDGPU::V_MOV_B32_e32:
1946 case AMDGPU::V_MOV_B32_e64:
1947 case AMDGPU::V_MOV_B64_PSEUDO: {
1948 // If there are additional implicit register operands, this may be used for
1949 // register indexing so the source register operand isn't simply copied.
1950 unsigned NumOps = MI.getDesc().getNumOperands() +
1951 MI.getDesc().getNumImplicitUses();
1952
1953 return MI.getNumOperands() == NumOps;
1954 }
1955 case AMDGPU::S_MOV_B32:
1956 case AMDGPU::S_MOV_B64:
1957 case AMDGPU::COPY:
1958 return true;
1959 default:
1960 return false;
1961 }
1962}
1963
Jan Sjodin312ccf72017-09-14 20:53:51 +00001964unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
Marcello Maggioni5ca41282018-08-20 19:23:45 +00001965 unsigned Kind) const {
Jan Sjodin312ccf72017-09-14 20:53:51 +00001966 switch(Kind) {
1967 case PseudoSourceValue::Stack:
1968 case PseudoSourceValue::FixedStack:
Matt Arsenault0da63502018-08-31 05:49:54 +00001969 return AMDGPUAS::PRIVATE_ADDRESS;
Jan Sjodin312ccf72017-09-14 20:53:51 +00001970 case PseudoSourceValue::ConstantPool:
1971 case PseudoSourceValue::GOT:
1972 case PseudoSourceValue::JumpTable:
1973 case PseudoSourceValue::GlobalValueCallEntry:
1974 case PseudoSourceValue::ExternalSymbolCallEntry:
1975 case PseudoSourceValue::TargetCustom:
Matt Arsenault0da63502018-08-31 05:49:54 +00001976 return AMDGPUAS::CONSTANT_ADDRESS;
Jan Sjodin312ccf72017-09-14 20:53:51 +00001977 }
Matt Arsenault0da63502018-08-31 05:49:54 +00001978 return AMDGPUAS::FLAT_ADDRESS;
Jan Sjodin312ccf72017-09-14 20:53:51 +00001979}
1980
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001981static void removeModOperands(MachineInstr &MI) {
1982 unsigned Opc = MI.getOpcode();
1983 int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1984 AMDGPU::OpName::src0_modifiers);
1985 int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1986 AMDGPU::OpName::src1_modifiers);
1987 int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
1988 AMDGPU::OpName::src2_modifiers);
1989
1990 MI.RemoveOperand(Src2ModIdx);
1991 MI.RemoveOperand(Src1ModIdx);
1992 MI.RemoveOperand(Src0ModIdx);
1993}
1994
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001995bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Matt Arsenault0325d3d2015-02-21 21:29:07 +00001996 unsigned Reg, MachineRegisterInfo *MRI) const {
1997 if (!MRI->hasOneNonDBGUse(Reg))
1998 return false;
1999
Nicolai Haehnle39980da2017-11-28 08:41:50 +00002000 switch (DefMI.getOpcode()) {
2001 default:
2002 return false;
2003 case AMDGPU::S_MOV_B64:
 2004    // TODO: We could fold 64-bit immediates, but this gets complicated
2005 // when there are sub-registers.
2006 return false;
2007
2008 case AMDGPU::V_MOV_B32_e32:
2009 case AMDGPU::S_MOV_B32:
2010 break;
2011 }
2012
2013 const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
2014 assert(ImmOp);
2015 // FIXME: We could handle FrameIndex values here.
2016 if (!ImmOp->isImm())
2017 return false;
2018
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002019 unsigned Opc = UseMI.getOpcode();
Tom Stellard2add8a12016-09-06 20:00:26 +00002020 if (Opc == AMDGPU::COPY) {
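    // Fold the immediate straight into the copy by rewriting it as a v_mov_b32 or
    // s_mov_b32 of the immediate, depending on whether the destination is a VGPR.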
2021 bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
Tom Stellard2add8a12016-09-06 20:00:26 +00002022 unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
Tom Stellard2add8a12016-09-06 20:00:26 +00002023 UseMI.setDesc(get(NewOpc));
2024 UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
2025 UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
2026 return true;
2027 }
2028
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002029 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
2030 Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
Matt Arsenault2ed21932017-02-27 20:21:31 +00002031 // Don't fold if we are using source or output modifiers. The new VOP2
2032 // instructions don't have them.
2033 if (hasAnyModifiersSet(UseMI))
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002034 return false;
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002035
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00002036 // If this is a free constant, there's no reason to do this.
2037 // TODO: We could fold this here instead of letting SIFoldOperands do it
2038 // later.
Matt Arsenault4bd72362016-12-10 00:39:12 +00002039 MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
2040
2041 // Any src operand can be used for the legality check.
Nicolai Haehnle39980da2017-11-28 08:41:50 +00002042 if (isInlineConstant(UseMI, *Src0, *ImmOp))
Matt Arsenault3d1c1de2016-04-14 21:58:24 +00002043 return false;
2044
Matt Arsenault2ed21932017-02-27 20:21:31 +00002045 bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002046 MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
2047 MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002048
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002049 // Multiplied part is the constant: Use v_madmk_{f16, f32}.
Matt Arsenaultf0783302015-02-21 21:29:10 +00002050 // We should only expect these to be on src0 due to canonicalizations.
2051 if (Src0->isReg() && Src0->getReg() == Reg) {
Matt Arsenaulta266bd82016-03-02 04:05:14 +00002052 if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00002053 return false;
2054
Matt Arsenaulta266bd82016-03-02 04:05:14 +00002055 if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
Matt Arsenaultf0783302015-02-21 21:29:10 +00002056 return false;
2057
Nikolay Haustov65607812016-03-11 09:27:25 +00002058 // We need to swap operands 0 and 1 since madmk constant is at operand 1.
Matt Arsenaultf0783302015-02-21 21:29:10 +00002059
Nicolai Haehnle39980da2017-11-28 08:41:50 +00002060 const int64_t Imm = ImmOp->getImm();
Matt Arsenaultf0783302015-02-21 21:29:10 +00002061
2062 // FIXME: This would be a lot easier if we could return a new instruction
2063 // instead of having to modify in place.
2064
2065 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002066 UseMI.RemoveOperand(
2067 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2068 UseMI.RemoveOperand(
2069 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenaultf0783302015-02-21 21:29:10 +00002070
2071 unsigned Src1Reg = Src1->getReg();
2072 unsigned Src1SubReg = Src1->getSubReg();
Matt Arsenaultf0783302015-02-21 21:29:10 +00002073 Src0->setReg(Src1Reg);
2074 Src0->setSubReg(Src1SubReg);
Matt Arsenault5e100162015-04-24 01:57:58 +00002075 Src0->setIsKill(Src1->isKill());
2076
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002077 if (Opc == AMDGPU::V_MAC_F32_e64 ||
2078 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002079 UseMI.untieRegOperand(
2080 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002081
Nikolay Haustov65607812016-03-11 09:27:25 +00002082 Src1->ChangeToImmediate(Imm);
Matt Arsenaultf0783302015-02-21 21:29:10 +00002083
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002084 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002085 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
Matt Arsenaultf0783302015-02-21 21:29:10 +00002086
2087 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2088 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002089 DefMI.eraseFromParent();
Matt Arsenaultf0783302015-02-21 21:29:10 +00002090
2091 return true;
2092 }
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002093
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002094 // Added part is the constant: Use v_madak_{f16, f32}.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002095 if (Src2->isReg() && Src2->getReg() == Reg) {
2096 // Not allowed to use constant bus for another operand.
2097 // We can however allow an inline immediate as src0.
Alexander Timofeev20cbe6f2018-09-10 16:42:49 +00002098 bool Src0Inlined = false;
2099 if (Src0->isReg()) {
 2100        // Try to inline the constant if possible.
 2101        // If the def is a move of an immediate and this is its only use,
 2102        // folding it in saves a VGPR.
2103 MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
2104 if (Def && Def->isMoveImmediate() &&
2105 isInlineConstant(Def->getOperand(1)) &&
2106 MRI->hasOneUse(Src0->getReg())) {
2107 Src0->ChangeToImmediate(Def->getOperand(1).getImm());
2108 Src0Inlined = true;
2109 } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
2110 RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
2111 (RI.isVirtualRegister(Src0->getReg()) &&
2112 RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
2113 return false;
2114 // VGPR is okay as Src0 - fallthrough
2115 }
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002116
Alexander Timofeev20cbe6f2018-09-10 16:42:49 +00002117      if (Src1->isReg() && !Src0Inlined) {
 2118        // The single inline-constant slot is still free - try to fill it with Src1's def.
2119 MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
2120 if (Def && Def->isMoveImmediate() &&
2121 isInlineConstant(Def->getOperand(1)) &&
2122 MRI->hasOneUse(Src1->getReg()) &&
2123 commuteInstruction(UseMI)) {
2124 Src0->ChangeToImmediate(Def->getOperand(1).getImm());
2125 } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
2126 RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
2127 (RI.isVirtualRegister(Src1->getReg()) &&
2128 RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
2129 return false;
2130 // VGPR is okay as Src1 - fallthrough
2131 }
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002132
Nicolai Haehnle39980da2017-11-28 08:41:50 +00002133 const int64_t Imm = ImmOp->getImm();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002134
2135 // FIXME: This would be a lot easier if we could return a new instruction
2136 // instead of having to modify in place.
2137
2138 // Remove these first since they are at the end.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002139 UseMI.RemoveOperand(
2140 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
2141 UseMI.RemoveOperand(
2142 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002143
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002144 if (Opc == AMDGPU::V_MAC_F32_e64 ||
2145 Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002146 UseMI.untieRegOperand(
2147 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002148
 2149      // ChangeToImmediate will add Src2 back to the instruction.
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002150 Src2->ChangeToImmediate(Imm);
2151
2152 // These come before src2.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002153 removeModOperands(UseMI);
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002154 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002155
2156 bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
2157 if (DeleteDef)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002158 DefMI.eraseFromParent();
Matt Arsenault0325d3d2015-02-21 21:29:07 +00002159
2160 return true;
2161 }
2162 }
2163
2164 return false;
2165}
2166
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002167static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
2168 int WidthB, int OffsetB) {
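  // The lower access must end at or before the higher one begins, e.g. [0, 4) and
  // [4, 8) do not overlap because 0 + 4 <= 4.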
2169 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
2170 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
2171 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2172 return LowOffset + LowWidth <= HighOffset;
2173}
2174
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002175bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2176 MachineInstr &MIb) const {
Francis Visoiu Mistrihd7eebd62018-11-28 12:00:20 +00002177 MachineOperand *BaseOp0, *BaseOp1;
Chad Rosierc27a18f2016-03-09 16:00:35 +00002178 int64_t Offset0, Offset1;
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002179
Francis Visoiu Mistrihd7eebd62018-11-28 12:00:20 +00002180 if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
2181 getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
2182 if (!BaseOp0->isIdenticalTo(*BaseOp1))
2183 return false;
Tom Stellardcb6ba622016-04-30 00:23:06 +00002184
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002185 if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
Tom Stellardcb6ba622016-04-30 00:23:06 +00002186 // FIXME: Handle ds_read2 / ds_write2.
2187 return false;
2188 }
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002189 unsigned Width0 = (*MIa.memoperands_begin())->getSize();
2190 unsigned Width1 = (*MIb.memoperands_begin())->getSize();
Francis Visoiu Mistrihd7eebd62018-11-28 12:00:20 +00002191 if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002192 return true;
2193 }
2194 }
2195
2196 return false;
2197}
2198
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002199bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
2200 MachineInstr &MIb,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002201 AliasAnalysis *AA) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002202 assert((MIa.mayLoad() || MIa.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002203 "MIa must load from or modify a memory location");
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002204 assert((MIb.mayLoad() || MIb.mayStore()) &&
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002205 "MIb must load from or modify a memory location");
2206
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002207 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002208 return false;
2209
2210 // XXX - Can we relax this between address spaces?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002211 if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002212 return false;
2213
Tom Stellard662f3302016-08-29 12:05:32 +00002214 if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
2215 const MachineMemOperand *MMOa = *MIa.memoperands_begin();
2216 const MachineMemOperand *MMOb = *MIb.memoperands_begin();
2217 if (MMOa->getValue() && MMOb->getValue()) {
2218 MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
2219 MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
2220 if (!AA->alias(LocA, LocB))
2221 return true;
2222 }
2223 }
2224
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002225 // TODO: Should we check the address space from the MachineMemOperand? That
2226 // would allow us to distinguish objects we know don't alias based on the
Benjamin Kramerdf005cb2015-08-08 18:27:36 +00002227 // underlying address space, even if it was lowered to a different one,
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002228 // e.g. private accesses lowered to use MUBUF instructions on a scratch
2229 // buffer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002230 if (isDS(MIa)) {
2231 if (isDS(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002232 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2233
Matt Arsenault9608a2892017-07-29 01:26:21 +00002234 return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002235 }
2236
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002237 if (isMUBUF(MIa) || isMTBUF(MIa)) {
2238 if (isMUBUF(MIb) || isMTBUF(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002239 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2240
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002241 return !isFLAT(MIb) && !isSMRD(MIb);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002242 }
2243
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002244 if (isSMRD(MIa)) {
2245 if (isSMRD(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002246 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2247
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002248 return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002249 }
2250
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002251 if (isFLAT(MIa)) {
2252 if (isFLAT(MIb))
Matt Arsenaultc09cc3c2014-11-19 00:01:31 +00002253 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2254
2255 return false;
2256 }
2257
2258 return false;
2259}
2260
Stanislav Mekhanoshin710da422017-09-11 17:13:57 +00002261static int64_t getFoldableImm(const MachineOperand* MO) {
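  // If MO is a register whose unique def is a v_mov_b32 of an immediate, return that
  // immediate; otherwise return 0. Callers only test the result for truth, so an
  // actual immediate of 0 is treated as not foldable.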
2262 if (!MO->isReg())
2263 return false;
2264 const MachineFunction *MF = MO->getParent()->getParent()->getParent();
2265 const MachineRegisterInfo &MRI = MF->getRegInfo();
2266 auto Def = MRI.getUniqueVRegDef(MO->getReg());
Matt Arsenaultc3172872017-09-14 20:54:29 +00002267 if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
2268 Def->getOperand(1).isImm())
Stanislav Mekhanoshin710da422017-09-11 17:13:57 +00002269 return Def->getOperand(1).getImm();
2270 return AMDGPU::NoRegister;
2271}
2272
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002273MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002274 MachineInstr &MI,
2275 LiveVariables *LV) const {
Matt Arsenault0084adc2018-04-30 19:08:16 +00002276 unsigned Opc = MI.getOpcode();
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002277 bool IsF16 = false;
Matt Arsenault0084adc2018-04-30 19:08:16 +00002278 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002279
Matt Arsenault0084adc2018-04-30 19:08:16 +00002280 switch (Opc) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002281 default:
2282 return nullptr;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002283 case AMDGPU::V_MAC_F16_e64:
2284 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002285 LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002286 case AMDGPU::V_MAC_F32_e64:
Matt Arsenault0084adc2018-04-30 19:08:16 +00002287 case AMDGPU::V_FMAC_F32_e64:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002288 break;
Konstantin Zhuravlyovf86e4b72016-11-13 07:01:11 +00002289 case AMDGPU::V_MAC_F16_e32:
2290 IsF16 = true;
Simon Pilgrim0f5b3502017-07-07 10:18:57 +00002291 LLVM_FALLTHROUGH;
Matt Arsenault0084adc2018-04-30 19:08:16 +00002292 case AMDGPU::V_MAC_F32_e32:
2293 case AMDGPU::V_FMAC_F32_e32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002294 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
2295 AMDGPU::OpName::src0);
2296 const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
Matt Arsenaultfdcdd882017-09-21 00:45:59 +00002297 if (!Src0->isReg() && !Src0->isImm())
2298 return nullptr;
2299
Matt Arsenault4bd72362016-12-10 00:39:12 +00002300 if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002301 return nullptr;
Matt Arsenaultfdcdd882017-09-21 00:45:59 +00002302
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002303 break;
2304 }
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002305 }
2306
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002307 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
2308 const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002309 const MachineOperand *Src0Mods =
2310 getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002311 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002312 const MachineOperand *Src1Mods =
2313 getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002314 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002315 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
2316 const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002317
Matt Arsenault0084adc2018-04-30 19:08:16 +00002318 if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
Matt Arsenaultc3172872017-09-14 20:54:29 +00002319 // If we have an SGPR input, we will violate the constant bus restriction.
Matt Arsenaultfdcdd882017-09-21 00:45:59 +00002320 (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
Stanislav Mekhanoshin710da422017-09-11 17:13:57 +00002321 if (auto Imm = getFoldableImm(Src2)) {
2322 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2323 get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2324 .add(*Dst)
2325 .add(*Src0)
2326 .add(*Src1)
2327 .addImm(Imm);
2328 }
2329 if (auto Imm = getFoldableImm(Src1)) {
2330 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2331 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2332 .add(*Dst)
2333 .add(*Src0)
2334 .addImm(Imm)
2335 .add(*Src2);
2336 }
2337 if (auto Imm = getFoldableImm(Src0)) {
2338 if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
2339 AMDGPU::OpName::src0), Src1))
2340 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2341 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2342 .add(*Dst)
2343 .add(*Src1)
2344 .addImm(Imm)
2345 .add(*Src2);
2346 }
2347 }
2348
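  // No foldable immediate: convert the two-address MAC/FMAC into its three-address
  // MAD/FMA form, carrying over any source modifiers.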
Matt Arsenault0084adc2018-04-30 19:08:16 +00002349 assert((!IsFMA || !IsF16) && "fmac only expected with f32");
2350 unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
2351 (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
2352 return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
Diana Picus116bbab2017-01-13 09:58:52 +00002353 .add(*Dst)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002354 .addImm(Src0Mods ? Src0Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002355 .add(*Src0)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002356 .addImm(Src1Mods ? Src1Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002357 .add(*Src1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002358      .addImm(0) // Src2 mods
Diana Picus116bbab2017-01-13 09:58:52 +00002359 .add(*Src2)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002360 .addImm(Clamp ? Clamp->getImm() : 0)
2361 .addImm(Omod ? Omod->getImm() : 0);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002362}
2363
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002364// It's not generally safe to move VALU instructions across these since it will
2365// start using the register as a base index rather than directly.
2366// XXX - Why isn't hasSideEffects sufficient for these?
2367static bool changesVGPRIndexingMode(const MachineInstr &MI) {
2368 switch (MI.getOpcode()) {
2369 case AMDGPU::S_SET_GPR_IDX_ON:
2370 case AMDGPU::S_SET_GPR_IDX_MODE:
2371 case AMDGPU::S_SET_GPR_IDX_OFF:
2372 return true;
2373 default:
2374 return false;
2375 }
2376}
2377
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002378bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002379 const MachineBasicBlock *MBB,
2380 const MachineFunction &MF) const {
Matt Arsenault95c78972016-07-09 01:13:51 +00002381 // XXX - Do we want the SP check in the base implementation?
2382
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002383 // Target-independent instructions do not have an implicit-use of EXEC, even
2384 // when they operate on VGPRs. Treating EXEC modifications as scheduling
2385 // boundaries prevents incorrect movements of such instructions.
Matt Arsenault95c78972016-07-09 01:13:51 +00002386 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002387 MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
Tom Stellard8485fa02016-12-07 02:42:15 +00002388 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2389 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
Matt Arsenaultd486d3f2016-10-12 18:49:05 +00002390 changesVGPRIndexingMode(MI);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00002391}
2392
Marek Olsakc5cec5e2019-01-16 15:43:53 +00002393bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
2394 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
2395 Opcode == AMDGPU::DS_GWS_INIT ||
2396 Opcode == AMDGPU::DS_GWS_SEMA_V ||
2397 Opcode == AMDGPU::DS_GWS_SEMA_BR ||
2398 Opcode == AMDGPU::DS_GWS_SEMA_P ||
2399 Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
2400 Opcode == AMDGPU::DS_GWS_BARRIER;
2401}
2402
Nicolai Haehnle7f0d05d2018-07-30 09:23:59 +00002403bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
2404 unsigned Opcode = MI.getOpcode();
2405
2406 if (MI.mayStore() && isSMRD(MI))
2407 return true; // scalar store or atomic
2408
2409 // These instructions cause shader I/O that may cause hardware lockups
2410 // when executed with an empty EXEC mask.
2411 //
2412 // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
2413 // EXEC = 0, but checking for that case here seems not worth it
2414 // given the typical code patterns.
2415 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
Marek Olsakc5cec5e2019-01-16 15:43:53 +00002416 Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
2417 Opcode == AMDGPU::DS_ORDERED_COUNT)
Nicolai Haehnle7f0d05d2018-07-30 09:23:59 +00002418 return true;
2419
2420 if (MI.isInlineAsm())
2421 return true; // conservative assumption
2422
2423 // These are like SALU instructions in terms of effects, so it's questionable
2424 // whether we should return true for those.
2425 //
2426 // However, executing them with EXEC = 0 causes them to operate on undefined
2427 // data, which we avoid by returning true here.
2428 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
2429 return true;
2430
2431 return false;
2432}
2433
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002434bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault26faed32016-12-05 22:26:17 +00002435 switch (Imm.getBitWidth()) {
2436 case 32:
2437 return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
2438 ST.hasInv2PiInlineImm());
2439 case 64:
2440 return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
2441 ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002442 case 16:
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002443 return ST.has16BitInsts() &&
2444 AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
Matt Arsenault4bd72362016-12-10 00:39:12 +00002445 ST.hasInv2PiInlineImm());
Matt Arsenault26faed32016-12-05 22:26:17 +00002446 default:
2447 llvm_unreachable("invalid bitwidth");
Matt Arsenault303011a2014-12-17 21:04:08 +00002448 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002449}
2450
Matt Arsenault11a4d672015-02-13 19:05:03 +00002451bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002452 uint8_t OperandType) const {
Sam Kolton549c89d2017-06-21 08:53:38 +00002453 if (!MO.isImm() ||
2454 OperandType < AMDGPU::OPERAND_SRC_FIRST ||
2455 OperandType > AMDGPU::OPERAND_SRC_LAST)
Matt Arsenault4bd72362016-12-10 00:39:12 +00002456 return false;
2457
2458 // MachineOperand provides no way to tell the true operand size, since it only
2459 // records a 64-bit value. We need to know the size to determine if a 32-bit
2460 // floating point immediate bit pattern is legal for an integer immediate. It
2461 // would be for any 32-bit integer operand, but would not be for a 64-bit one.
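  // For example, the bit pattern 0x3f800000 (1.0f) is inlinable for a 32-bit
  // operand, but as a 64-bit operand the same value is neither a small integer
  // nor a valid f64 bit pattern, so it would require a literal.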
2462
2463 int64_t Imm = MO.getImm();
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002464 switch (OperandType) {
2465 case AMDGPU::OPERAND_REG_IMM_INT32:
2466 case AMDGPU::OPERAND_REG_IMM_FP32:
2467 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2468 case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002469 int32_t Trunc = static_cast<int32_t>(Imm);
Nicolai Haehnle283b9952018-08-29 07:46:09 +00002470 return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault11a4d672015-02-13 19:05:03 +00002471 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002472 case AMDGPU::OPERAND_REG_IMM_INT64:
2473 case AMDGPU::OPERAND_REG_IMM_FP64:
2474 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
Eugene Zelenko59e12822017-08-08 00:47:13 +00002475 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
Matt Arsenault4bd72362016-12-10 00:39:12 +00002476 return AMDGPU::isInlinableLiteral64(MO.getImm(),
2477 ST.hasInv2PiInlineImm());
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002478 case AMDGPU::OPERAND_REG_IMM_INT16:
2479 case AMDGPU::OPERAND_REG_IMM_FP16:
2480 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2481 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
Matt Arsenault4bd72362016-12-10 00:39:12 +00002482 if (isInt<16>(Imm) || isUInt<16>(Imm)) {
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002483 // A few special case instructions have 16-bit operands on subtargets
2484 // where 16-bit instructions are not legal.
2485 // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
2486 // constants in these cases
Matt Arsenault4bd72362016-12-10 00:39:12 +00002487 int16_t Trunc = static_cast<int16_t>(Imm);
Matt Arsenault9dba9bd2017-02-02 02:27:04 +00002488 return ST.has16BitInsts() &&
2489 AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault4bd72362016-12-10 00:39:12 +00002490 }
Matt Arsenaultd7bdcc42014-03-31 19:54:27 +00002491
Matt Arsenault4bd72362016-12-10 00:39:12 +00002492 return false;
2493 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002494 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2495 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
Stanislav Mekhanoshin160f8572018-04-19 21:16:50 +00002496 if (isUInt<16>(Imm)) {
2497 int16_t Trunc = static_cast<int16_t>(Imm);
2498 return ST.has16BitInsts() &&
2499 AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
2500 }
2501 if (!(Imm & 0xffff)) {
2502 return ST.has16BitInsts() &&
2503 AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
2504 }
Matt Arsenaulteb522e62017-02-27 22:15:25 +00002505 uint32_t Trunc = static_cast<uint32_t>(Imm);
2506 return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
2507 }
Matt Arsenault4bd72362016-12-10 00:39:12 +00002508 default:
2509 llvm_unreachable("invalid bitwidth");
2510 }
Tom Stellard93fabce2013-10-10 17:11:55 +00002511}
2512
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002513bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002514 const MCOperandInfo &OpInfo) const {
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002515 switch (MO.getType()) {
2516 case MachineOperand::MO_Register:
2517 return false;
2518 case MachineOperand::MO_Immediate:
Matt Arsenault4bd72362016-12-10 00:39:12 +00002519 return !isInlineConstant(MO, OpInfo);
Matt Arsenaultc1ebd822016-08-13 01:43:54 +00002520 case MachineOperand::MO_FrameIndex:
2521 case MachineOperand::MO_MachineBasicBlock:
2522 case MachineOperand::MO_ExternalSymbol:
2523 case MachineOperand::MO_GlobalAddress:
2524 case MachineOperand::MO_MCSymbol:
2525 return true;
2526 default:
2527 llvm_unreachable("unexpected operand type");
2528 }
2529}
2530
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002531static bool compareMachineOp(const MachineOperand &Op0,
2532 const MachineOperand &Op1) {
2533 if (Op0.getType() != Op1.getType())
2534 return false;
2535
2536 switch (Op0.getType()) {
2537 case MachineOperand::MO_Register:
2538 return Op0.getReg() == Op1.getReg();
2539 case MachineOperand::MO_Immediate:
2540 return Op0.getImm() == Op1.getImm();
Matt Arsenaultbecb1402014-06-23 18:28:31 +00002541 default:
2542 llvm_unreachable("Didn't expect to be comparing these operand types");
2543 }
2544}
2545
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002546bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
2547 const MachineOperand &MO) const {
2548 const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
Tom Stellardb02094e2014-07-21 15:45:01 +00002549
Tom Stellardfb77f002015-01-13 22:59:41 +00002550 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
Tom Stellardb02094e2014-07-21 15:45:01 +00002551
2552 if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
2553 return true;
2554
2555 if (OpInfo.RegClass < 0)
2556 return false;
2557
Matt Arsenault4bd72362016-12-10 00:39:12 +00002558 if (MO.isImm() && isInlineConstant(MO, OpInfo))
2559 return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002560
Matt Arsenault4bd72362016-12-10 00:39:12 +00002561 return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellardb02094e2014-07-21 15:45:01 +00002562}
2563
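// Illustrative mapping: a VOP3 opcode such as V_ADD_F32_e64 maps to the VOP2
// opcode V_ADD_F32_e32 via getVOPe32; the result only counts if that 32-bit
// pseudo also has a real MC encoding for the current subtarget.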
Tom Stellard86d12eb2014-08-01 00:32:28 +00002564bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsaka93603d2015-01-15 18:42:51 +00002565 int Op32 = AMDGPU::getVOPe32(Opcode);
2566 if (Op32 == -1)
2567 return false;
2568
2569 return pseudoToMCOpcode(Op32) != -1;
Tom Stellard86d12eb2014-08-01 00:32:28 +00002570}
2571
Tom Stellardb4a313a2014-08-01 00:32:39 +00002572bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
2573 // The src0_modifier operand is present on all instructions
2574 // that have modifiers.
2575
2576 return AMDGPU::getNamedOperandIdx(Opcode,
2577 AMDGPU::OpName::src0_modifiers) != -1;
2578}
2579
Matt Arsenaultace5b762014-10-17 18:00:43 +00002580bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
2581 unsigned OpName) const {
2582 const MachineOperand *Mods = getNamedOperand(MI, OpName);
2583 return Mods && Mods->getImm();
2584}
2585
Matt Arsenault2ed21932017-02-27 20:21:31 +00002586bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
2587 return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
2588 hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
2589 hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
2590 hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
2591 hasModifiersSet(MI, AMDGPU::OpName::omod);
2592}
2593
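// Illustrative example: V_ADD_F32_e64 with src1 in a VGPR and no source
// modifiers, clamp, or omod can typically be shrunk to the VOP2 encoding
// V_ADD_F32_e32, while an SGPR in src1 or any set modifier keeps it in e64.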
Matt Arsenault35b19022018-08-28 18:22:34 +00002594bool SIInstrInfo::canShrink(const MachineInstr &MI,
2595 const MachineRegisterInfo &MRI) const {
2596 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2597 // Can't shrink instruction with three operands.
2598 // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
2599 // a special case for it. It can only be shrunk if the third operand
2600 // is vcc. We should handle this the same way we handle vopc, by adding
2601 // a register allocation hint pre-regalloc and then do the shrinking
2602 // post-regalloc.
2603 if (Src2) {
2604 switch (MI.getOpcode()) {
2605 default: return false;
2606
2607 case AMDGPU::V_ADDC_U32_e64:
2608 case AMDGPU::V_SUBB_U32_e64:
2609 case AMDGPU::V_SUBBREV_U32_e64: {
2610 const MachineOperand *Src1
2611 = getNamedOperand(MI, AMDGPU::OpName::src1);
2612 if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
2613 return false;
2614 // Additional verification is needed for sdst/src2.
2615 return true;
2616 }
2617 case AMDGPU::V_MAC_F32_e64:
2618 case AMDGPU::V_MAC_F16_e64:
2619 case AMDGPU::V_FMAC_F32_e64:
2620 if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
2621 hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
2622 return false;
2623 break;
2624
2625 case AMDGPU::V_CNDMASK_B32_e64:
2626 break;
2627 }
2628 }
2629
2630 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2631 if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
2632 hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
2633 return false;
2634
2635 // We don't need to check src0; all input types are legal for it, so just
2636 // make sure src0 isn't using any modifiers.
2637 if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
2638 return false;
2639
Ron Lieberman16de4fd2018-12-03 13:04:54 +00002640 // Can it be shrunk to a valid 32-bit opcode?
2641 if (!hasVALU32BitEncoding(MI.getOpcode()))
2642 return false;
2643
Matt Arsenault35b19022018-08-28 18:22:34 +00002644 // Check output modifiers
2645 return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
2646 !hasModifiersSet(MI, AMDGPU::OpName::clamp);
Matt Arsenaultde6c4212018-08-28 18:34:24 +00002647}
Matt Arsenault35b19022018-08-28 18:22:34 +00002648
Matt Arsenaultde6c4212018-08-28 18:34:24 +00002649// Copy all flags from \p Orig onto the implicit VCC operand of \p MI, except
2650// for the implicit flag itself.
2651static void copyFlagsToImplicitVCC(MachineInstr &MI,
2652 const MachineOperand &Orig) {
2653
2654 for (MachineOperand &Use : MI.implicit_operands()) {
2655 if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
2656 Use.setIsUndef(Orig.isUndef());
2657 Use.setIsKill(Orig.isKill());
2658 return;
2659 }
2660 }
2661}
2662
2663MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
2664 unsigned Op32) const {
2665 MachineBasicBlock *MBB = MI.getParent();
2666 MachineInstrBuilder Inst32 =
2667 BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
2668
2669 // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
2670 // For VOPC instructions, this is replaced by an implicit def of vcc.
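  // e.g. a shrunk V_CMP_LT_F32_e32 carries no explicit vdst and instead
  // implicitly defines VCC (illustrative).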
2671 int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
2672 if (Op32DstIdx != -1) {
2673 // dst
2674 Inst32.add(MI.getOperand(0));
2675 } else {
2676 assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
2677 "Unexpected case");
2678 }
2679
2680 Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
2681
2682 const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
2683 if (Src1)
2684 Inst32.add(*Src1);
2685
2686 const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
2687
2688 if (Src2) {
2689 int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
2690 if (Op32Src2Idx != -1) {
2691 Inst32.add(*Src2);
2692 } else {
2693 // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
2694 // replaced with an implicit read of vcc. This was already added
2695 // during the initial BuildMI, so find it to preserve the flags.
2696 copyFlagsToImplicitVCC(*Inst32, *Src2);
2697 }
2698 }
2699
2700 return Inst32;
Matt Arsenault35b19022018-08-28 18:22:34 +00002701}
2702
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002703bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault11a4d672015-02-13 19:05:03 +00002704 const MachineOperand &MO,
Matt Arsenault4bd72362016-12-10 00:39:12 +00002705 const MCOperandInfo &OpInfo) const {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002706 // Literal constants use the constant bus.
Matt Arsenault4bd72362016-12-10 00:39:12 +00002707 //if (isLiteralConstantLike(MO, OpInfo))
2708 // return true;
2709 if (MO.isImm())
2710 return !isInlineConstant(MO, OpInfo);
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002711
Matt Arsenault4bd72362016-12-10 00:39:12 +00002712 if (!MO.isReg())
2713 return true; // Misc other operands like FrameIndex
2714
2715 if (!MO.isUse())
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002716 return false;
2717
2718 if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
2719 return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
2720
2721 // FLAT_SCR is just an SGPR pair.
2722 if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
2723 return true;
2724
2725 // EXEC register uses the constant bus.
2726 if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
2727 return true;
2728
2729 // SGPRs use the constant bus
Matt Arsenault8226fc42016-03-02 23:00:21 +00002730 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2731 (!MO.isImplicit() &&
2732 (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2733 AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard73ae1cb2014-09-23 21:26:25 +00002734}
2735
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00002736static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
2737 for (const MachineOperand &MO : MI.implicit_operands()) {
2738 // We only care about reads.
2739 if (MO.isDef())
2740 continue;
2741
2742 switch (MO.getReg()) {
2743 case AMDGPU::VCC:
2744 case AMDGPU::M0:
2745 case AMDGPU::FLAT_SCR:
2746 return MO.getReg();
2747
2748 default:
2749 break;
2750 }
2751 }
2752
2753 return AMDGPU::NoRegister;
2754}
2755
Matt Arsenault529cf252016-06-23 01:26:16 +00002756static bool shouldReadExec(const MachineInstr &MI) {
2757 if (SIInstrInfo::isVALU(MI)) {
2758 switch (MI.getOpcode()) {
2759 case AMDGPU::V_READLANE_B32:
2760 case AMDGPU::V_READLANE_B32_si:
2761 case AMDGPU::V_READLANE_B32_vi:
2762 case AMDGPU::V_WRITELANE_B32:
2763 case AMDGPU::V_WRITELANE_B32_si:
2764 case AMDGPU::V_WRITELANE_B32_vi:
2765 return false;
2766 }
2767
2768 return true;
2769 }
2770
2771 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2772 SIInstrInfo::isSALU(MI) ||
2773 SIInstrInfo::isSMRD(MI))
2774 return false;
2775
2776 return true;
2777}
2778
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00002779static bool isSubRegOf(const SIRegisterInfo &TRI,
2780 const MachineOperand &SuperVec,
2781 const MachineOperand &SubReg) {
2782 if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
2783 return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2784
2785 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2786 SubReg.getReg() == SuperVec.getReg();
2787}
2788
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002789bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
Tom Stellard93fabce2013-10-10 17:11:55 +00002790 StringRef &ErrInfo) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002791 uint16_t Opcode = MI.getOpcode();
Tom Stellarddde28a82017-05-26 16:40:03 +00002792 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2793 return true;
2794
Matt Arsenault89ad17c2017-06-12 16:37:55 +00002795 const MachineFunction *MF = MI.getParent()->getParent();
2796 const MachineRegisterInfo &MRI = MF->getRegInfo();
2797
Tom Stellard93fabce2013-10-10 17:11:55 +00002798 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2799 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2800 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2801
Tom Stellardca700e42014-03-17 17:03:49 +00002802 // Make sure the number of operands is correct.
2803 const MCInstrDesc &Desc = get(Opcode);
2804 if (!Desc.isVariadic() &&
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002805 Desc.getNumOperands() != MI.getNumExplicitOperands()) {
2806 ErrInfo = "Instruction has wrong number of operands.";
2807 return false;
Tom Stellardca700e42014-03-17 17:03:49 +00002808 }
2809
Matt Arsenault3d463192016-11-01 22:55:07 +00002810 if (MI.isInlineAsm()) {
2811 // Verify register classes for inlineasm constraints.
2812 for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
2813 I != E; ++I) {
2814 const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
2815 if (!RC)
2816 continue;
2817
2818 const MachineOperand &Op = MI.getOperand(I);
2819 if (!Op.isReg())
2820 continue;
2821
2822 unsigned Reg = Op.getReg();
2823 if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
2824 ErrInfo = "inlineasm operand has incorrect register class.";
2825 return false;
2826 }
2827 }
2828
2829 return true;
2830 }
2831
Changpeng Fangc9963932015-12-18 20:04:28 +00002832 // Make sure the register classes are correct.
Tom Stellardb4a313a2014-08-01 00:32:39 +00002833 for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002834 if (MI.getOperand(i).isFPImm()) {
Tom Stellardfb77f002015-01-13 22:59:41 +00002835 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2836 "all fp values to integers.";
2837 return false;
2838 }
2839
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002840 int RegClass = Desc.OpInfo[i].RegClass;
2841
Tom Stellardca700e42014-03-17 17:03:49 +00002842 switch (Desc.OpInfo[i].OperandType) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002843 case MCOI::OPERAND_REGISTER:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002844 if (MI.getOperand(i).isImm()) {
Tom Stellard1106b1c2015-01-20 17:49:41 +00002845 ErrInfo = "Illegal immediate value for operand.";
2846 return false;
2847 }
2848 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002849 case AMDGPU::OPERAND_REG_IMM_INT32:
2850 case AMDGPU::OPERAND_REG_IMM_FP32:
Tom Stellard1106b1c2015-01-20 17:49:41 +00002851 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002852 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2853 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2854 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2855 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2856 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2857 case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
2858 const MachineOperand &MO = MI.getOperand(i);
2859 if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
Marek Olsak8eeebcc2015-02-18 22:12:41 +00002860 ErrInfo = "Illegal immediate value for operand.";
2861 return false;
Tom Stellarda305f932014-07-02 20:53:44 +00002862 }
Tom Stellardca700e42014-03-17 17:03:49 +00002863 break;
Matt Arsenault4bd72362016-12-10 00:39:12 +00002864 }
Tom Stellardca700e42014-03-17 17:03:49 +00002865 case MCOI::OPERAND_IMMEDIATE:
Matt Arsenaultffc82752016-07-05 17:09:01 +00002866 case AMDGPU::OPERAND_KIMM32:
Tom Stellardb02094e2014-07-21 15:45:01 +00002867 // Check if this operand is an immediate.
2868 // FrameIndex operands will be replaced by immediates, so they are
2869 // allowed.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002870 if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
Tom Stellardca700e42014-03-17 17:03:49 +00002871 ErrInfo = "Expected immediate, but got non-immediate";
2872 return false;
2873 }
Justin Bognerb03fd122016-08-17 05:10:15 +00002874 LLVM_FALLTHROUGH;
Tom Stellardca700e42014-03-17 17:03:49 +00002875 default:
2876 continue;
2877 }
2878
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002879 if (!MI.getOperand(i).isReg())
Tom Stellardca700e42014-03-17 17:03:49 +00002880 continue;
2881
Tom Stellardca700e42014-03-17 17:03:49 +00002882 if (RegClass != -1) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002883 unsigned Reg = MI.getOperand(i).getReg();
Matt Arsenault1322b6f2016-07-09 01:13:56 +00002884 if (Reg == AMDGPU::NoRegister ||
2885 TargetRegisterInfo::isVirtualRegister(Reg))
Tom Stellardca700e42014-03-17 17:03:49 +00002886 continue;
2887
2888 const TargetRegisterClass *RC = RI.getRegClass(RegClass);
2889 if (!RC->contains(Reg)) {
2890 ErrInfo = "Operand has incorrect register class.";
2891 return false;
2892 }
2893 }
2894 }
2895
Sam Kolton549c89d2017-06-21 08:53:38 +00002896 // Verify SDWA
2897 if (isSDWA(MI)) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002898 if (!ST.hasSDWA()) {
2899 ErrInfo = "SDWA is not supported on this target";
2900 return false;
2901 }
2902
2903 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
Sam Kolton549c89d2017-06-21 08:53:38 +00002904
2905 const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2906
2907 for (int OpIdx : OpIndices) {
2908 if (OpIdx == -1)
2909 continue;
2910 const MachineOperand &MO = MI.getOperand(OpIdx);
2911
Sam Kolton3c4933f2017-06-22 06:26:41 +00002912 if (!ST.hasSDWAScalar()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002913 // Only VGPRs on VI
2914 if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
2915 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2916 return false;
2917 }
2918 } else {
2919 // No immediates on GFX9
2920 if (!MO.isReg()) {
2921 ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2922 return false;
2923 }
2924 }
2925 }
2926
Sam Kolton3c4933f2017-06-22 06:26:41 +00002927 if (!ST.hasSDWAOmod()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002928 // No omod allowed on VI
2929 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2930 if (OMod != nullptr &&
2931 (!OMod->isImm() || OMod->getImm() != 0)) {
2932 ErrInfo = "OMod not allowed in SDWA instructions on VI";
2933 return false;
2934 }
2935 }
2936
2937 uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
2938 if (isVOPC(BasicOpcode)) {
Sam Kolton3c4933f2017-06-22 06:26:41 +00002939 if (!ST.hasSDWASdst() && DstIdx != -1) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002940 // Only vcc allowed as dst on VI for VOPC
2941 const MachineOperand &Dst = MI.getOperand(DstIdx);
2942 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2943 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2944 return false;
2945 }
Sam Koltona179d252017-06-27 15:02:23 +00002946 } else if (!ST.hasSDWAOutModsVOPC()) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002947 // No clamp allowed on GFX9 for VOPC
2948 const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
Sam Koltona179d252017-06-27 15:02:23 +00002949 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
Sam Kolton549c89d2017-06-21 08:53:38 +00002950 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2951 return false;
2952 }
Sam Koltona179d252017-06-27 15:02:23 +00002953
2954 // No omod allowed on GFX9 for VOPC
2955 const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
2956 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2957 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2958 return false;
2959 }
Sam Kolton549c89d2017-06-21 08:53:38 +00002960 }
2961 }
Sam Kolton5f7f32c2017-12-04 16:22:32 +00002962
2963 const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
2964 if (DstUnused && DstUnused->isImm() &&
2965 DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
2966 const MachineOperand &Dst = MI.getOperand(DstIdx);
2967 if (!Dst.isReg() || !Dst.isTied()) {
2968 ErrInfo = "Dst register should have tied register";
2969 return false;
2970 }
2971
2972 const MachineOperand &TiedMO =
2973 MI.getOperand(MI.findTiedOperandIdx(DstIdx));
2974 if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) {
2975 ErrInfo =
2976 "Dst register should be tied to implicit use of preserved register";
2977 return false;
2978 } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
2979 Dst.getReg() != TiedMO.getReg()) {
2980 ErrInfo = "Dst register should use same physical register as preserved";
2981 return false;
2982 }
2983 }
Sam Kolton549c89d2017-06-21 08:53:38 +00002984 }
2985
David Stuttardf77079f2019-01-14 11:55:24 +00002986 // Verify MIMG
2987 if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
2988 // Ensure that the return type used is large enough for all the options
2989 // being used. TFE/LWE require an extra result register.
2990 const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
2991 if (DMask) {
2992 uint64_t DMaskImm = DMask->getImm();
2993 uint32_t RegCount =
2994 isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
2995 const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
2996 const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
2997 const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
2998
2999 // Adjust for packed 16 bit values
3000 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
3001 RegCount >>= 1;
3002
3003 // Adjust if using LWE or TFE
3004 if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
3005 RegCount += 1;
3006
3007 const uint32_t DstIdx =
3008 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
3009 const MachineOperand &Dst = MI.getOperand(DstIdx);
3010 if (Dst.isReg()) {
3011 const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
3012 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
3013 if (RegCount > DstSize) {
3014 ErrInfo = "MIMG instruction returns too many registers for dst "
3015 "register class";
3016 return false;
3017 }
3018 }
3019 }
3020 }
3021
Tim Renouf2a99fa22018-02-28 19:10:32 +00003022 // Verify VOP*. Ignore multiple sgpr operands on writelane.
3023 if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
3024 && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
Matt Arsenaulte368cb32014-12-11 23:37:32 +00003025 // Only look at the true operands. Only a real operand can use the constant
3026 // bus, and we don't want to check pseudo-operands like the source modifier
3027 // flags.
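    // For example (illustrative), a VOP3 add whose src0 and src1 are two
    // different SGPRs would count two constant bus uses and be rejected below.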
3028 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3029
Tom Stellard93fabce2013-10-10 17:11:55 +00003030 unsigned ConstantBusCount = 0;
Stanislav Mekhanoshina4bfb3c2018-04-24 18:17:55 +00003031 unsigned LiteralCount = 0;
Matt Arsenaultffc82752016-07-05 17:09:01 +00003032
3033 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
3034 ++ConstantBusCount;
3035
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003036 unsigned SGPRUsed = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00003037 if (SGPRUsed != AMDGPU::NoRegister)
3038 ++ConstantBusCount;
3039
Matt Arsenaulte368cb32014-12-11 23:37:32 +00003040 for (int OpIdx : OpIndices) {
3041 if (OpIdx == -1)
3042 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003043 const MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault4bd72362016-12-10 00:39:12 +00003044 if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00003045 if (MO.isReg()) {
3046 if (MO.getReg() != SGPRUsed)
Tom Stellard93fabce2013-10-10 17:11:55 +00003047 ++ConstantBusCount;
Tom Stellard73ae1cb2014-09-23 21:26:25 +00003048 SGPRUsed = MO.getReg();
3049 } else {
3050 ++ConstantBusCount;
Stanislav Mekhanoshina4bfb3c2018-04-24 18:17:55 +00003051 ++LiteralCount;
Tom Stellard93fabce2013-10-10 17:11:55 +00003052 }
3053 }
Tom Stellard93fabce2013-10-10 17:11:55 +00003054 }
3055 if (ConstantBusCount > 1) {
3056 ErrInfo = "VOP* instruction uses the constant bus more than once";
3057 return false;
3058 }
Stanislav Mekhanoshina4bfb3c2018-04-24 18:17:55 +00003059
3060 if (isVOP3(MI) && LiteralCount) {
3061 ErrInfo = "VOP3 instruction uses literal";
3062 return false;
3063 }
Tom Stellard93fabce2013-10-10 17:11:55 +00003064 }
3065
Matt Arsenaultbecb1402014-06-23 18:28:31 +00003066 // Verify misc. restrictions on specific instructions.
3067 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
3068 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003069 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
3070 const MachineOperand &Src1 = MI.getOperand(Src1Idx);
3071 const MachineOperand &Src2 = MI.getOperand(Src2Idx);
Matt Arsenaultbecb1402014-06-23 18:28:31 +00003072 if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
3073 if (!compareMachineOp(Src0, Src1) &&
3074 !compareMachineOp(Src0, Src2)) {
3075 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
3076 return false;
3077 }
3078 }
3079 }
3080
Matt Arsenault7ccf6cd2016-09-16 21:41:16 +00003081 if (isSOPK(MI)) {
3082 int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
3083 if (sopkIsZext(MI)) {
3084 if (!isUInt<16>(Imm)) {
3085 ErrInfo = "invalid immediate for SOPK instruction";
3086 return false;
3087 }
3088 } else {
3089 if (!isInt<16>(Imm)) {
3090 ErrInfo = "invalid immediate for SOPK instruction";
3091 return false;
3092 }
3093 }
3094 }
3095
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00003096 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
3097 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
3098 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3099 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
3100 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3101 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
3102
3103 const unsigned StaticNumOps = Desc.getNumOperands() +
3104 Desc.getNumImplicitUses();
3105 const unsigned NumImplicitOps = IsDst ? 2 : 1;
3106
Nicolai Haehnle368972c2016-11-02 17:03:11 +00003107 // Allow additional implicit operands. This allows a fixup done by the post
3108 // RA scheduler where the main implicit operand is killed and implicit-defs
3109 // are added for sub-registers that remain live after this instruction.
3110 if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
Matt Arsenaultcb540bc2016-07-19 00:35:03 +00003111 ErrInfo = "missing implicit register operands";
3112 return false;
3113 }
3114
3115 const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
3116 if (IsDst) {
3117 if (!Dst->isUse()) {
3118 ErrInfo = "v_movreld_b32 vdst should be a use operand";
3119 return false;
3120 }
3121
3122 unsigned UseOpIdx;
3123 if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
3124 UseOpIdx != StaticNumOps + 1) {
3125 ErrInfo = "movrel implicit operands should be tied";
3126 return false;
3127 }
3128 }
3129
3130 const MachineOperand &Src0 = MI.getOperand(Src0Idx);
3131 const MachineOperand &ImpUse
3132 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
3133 if (!ImpUse.isReg() || !ImpUse.isUse() ||
3134 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
3135 ErrInfo = "src0 should be subreg of implicit vector use";
3136 return false;
3137 }
3138 }
3139
Matt Arsenaultd092a062015-10-02 18:58:37 +00003140 // Make sure we aren't losing exec uses in the td files. This mostly requires
3141 // being careful when using let Uses to try to add other use registers.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003142 if (shouldReadExec(MI)) {
3143 if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
Matt Arsenaultd092a062015-10-02 18:58:37 +00003144 ErrInfo = "VALU instruction does not implicitly read exec mask";
3145 return false;
3146 }
3147 }
3148
Matt Arsenault7b647552016-10-28 21:55:15 +00003149 if (isSMRD(MI)) {
3150 if (MI.mayStore()) {
3151 // The register offset form of scalar stores may only use m0 as the
3152 // soffset register.
3153 const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
3154 if (Soff && Soff->getReg() != AMDGPU::M0) {
3155 ErrInfo = "scalar stores must use m0 as offset register";
3156 return false;
3157 }
3158 }
3159 }
3160
Tom Stellard5bfbae52018-07-11 20:59:01 +00003161 if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
Matt Arsenault89ad17c2017-06-12 16:37:55 +00003162 const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
3163 if (Offset->getImm() != 0) {
3164 ErrInfo = "subtarget does not support offsets in flat instructions";
3165 return false;
3166 }
3167 }
3168
Stanislav Mekhanoshin43293612018-05-08 16:53:02 +00003169 const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
3170 if (DppCt) {
3171 using namespace AMDGPU::DPP;
3172
3173 unsigned DC = DppCt->getImm();
3174 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
3175 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
3176 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
3177 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
3178 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
3179 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
3180 ErrInfo = "Invalid dpp_ctrl value";
3181 return false;
3182 }
3183 }
3184
Tom Stellard93fabce2013-10-10 17:11:55 +00003185 return true;
3186}
3187
Matt Arsenault84445dd2017-11-30 22:51:26 +00003188unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
Tom Stellard82166022013-11-13 23:36:37 +00003189 switch (MI.getOpcode()) {
3190 default: return AMDGPU::INSTRUCTION_LIST_END;
3191 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
3192 case AMDGPU::COPY: return AMDGPU::COPY;
3193 case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard204e61b2014-04-07 19:45:45 +00003194 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Connor Abbott8c217d02017-08-04 18:36:49 +00003195 case AMDGPU::WQM: return AMDGPU::WQM;
Connor Abbott92638ab2017-08-04 18:36:52 +00003196 case AMDGPU::WWM: return AMDGPU::WWM;
Tom Stellarde0387202014-03-21 15:51:54 +00003197 case AMDGPU::S_MOV_B32:
3198 return MI.getOperand(1).isReg() ?
Tom Stellard8c12fd92014-03-24 16:12:34 +00003199 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00003200 case AMDGPU::S_ADD_I32:
Matt Arsenault84445dd2017-11-30 22:51:26 +00003201 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
3202 case AMDGPU::S_ADDC_U32:
3203 return AMDGPU::V_ADDC_U32_e32;
Tom Stellard80942a12014-09-05 14:07:59 +00003204 case AMDGPU::S_SUB_I32:
Matt Arsenault84445dd2017-11-30 22:51:26 +00003205 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
3206 // FIXME: These are not consistently handled, and selected when the carry is
3207 // used.
3208 case AMDGPU::S_ADD_U32:
3209 return AMDGPU::V_ADD_I32_e32;
3210 case AMDGPU::S_SUB_U32:
3211 return AMDGPU::V_SUB_I32_e32;
Matt Arsenault43b8e4e2013-11-18 20:09:29 +00003212 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault869cd072014-09-03 23:24:35 +00003213 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Matt Arsenault124384f2016-09-09 23:32:53 +00003214 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
3215 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
3216 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
Graham Sellers04f7a4d2018-11-29 16:05:38 +00003217 case AMDGPU::S_XNOR_B32:
3218 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
Matt Arsenault124384f2016-09-09 23:32:53 +00003219 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
3220 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
3221 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
3222 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
Tom Stellard82166022013-11-13 23:36:37 +00003223 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
3224 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
3225 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
3226 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
3227 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
3228 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault27cc9582014-04-18 01:53:18 +00003229 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
3230 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault78b86702014-04-18 05:19:26 +00003231 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
3232 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak63a7b082015-03-24 13:40:21 +00003233 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault43160e72014-06-18 17:13:57 +00003234 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault2c335622014-04-09 07:16:16 +00003235 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault689f3252014-06-09 16:36:31 +00003236 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault0cb92e12014-04-11 19:25:18 +00003237 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
3238 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
3239 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
3240 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
3241 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
3242 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellardbc4497b2016-02-12 23:45:29 +00003243 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
3244 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
3245 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
3246 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
3247 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
3248 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Matt Arsenault7b1dc2c2016-09-17 02:02:19 +00003249 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
3250 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
Marek Olsakc5368502015-01-15 18:43:01 +00003251 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault295b86e2014-06-17 17:36:27 +00003252 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault85796012014-06-17 17:36:24 +00003253 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsakd2af89d2015-03-04 17:33:45 +00003254 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellardbc4497b2016-02-12 23:45:29 +00003255 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
3256 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard82166022013-11-13 23:36:37 +00003257 }
3258}
3259
Tom Stellard82166022013-11-13 23:36:37 +00003260const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
3261 unsigned OpNo) const {
3262 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3263 const MCInstrDesc &Desc = get(MI.getOpcode());
3264 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
Matt Arsenault102a7042014-12-11 23:37:34 +00003265 Desc.OpInfo[OpNo].RegClass == -1) {
3266 unsigned Reg = MI.getOperand(OpNo).getReg();
3267
3268 if (TargetRegisterInfo::isVirtualRegister(Reg))
3269 return MRI.getRegClass(Reg);
Matt Arsenault11a4d672015-02-13 19:05:03 +00003270 return RI.getPhysRegClass(Reg);
Matt Arsenault102a7042014-12-11 23:37:34 +00003271 }
Tom Stellard82166022013-11-13 23:36:37 +00003272
3273 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
3274 return RI.getRegClass(RCID);
3275}
3276
3277bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
3278 switch (MI.getOpcode()) {
3279 case AMDGPU::COPY:
3280 case AMDGPU::REG_SEQUENCE:
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003281 case AMDGPU::PHI:
Tom Stellarda5687382014-05-15 14:41:55 +00003282 case AMDGPU::INSERT_SUBREG:
Tom Stellard82166022013-11-13 23:36:37 +00003283 return RI.hasVGPRs(getOpRegClass(MI, 0));
3284 default:
3285 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
3286 }
3287}
3288
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003289void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
Tom Stellard82166022013-11-13 23:36:37 +00003290 MachineBasicBlock::iterator I = MI;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003291 MachineBasicBlock *MBB = MI.getParent();
3292 MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003293 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003294 unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
Tom Stellard82166022013-11-13 23:36:37 +00003295 const TargetRegisterClass *RC = RI.getRegClass(RCID);
3296 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003297 if (MO.isReg())
Tom Stellard82166022013-11-13 23:36:37 +00003298 Opcode = AMDGPU::COPY;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003299 else if (RI.isSGPRClass(RC))
Matt Arsenault671a0052013-11-14 10:08:50 +00003300 Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003301
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00003302 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003303 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard0c93c9e2014-09-05 14:08:01 +00003304 VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003305 else
Tom Stellard45c0b3a2015-01-07 20:59:25 +00003306 VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003307
Matt Arsenault3a4d86a2013-11-18 20:09:55 +00003308 unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault3f3a2752014-10-13 15:47:59 +00003309 DebugLoc DL = MBB->findDebugLoc(I);
Diana Picus116bbab2017-01-13 09:58:52 +00003310 BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
Tom Stellard82166022013-11-13 23:36:37 +00003311 MO.ChangeToRegister(Reg, false);
3312}
3313
Tom Stellard15834092014-03-21 15:51:57 +00003314unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
3315 MachineRegisterInfo &MRI,
3316 MachineOperand &SuperReg,
3317 const TargetRegisterClass *SuperRC,
3318 unsigned SubIdx,
3319 const TargetRegisterClass *SubRC)
3320 const {
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00003321 MachineBasicBlock *MBB = MI->getParent();
3322 DebugLoc DL = MI->getDebugLoc();
Tom Stellard15834092014-03-21 15:51:57 +00003323 unsigned SubReg = MRI.createVirtualRegister(SubRC);
3324
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00003325 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
3326 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3327 .addReg(SuperReg.getReg(), 0, SubIdx);
3328 return SubReg;
3329 }
3330
Tom Stellard15834092014-03-21 15:51:57 +00003331 // Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault08d84942014-06-03 23:06:13 +00003332 // value so we don't need to worry about merging its subreg index with the
3333 // SubIdx passed to this function. The register coalescer should be able to
Tom Stellard15834092014-03-21 15:51:57 +00003334 // eliminate this extra copy.
Matt Arsenaultc8e2ce42015-09-24 07:16:37 +00003335 unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard15834092014-03-21 15:51:57 +00003336
Matt Arsenault7480a0e2014-11-17 21:11:37 +00003337 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
3338 .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
3339
3340 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3341 .addReg(NewSuperReg, 0, SubIdx);
3342
Tom Stellard15834092014-03-21 15:51:57 +00003343 return SubReg;
3344}
3345
Matt Arsenault248b7b62014-03-24 20:08:09 +00003346MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
3347 MachineBasicBlock::iterator MII,
3348 MachineRegisterInfo &MRI,
3349 MachineOperand &Op,
3350 const TargetRegisterClass *SuperRC,
3351 unsigned SubIdx,
3352 const TargetRegisterClass *SubRC) const {
3353 if (Op.isImm()) {
Matt Arsenault248b7b62014-03-24 20:08:09 +00003354 if (SubIdx == AMDGPU::sub0)
Matt Arsenaultd745c282016-09-08 17:44:36 +00003355 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
Matt Arsenault248b7b62014-03-24 20:08:09 +00003356 if (SubIdx == AMDGPU::sub1)
Matt Arsenaultd745c282016-09-08 17:44:36 +00003357 return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
Matt Arsenault248b7b62014-03-24 20:08:09 +00003358
3359 llvm_unreachable("Unhandled register index for immediate");
3360 }
3361
3362 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
3363 SubIdx, SubRC);
3364 return MachineOperand::CreateReg(SubReg, false);
3365}
3366
Marek Olsakbe047802014-12-07 12:19:03 +00003367// Change the order of operands from (0, 1, 2) to (0, 2, 1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003368void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
3369 assert(Inst.getNumExplicitOperands() == 3);
3370 MachineOperand Op1 = Inst.getOperand(1);
3371 Inst.RemoveOperand(1);
3372 Inst.addOperand(Op1);
Marek Olsakbe047802014-12-07 12:19:03 +00003373}
3374
Matt Arsenault856d1922015-12-01 19:57:17 +00003375bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
3376 const MCOperandInfo &OpInfo,
3377 const MachineOperand &MO) const {
3378 if (!MO.isReg())
3379 return false;
3380
3381 unsigned Reg = MO.getReg();
3382 const TargetRegisterClass *RC =
3383 TargetRegisterInfo::isVirtualRegister(Reg) ?
3384 MRI.getRegClass(Reg) :
3385 RI.getPhysRegClass(Reg);
3386
Nicolai Haehnle82fc9622016-01-07 17:10:29 +00003387 const SIRegisterInfo *TRI =
3388 static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
3389 RC = TRI->getSubRegClass(RC, MO.getSubReg());
3390
Matt Arsenault856d1922015-12-01 19:57:17 +00003391 // In order to be legal, the common sub-class must be equal to the
3392 // class of the current operand. For example:
3393 //
Sam Kolton1eeb11b2016-09-09 14:44:04 +00003394 // v_mov_b32 s0 ; Operand defined as vsrc_b32
3395 // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
Matt Arsenault856d1922015-12-01 19:57:17 +00003396 //
3397 // s_sendmsg 0, s0 ; Operand defined as m0reg
3398 // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
3399
3400 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
3401}
3402
3403bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
3404 const MCOperandInfo &OpInfo,
3405 const MachineOperand &MO) const {
3406 if (MO.isReg())
3407 return isLegalRegOperand(MRI, OpInfo, MO);
3408
3409 // Handle non-register types that are treated like immediates.
3410 assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
3411 return true;
3412}
3413
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003414bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
Tom Stellard0e975cf2014-08-01 00:32:35 +00003415 const MachineOperand *MO) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003416 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3417 const MCInstrDesc &InstDesc = MI.getDesc();
Tom Stellard0e975cf2014-08-01 00:32:35 +00003418 const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
3419 const TargetRegisterClass *DefinedRC =
3420 OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
3421 if (!MO)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003422 MO = &MI.getOperand(OpIdx);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003423
Matt Arsenault4bd72362016-12-10 00:39:12 +00003424 if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
Matt Arsenaultfcb345f2016-02-11 06:15:39 +00003425
3426 RegSubRegPair SGPRUsed;
3427 if (MO->isReg())
3428 SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
3429
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003430 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00003431 if (i == OpIdx)
3432 continue;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003433 const MachineOperand &Op = MI.getOperand(i);
Matt Arsenaultffc82752016-07-05 17:09:01 +00003434 if (Op.isReg()) {
3435 if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
Matt Arsenault4bd72362016-12-10 00:39:12 +00003436 usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
Matt Arsenaultffc82752016-07-05 17:09:01 +00003437 return false;
3438 }
3439 } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
Tom Stellard73ae1cb2014-09-23 21:26:25 +00003440 return false;
3441 }
3442 }
3443 }
3444
Tom Stellard0e975cf2014-08-01 00:32:35 +00003445 if (MO->isReg()) {
3446 assert(DefinedRC);
Matt Arsenault856d1922015-12-01 19:57:17 +00003447 return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003448 }
3449
Tom Stellard0e975cf2014-08-01 00:32:35 +00003450 // Handle non-register types that are treated like immediates.
Tom Stellardfb77f002015-01-13 22:59:41 +00003451 assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
Tom Stellard0e975cf2014-08-01 00:32:35 +00003452
Matt Arsenault4364fef2014-09-23 18:30:57 +00003453 if (!DefinedRC) {
3454 // This operand expects an immediate.
Tom Stellard0e975cf2014-08-01 00:32:35 +00003455 return true;
Matt Arsenault4364fef2014-09-23 18:30:57 +00003456 }
Tom Stellard0e975cf2014-08-01 00:32:35 +00003457
Tom Stellard73ae1cb2014-09-23 21:26:25 +00003458 return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003459}
3460
Matt Arsenault856d1922015-12-01 19:57:17 +00003461void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003462 MachineInstr &MI) const {
3463 unsigned Opc = MI.getOpcode();
Matt Arsenault856d1922015-12-01 19:57:17 +00003464 const MCInstrDesc &InstrDesc = get(Opc);
3465
3466 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003467 MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00003468
3469 // If there is an implicit SGPR use such as the VCC use for v_addc_u32/v_subb_u32,
3470 // we need to only have one constant bus use.
3471 //
3472 // Note we do not need to worry about literal constants here. They are
3473 // disabled for the operand type of these instructions because they will always
3474 // violate the one constant bus use rule.
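  // For example (illustrative), V_ADDC_U32_e32 implicitly reads VCC, which
  // already occupies the single constant bus slot, so an SGPR in src0 must be
  // moved to a VGPR before the instruction is legal.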
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003475 bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
Matt Arsenault856d1922015-12-01 19:57:17 +00003476 if (HasImplicitSGPR) {
3477 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003478 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00003479
3480 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3481 legalizeOpWithMove(MI, Src0Idx);
3482 }
3483
Tim Renouf2a99fa22018-02-28 19:10:32 +00003484 // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
3485 // both the value to write (src0) and lane select (src1). Fix up non-SGPR
3486 // src0/src1 with V_READFIRSTLANE.
3487 if (Opc == AMDGPU::V_WRITELANE_B32) {
3488 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3489 MachineOperand &Src0 = MI.getOperand(Src0Idx);
3490 const DebugLoc &DL = MI.getDebugLoc();
3491 if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
3492 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3493 BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3494 .add(Src0);
3495 Src0.ChangeToRegister(Reg, false);
3496 }
3497 if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
3498 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3499 const DebugLoc &DL = MI.getDebugLoc();
3500 BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3501 .add(Src1);
3502 Src1.ChangeToRegister(Reg, false);
3503 }
3504 return;
3505 }
3506
Matt Arsenault856d1922015-12-01 19:57:17 +00003507 // VOP2 instructions accept any operand type in src0, so we don't need to
3508 // check its legality. If src1 is already legal, we don't need to do anything.
3509 if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
3510 return;
3511
Nicolai Haehnle5dea6452017-04-24 17:17:36 +00003512 // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
3513 // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
3514 // select is uniform.
3515 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3516 RI.isVGPR(MRI, Src1.getReg())) {
3517 unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3518 const DebugLoc &DL = MI.getDebugLoc();
3519 BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
3520 .add(Src1);
3521 Src1.ChangeToRegister(Reg, false);
3522 return;
3523 }
3524
Matt Arsenault856d1922015-12-01 19:57:17 +00003525 // We do not use commuteInstruction here because it is too aggressive and will
3526 // commute if it is possible. We only want to commute here if it improves
3527 // legality. This can be called a fairly large number of times so don't waste
3528 // compile time pointlessly swapping and checking legality again.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003529 if (HasImplicitSGPR || !MI.isCommutable()) {
Matt Arsenault856d1922015-12-01 19:57:17 +00003530 legalizeOpWithMove(MI, Src1Idx);
3531 return;
3532 }
3533
3534 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003535 MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault856d1922015-12-01 19:57:17 +00003536
3537 // If src0 can be used as src1, commuting will make the operands legal.
3538 // Otherwise we have to give up and insert a move.
3539 //
3540 // TODO: Other immediate-like operand kinds could be commuted if there was a
3541 // MachineOperand::ChangeTo* for them.
3542 if ((!Src1.isImm() && !Src1.isReg()) ||
3543 !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
3544 legalizeOpWithMove(MI, Src1Idx);
3545 return;
3546 }
3547
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003548 int CommutedOpc = commuteOpcode(MI);
Matt Arsenault856d1922015-12-01 19:57:17 +00003549 if (CommutedOpc == -1) {
3550 legalizeOpWithMove(MI, Src1Idx);
3551 return;
3552 }
3553
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003554 MI.setDesc(get(CommutedOpc));
Matt Arsenault856d1922015-12-01 19:57:17 +00003555
3556 unsigned Src0Reg = Src0.getReg();
3557 unsigned Src0SubReg = Src0.getSubReg();
3558 bool Src0Kill = Src0.isKill();
3559
3560 if (Src1.isImm())
3561 Src0.ChangeToImmediate(Src1.getImm());
3562 else if (Src1.isReg()) {
3563 Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
3564 Src0.setSubReg(Src1.getSubReg());
3565 } else
3566 llvm_unreachable("Should only have register or immediate operands");
3567
3568 Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
3569 Src1.setSubReg(Src0SubReg);
3570}
3571
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003572// Legalize VOP3 operands. Because all operand types are supported for any
3573// operand, and since literal constants are not allowed and should never be
3574// seen, we only need to worry about inserting copies if we use multiple SGPR
3575// operands.
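// e.g. (illustrative) a V_MAD_F32 with two different SGPR sources keeps at
// most one unique SGPR; the remaining SGPR operand is copied into a VGPR.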
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003576void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
3577 MachineInstr &MI) const {
3578 unsigned Opc = MI.getOpcode();
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003579
3580 int VOP3Idx[3] = {
3581 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
3582 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
3583 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
3584 };
3585
3586 // Find the one SGPR operand we are allowed to use.
3587 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3588
3589 for (unsigned i = 0; i < 3; ++i) {
3590 int Idx = VOP3Idx[i];
3591 if (Idx == -1)
3592 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003593 MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003594
3595 // We should never see a VOP3 instruction with an illegal immediate operand.
3596 if (!MO.isReg())
3597 continue;
3598
3599 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
3600 continue; // VGPRs are legal
3601
3602 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3603 SGPRReg = MO.getReg();
3604 // We can use one SGPR in each VOP3 instruction.
3605 continue;
3606 }
3607
3608 // If we make it this far, then the operand is not legal and we must
3609 // legalize it.
3610 legalizeOpWithMove(MI, Idx);
3611 }
3612}
3613
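// Sketch of the expansion (illustrative): a vreg_64 source becomes two
// V_READFIRSTLANE_B32 results, one per 32-bit sub-register, reassembled with a
// REG_SEQUENCE into an equivalent SGPR-class register.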
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003614unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
3615 MachineRegisterInfo &MRI) const {
Tom Stellard1397d492016-02-11 21:45:07 +00003616 const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
3617 const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
3618 unsigned DstReg = MRI.createVirtualRegister(SRC);
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00003619 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
Tom Stellard1397d492016-02-11 21:45:07 +00003620
Nicolai Haehnle7a879772018-04-20 07:14:25 +00003621 if (SubRegs == 1) {
3622 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3623 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
3624 .addReg(SrcReg);
3625 return DstReg;
3626 }
3627
Tom Stellard1397d492016-02-11 21:45:07 +00003628 SmallVector<unsigned, 8> SRegs;
3629 for (unsigned i = 0; i < SubRegs; ++i) {
3630 unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003631 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
Tom Stellard1397d492016-02-11 21:45:07 +00003632 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003633 .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
Tom Stellard1397d492016-02-11 21:45:07 +00003634 SRegs.push_back(SGPR);
3635 }
3636
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003637 MachineInstrBuilder MIB =
3638 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
3639 get(AMDGPU::REG_SEQUENCE), DstReg);
Tom Stellard1397d492016-02-11 21:45:07 +00003640 for (unsigned i = 0; i < SubRegs; ++i) {
3641 MIB.addReg(SRegs[i]);
3642 MIB.addImm(RI.getSubRegFromChannel(i));
3643 }
3644 return DstReg;
3645}
3646
Tom Stellard467b5b92016-02-20 00:37:25 +00003647void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003648 MachineInstr &MI) const {
Tom Stellard467b5b92016-02-20 00:37:25 +00003649
3650 // If the pointer is stored in VGPRs, then we need to move it to
3651 // SGPRs using v_readfirstlane. This is safe because we only select
3652 // loads with uniform pointers to SMRD instructions, so we know the
3653 // pointer value is uniform.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003654 MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
Tom Stellard467b5b92016-02-20 00:37:25 +00003655 if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
Nicolai Haehnlea7b00052018-11-30 22:55:38 +00003656 unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
3657 SBase->setReg(SGPR);
3658 }
3659 MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
3660 if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
3661 unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
3662 SOff->setReg(SGPR);
Tom Stellard467b5b92016-02-20 00:37:25 +00003663 }
3664}
3665
Tom Stellard0d162b12016-11-16 18:42:17 +00003666void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
3667 MachineBasicBlock::iterator I,
3668 const TargetRegisterClass *DstRC,
3669 MachineOperand &Op,
3670 MachineRegisterInfo &MRI,
3671 const DebugLoc &DL) const {
Tom Stellard0d162b12016-11-16 18:42:17 +00003672 unsigned OpReg = Op.getReg();
3673 unsigned OpSubReg = Op.getSubReg();
3674
3675 const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
3676 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3677
3678 // Check if operand is already the correct register class.
3679 if (DstRC == OpRC)
3680 return;
3681
3682 unsigned DstReg = MRI.createVirtualRegister(DstRC);
Diana Picus116bbab2017-01-13 09:58:52 +00003683 MachineInstr *Copy =
3684 BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Tom Stellard0d162b12016-11-16 18:42:17 +00003685
3686 Op.setReg(DstReg);
3687 Op.setSubReg(0);
3688
3689 MachineInstr *Def = MRI.getVRegDef(OpReg);
3690 if (!Def)
3691 return;
3692
3693 // Try to eliminate the copy if it is copying an immediate value.
3694 if (Def->isMoveImmediate())
3695 FoldImmediate(*Copy, *Def, OpReg, &MRI);
3696}
3697
Scott Linder823549a2018-10-08 18:47:01 +00003698// Emit the actual waterfall loop, executing the wrapped instruction for each
3699// unique value of \p Rsrc across all lanes. In the best case we execute 1
3700// iteration, in the worst case we execute 64 (once per lane).
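// Rough shape of the emitted loop (an illustrative sketch, not literal MIR):
//   loop:
//     sRsrc = v_readfirstlane(vRsrc)       ; pick one lane's descriptor
//     cond  = compare sRsrc with vRsrc     ; which lanes hold that value
//     s_and_saveexec_b64(cond)             ; enable only those lanes
//     <the wrapped instruction, now using sRsrc>
//     exec ^= saved exec                   ; retire the handled lanes
//     s_cbranch_execnz loop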
3701static void
3702emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
3703 MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
3704 const DebugLoc &DL, MachineOperand &Rsrc) {
3705 MachineBasicBlock::iterator I = LoopBB.begin();
3706
3707 unsigned VRsrc = Rsrc.getReg();
3708 unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
3709
3710 unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3711 unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3712 unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3713 unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3714 unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3715 unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3716 unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3717 unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3718 unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3719
3720 // Beginning of the loop, read the next Rsrc variant.
3721 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
3722 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
3723 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
3724 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
3725 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
3726 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
3727 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
3728 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
3729
3730 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
3731 .addReg(SRsrcSub0)
3732 .addImm(AMDGPU::sub0)
3733 .addReg(SRsrcSub1)
3734 .addImm(AMDGPU::sub1)
3735 .addReg(SRsrcSub2)
3736 .addImm(AMDGPU::sub2)
3737 .addReg(SRsrcSub3)
3738 .addImm(AMDGPU::sub3);
3739
3740 // Update Rsrc operand to use the SGPR Rsrc.
3741 Rsrc.setReg(SRsrc);
3742 Rsrc.setIsKill(true);
3743
3744 // Identify all lanes with identical Rsrc operands in their VGPRs.
3745 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
3746 .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
3747 .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
3748 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
3749 .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
3750 .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
3751 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
3752 .addReg(CondReg0)
3753 .addReg(CondReg1);
3754
3755 MRI.setSimpleHint(SaveExec, AndCond);
3756
3757 // Update EXEC to matching lanes, saving original to SaveExec.
3758 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
3759 .addReg(AndCond, RegState::Kill);
3760
3761 // The original instruction is here; we insert the terminators after it.
3762 I = LoopBB.end();
3763
3764 // Update EXEC, switch all done bits to 0 and all todo bits to 1.
3765 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
3766 .addReg(AMDGPU::EXEC)
3767 .addReg(SaveExec);
3768 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
3769}
3770
3771// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
3772// with SGPRs by iterating over all unique values across all lanes.
3773static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
3774 MachineOperand &Rsrc, MachineDominatorTree *MDT) {
3775 MachineBasicBlock &MBB = *MI.getParent();
3776 MachineFunction &MF = *MBB.getParent();
3777 MachineRegisterInfo &MRI = MF.getRegInfo();
3778 MachineBasicBlock::iterator I(&MI);
3779 const DebugLoc &DL = MI.getDebugLoc();
3780
3781 unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
3782
3783 // Save the EXEC mask
3784 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
3785 .addReg(AMDGPU::EXEC);
3786
3787 // Killed uses in the instruction we are waterfalling around will be
3788 // incorrect due to the added control-flow.
3789 for (auto &MO : MI.uses()) {
3790 if (MO.isReg() && MO.isUse()) {
3791 MRI.clearKillFlags(MO.getReg());
3792 }
3793 }
3794
3795 // To insert the loop we need to split the block. Move everything after this
3796 // point to a new block, and insert a new empty block between the two.
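  // Resulting control flow (sketch):
  //   MBB -> LoopBB -> RemainderBB -> (original successors of MBB)
  //            ^  |
  //            +--+  LoopBB loops on itself while unhandled lanes remain.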
3797 MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
3798 MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
3799 MachineFunction::iterator MBBI(MBB);
3800 ++MBBI;
3801
3802 MF.insert(MBBI, LoopBB);
3803 MF.insert(MBBI, RemainderBB);
3804
3805 LoopBB->addSuccessor(LoopBB);
3806 LoopBB->addSuccessor(RemainderBB);
3807
3808 // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
3809 MachineBasicBlock::iterator J = I++;
3810 RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
3811 RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
3812 LoopBB->splice(LoopBB->begin(), &MBB, J);
3813
3814 MBB.addSuccessor(LoopBB);
3815
3816 // Update dominators. We know that MBB immediately dominates LoopBB, that
3817 // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
3818 // dominates all of the successors transferred to it from MBB that MBB used
3819 // to dominate.
3820 if (MDT) {
3821 MDT->addNewBlock(LoopBB, &MBB);
3822 MDT->addNewBlock(RemainderBB, LoopBB);
3823 for (auto &Succ : RemainderBB->successors()) {
3824 if (MDT->dominates(&MBB, Succ)) {
3825 MDT->changeImmediateDominator(Succ, RemainderBB);
3826 }
3827 }
3828 }
3829
3830 emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
3831
3832 // Restore the EXEC mask
3833 MachineBasicBlock::iterator First = RemainderBB->begin();
3834 BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
3835 .addReg(SaveExec);
3836}
3837
3838// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
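// Layout of the replacement descriptor built here (sketch):
//   NewSRsrc.sub0_sub1 = 0                       (base address cleared)
//   NewSRsrc.sub2      = RSRC_DATA_FORMAT[31:0]
//   NewSRsrc.sub3      = RSRC_DATA_FORMAT[63:32]
// so the extracted pointer carries all of the addressing state.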
3839static std::tuple<unsigned, unsigned>
3840extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
3841 MachineBasicBlock &MBB = *MI.getParent();
3842 MachineFunction &MF = *MBB.getParent();
3843 MachineRegisterInfo &MRI = MF.getRegInfo();
3844
3845 // Extract the ptr from the resource descriptor.
3846 unsigned RsrcPtr =
3847 TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
3848 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
3849
3850 // Create an empty resource descriptor
3851 unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
3852 unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3853 unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3854 unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
3855 uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
3856
3857 // Zero64 = 0
3858 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
3859 .addImm(0);
3860
3861 // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
3862 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
3863 .addImm(RsrcDataFormat & 0xFFFFFFFF);
3864
3865 // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
3866 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
3867 .addImm(RsrcDataFormat >> 32);
3868
3869 // NewSRsrc = {Zero64, SRsrcFormat}
3870 BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
3871 .addReg(Zero64)
3872 .addImm(AMDGPU::sub0_sub1)
3873 .addReg(SRsrcFormatLo)
3874 .addImm(AMDGPU::sub2)
3875 .addReg(SRsrcFormatHi)
3876 .addImm(AMDGPU::sub3);
3877
3878 return std::make_tuple(RsrcPtr, NewSRsrc);
3879}
3880
3881void SIInstrInfo::legalizeOperands(MachineInstr &MI,
3882 MachineDominatorTree *MDT) const {
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00003883 MachineFunction &MF = *MI.getParent()->getParent();
3884 MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard82166022013-11-13 23:36:37 +00003885
3886 // Legalize VOP2 and VOPC
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003887 if (isVOP2(MI) || isVOPC(MI)) {
Matt Arsenault856d1922015-12-01 19:57:17 +00003888 legalizeOperandsVOP2(MRI, MI);
Tom Stellard0e975cf2014-08-01 00:32:35 +00003889 return;
Tom Stellard82166022013-11-13 23:36:37 +00003890 }
3891
3892 // Legalize VOP3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003893 if (isVOP3(MI)) {
Matt Arsenault6005fcb2015-10-21 21:51:02 +00003894 legalizeOperandsVOP3(MRI, MI);
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003895 return;
Tom Stellard82166022013-11-13 23:36:37 +00003896 }
3897
Tom Stellard467b5b92016-02-20 00:37:25 +00003898 // Legalize SMRD
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003899 if (isSMRD(MI)) {
Tom Stellard467b5b92016-02-20 00:37:25 +00003900 legalizeOperandsSMRD(MRI, MI);
3901 return;
3902 }
3903
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003904 // Legalize REG_SEQUENCE and PHI
Tom Stellard82166022013-11-13 23:36:37 +00003905 // The register class of the operands must be the same type as the register
3906 // class of the output.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003907 if (MI.getOpcode() == AMDGPU::PHI) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003908 const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003909 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
3910 if (!MI.getOperand(i).isReg() ||
3911 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003912 continue;
3913 const TargetRegisterClass *OpRC =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003914 MRI.getRegClass(MI.getOperand(i).getReg());
Tom Stellard82166022013-11-13 23:36:37 +00003915 if (RI.hasVGPRs(OpRC)) {
3916 VRC = OpRC;
3917 } else {
3918 SRC = OpRC;
3919 }
3920 }
3921
3922 // If any of the operands are VGPR registers, then they all must be VGPRs,
3923 // otherwise we will create illegal VGPR->SGPR copies when legalizing
3924 // them.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003925 if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
Tom Stellard82166022013-11-13 23:36:37 +00003926 if (!VRC) {
3927 assert(SRC);
3928 VRC = RI.getEquivalentVGPRClass(SRC);
3929 }
3930 RC = VRC;
3931 } else {
3932 RC = SRC;
3933 }
3934
3935 // Update all the operands so they have the same type.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003936 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3937 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003938 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard82166022013-11-13 23:36:37 +00003939 continue;
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003940
3941 // MI is a PHI instruction.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003942 MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003943 MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
3944
Tom Stellard0d162b12016-11-16 18:42:17 +00003945 // Avoid creating no-op copies with the same src and dst reg class. These
3946 // confuse some of the machine passes.
3947 legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003948 }
3949 }
3950
3951 // REG_SEQUENCE doesn't really require operand legalization, but if one has a
3952 // VGPR dest type and SGPR sources, insert copies so all operands are
3953 // VGPRs. This seems to help operand folding / the register coalescer.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003954 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3955 MachineBasicBlock *MBB = MI.getParent();
3956 const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003957 if (RI.hasVGPRs(DstRC)) {
3958 // Update all the operands so they are VGPR register classes. These may
3959 // not be the same register class because REG_SEQUENCE supports mixing
3960 // subregister index types e.g. sub0_sub1 + sub2 + sub3
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003961 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3962 MachineOperand &Op = MI.getOperand(I);
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003963 if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
3964 continue;
3965
3966 const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
3967 const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
3968 if (VRC == OpRC)
3969 continue;
3970
Tom Stellard0d162b12016-11-16 18:42:17 +00003971 legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Matt Arsenault2d6fdb82015-09-25 17:08:42 +00003972 Op.setIsKill();
Tom Stellard4f3b04d2014-04-17 21:00:07 +00003973 }
Tom Stellard82166022013-11-13 23:36:37 +00003974 }
Matt Arsenaulte068f9a2015-09-24 07:51:28 +00003975
3976 return;
Tom Stellard82166022013-11-13 23:36:37 +00003977 }
Tom Stellard15834092014-03-21 15:51:57 +00003978
Tom Stellarda5687382014-05-15 14:41:55 +00003979 // Legalize INSERT_SUBREG
3980 // src0 must have the same register class as dst
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00003981 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3982 unsigned Dst = MI.getOperand(0).getReg();
3983 unsigned Src0 = MI.getOperand(1).getReg();
Tom Stellarda5687382014-05-15 14:41:55 +00003984 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
3985 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
3986 if (DstRC != Src0RC) {
Tom Stellard0d162b12016-11-16 18:42:17 +00003987 MachineBasicBlock *MBB = MI.getParent();
3988 MachineOperand &Op = MI.getOperand(1);
3989 legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
Tom Stellarda5687382014-05-15 14:41:55 +00003990 }
3991 return;
3992 }
3993
Nicolai Haehnle7a879772018-04-20 07:14:25 +00003994 // Legalize SI_INIT_M0
3995 if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
3996 MachineOperand &Src = MI.getOperand(0);
3997 if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
3998 Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
3999 return;
4000 }
4001
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00004002 // Legalize MIMG and MUBUF/MTBUF for shaders.
4003 //
4004 // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
4005 // scratch memory access. In both cases, the legalization never involves
4006 // conversion to the addr64 form.
4007 if (isMIMG(MI) ||
Matthias Braunf1caa282017-12-15 22:22:58 +00004008 (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
Nicolai Haehnlece2b5892016-11-18 11:55:52 +00004009 (isMUBUF(MI) || isMTBUF(MI)))) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004010 MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
Tom Stellard1397d492016-02-11 21:45:07 +00004011 if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
4012 unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
4013 SRsrc->setReg(SGPR);
4014 }
4015
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004016 MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
Tom Stellard1397d492016-02-11 21:45:07 +00004017 if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
4018 unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
4019 SSamp->setReg(SGPR);
4020 }
4021 return;
4022 }
4023
Scott Linder823549a2018-10-08 18:47:01 +00004024 // Legalize MUBUF* instructions.
4025 int RsrcIdx =
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004026 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
Scott Linder823549a2018-10-08 18:47:01 +00004027 if (RsrcIdx != -1) {
Tom Stellard155bbb72014-08-11 22:18:17 +00004028 // We have an MUBUF instruction
Scott Linder823549a2018-10-08 18:47:01 +00004029 MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
4030 unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
4031 if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
4032 RI.getRegClass(RsrcRC))) {
Tom Stellard155bbb72014-08-11 22:18:17 +00004033 // The operands are legal.
4034 // FIXME: We may need to legalize operands besides srsrc.
4035 return;
4036 }
Tom Stellard15834092014-03-21 15:51:57 +00004037
Scott Linder823549a2018-10-08 18:47:01 +00004038 // Legalize a VGPR Rsrc.
4039 //
4040 // If the instruction is _ADDR64, we can avoid a waterfall by extracting
4041 // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
4042 // a zero-value SRsrc.
4043 //
4044 // If the instruction is _OFFSET (both idxen and offen disabled), and we
4045 // support ADDR64 instructions, we can convert to ADDR64 and do the same as
4046 // above.
4047 //
4048 // Otherwise we are on non-ADDR64 hardware, and/or we have
4049 // idxen/offen/bothen and we fall back to a waterfall loop.
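    //
    // In short (sketch of the decision made below):
    //   _ADDR64 instruction               -> fold the pointer into VAddr
    //   _OFFSET instruction, ADDR64 HW    -> convert to _ADDR64, then as above
    //   anything else                     -> waterfall loop over unique Rsrcs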
4050
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004051 MachineBasicBlock &MBB = *MI.getParent();
Matt Arsenaultef67d762015-09-09 17:03:29 +00004052
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004053 MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
Scott Linder823549a2018-10-08 18:47:01 +00004054 if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
Tom Stellard155bbb72014-08-11 22:18:17 +00004055 // This is already an ADDR64 instruction so we need to add the pointer
4056 // extracted from the resource descriptor to the current value of VAddr.
Matt Arsenaultef67d762015-09-09 17:03:29 +00004057 unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4058 unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Scott Linder823549a2018-10-08 18:47:01 +00004059 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard155bbb72014-08-11 22:18:17 +00004060
Scott Linder823549a2018-10-08 18:47:01 +00004061 unsigned RsrcPtr, NewSRsrc;
4062 std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
4063
4064 // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004065 DebugLoc DL = MI.getDebugLoc();
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00004066 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Scott Linder823549a2018-10-08 18:47:01 +00004067 .addReg(RsrcPtr, 0, AMDGPU::sub0)
4068 .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard15834092014-03-21 15:51:57 +00004069
Scott Linder823549a2018-10-08 18:47:01 +00004070 // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
Matt Arsenault51d2d0f2015-09-01 02:02:21 +00004071 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Scott Linder823549a2018-10-08 18:47:01 +00004072 .addReg(RsrcPtr, 0, AMDGPU::sub1)
4073 .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard15834092014-03-21 15:51:57 +00004074
Matt Arsenaultef67d762015-09-09 17:03:29 +00004075 // NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004076 BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
4077 .addReg(NewVAddrLo)
4078 .addImm(AMDGPU::sub0)
4079 .addReg(NewVAddrHi)
4080 .addImm(AMDGPU::sub1);
Scott Linder823549a2018-10-08 18:47:01 +00004081
4082 VAddr->setReg(NewVAddr);
4083 Rsrc->setReg(NewSRsrc);
4084 } else if (!VAddr && ST.hasAddr64()) {
Tom Stellard155bbb72014-08-11 22:18:17 +00004085 // This instruction is the _OFFSET variant, so we need to convert it to
4086 // ADDR64.
Tom Stellard5bfbae52018-07-11 20:59:01 +00004087 assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
4088 < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004089 "FIXME: Need to emit flat atomics here");
4090
Scott Linder823549a2018-10-08 18:47:01 +00004091 unsigned RsrcPtr, NewSRsrc;
4092 std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
4093
4094 unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004095 MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
4096 MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
4097 MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
4098 unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004099
4100 // Atomics with return have an additional tied operand and are
4101 // missing some of the special bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004102 MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004103 MachineInstr *Addr64;
4104
4105 if (!VDataIn) {
4106 // Regular buffer load / store.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004107 MachineInstrBuilder MIB =
4108 BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00004109 .add(*VData)
Scott Linder823549a2018-10-08 18:47:01 +00004110 .addReg(NewVAddr)
4111 .addReg(NewSRsrc)
Diana Picus116bbab2017-01-13 09:58:52 +00004112 .add(*SOffset)
4113 .add(*Offset);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004114
4115 // Atomics do not have this operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004116 if (const MachineOperand *GLC =
4117 getNamedOperand(MI, AMDGPU::OpName::glc)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004118 MIB.addImm(GLC->getImm());
4119 }
4120
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004121 MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004122
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004123 if (const MachineOperand *TFE =
4124 getNamedOperand(MI, AMDGPU::OpName::tfe)) {
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004125 MIB.addImm(TFE->getImm());
4126 }
4127
Chandler Carruthc73c0302018-08-16 21:30:05 +00004128 MIB.cloneMemRefs(MI);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004129 Addr64 = MIB;
4130 } else {
4131 // Atomics with return.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004132 Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00004133 .add(*VData)
4134 .add(*VDataIn)
Scott Linder823549a2018-10-08 18:47:01 +00004135 .addReg(NewVAddr)
4136 .addReg(NewSRsrc)
Diana Picus116bbab2017-01-13 09:58:52 +00004137 .add(*SOffset)
4138 .add(*Offset)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004139 .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
Chandler Carruthc73c0302018-08-16 21:30:05 +00004140 .cloneMemRefs(MI);
Matt Arsenaulta40450c2015-11-05 02:46:56 +00004141 }
Tom Stellard15834092014-03-21 15:51:57 +00004142
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004143 MI.removeFromParent();
Tom Stellard15834092014-03-21 15:51:57 +00004144
Matt Arsenaultef67d762015-09-09 17:03:29 +00004145 // NewVaddr = RsrcPtr, i.e. the pointer extracted from the descriptor.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004146 BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
4147 NewVAddr)
Scott Linder823549a2018-10-08 18:47:01 +00004148 .addReg(RsrcPtr, 0, AMDGPU::sub0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004149 .addImm(AMDGPU::sub0)
Scott Linder823549a2018-10-08 18:47:01 +00004150 .addReg(RsrcPtr, 0, AMDGPU::sub1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004151 .addImm(AMDGPU::sub1);
Scott Linder823549a2018-10-08 18:47:01 +00004152 } else {
4153 // This is another variant; legalize Rsrc with waterfall loop from VGPRs
4154 // to SGPRs.
4155 loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
Tom Stellard15834092014-03-21 15:51:57 +00004156 }
4157 }
Tom Stellard82166022013-11-13 23:36:37 +00004158}
4159
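// Rewrite TopInst, and transitively every instruction that consumes one of
// its results, to use VALU opcodes. A worklist tracks the users that become
// illegal as their inputs turn into VGPRs; per-opcode special cases are
// handled in the switch below.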
Scott Linder823549a2018-10-08 18:47:01 +00004160void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
4161 MachineDominatorTree *MDT) const {
Alfred Huang5b270722017-07-14 17:56:55 +00004162 SetVectorType Worklist;
4163 Worklist.insert(&TopInst);
Tom Stellard82166022013-11-13 23:36:37 +00004164
4165 while (!Worklist.empty()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004166 MachineInstr &Inst = *Worklist.pop_back_val();
4167 MachineBasicBlock *MBB = Inst.getParent();
Tom Stellarde0387202014-03-21 15:51:54 +00004168 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
4169
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004170 unsigned Opcode = Inst.getOpcode();
4171 unsigned NewOpcode = getVALUOp(Inst);
Matt Arsenault27cc9582014-04-18 01:53:18 +00004172
Tom Stellarde0387202014-03-21 15:51:54 +00004173 // Handle some special cases
Matt Arsenault27cc9582014-04-18 01:53:18 +00004174 switch (Opcode) {
Tom Stellard0c354f22014-04-30 15:31:29 +00004175 default:
Tom Stellard0c354f22014-04-30 15:31:29 +00004176 break;
Matt Arsenault301162c2017-11-15 21:51:43 +00004177 case AMDGPU::S_ADD_U64_PSEUDO:
4178 case AMDGPU::S_SUB_U64_PSEUDO:
Scott Linder823549a2018-10-08 18:47:01 +00004179 splitScalar64BitAddSub(Worklist, Inst, MDT);
Matt Arsenault301162c2017-11-15 21:51:43 +00004180 Inst.eraseFromParent();
4181 continue;
Matt Arsenault84445dd2017-11-30 22:51:26 +00004182 case AMDGPU::S_ADD_I32:
4183 case AMDGPU::S_SUB_I32:
4184 // FIXME: The u32 versions currently selected use the carry.
Scott Linder823549a2018-10-08 18:47:01 +00004185 if (moveScalarAddSub(Worklist, Inst, MDT))
Matt Arsenault84445dd2017-11-30 22:51:26 +00004186 continue;
4187
4188 // Default handling
4189 break;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004190 case AMDGPU::S_AND_B64:
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004191 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004192 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004193 continue;
4194
4195 case AMDGPU::S_OR_B64:
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004196 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004197 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004198 continue;
4199
4200 case AMDGPU::S_XOR_B64:
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004201 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
4202 Inst.eraseFromParent();
4203 continue;
4204
4205 case AMDGPU::S_NAND_B64:
4206 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
4207 Inst.eraseFromParent();
4208 continue;
4209
4210 case AMDGPU::S_NOR_B64:
4211 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
4212 Inst.eraseFromParent();
4213 continue;
4214
4215 case AMDGPU::S_XNOR_B64:
Graham Sellersba559ac2018-12-01 12:27:53 +00004216 if (ST.hasDLInsts())
4217 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
4218 else
4219 splitScalar64BitXnor(Worklist, Inst, MDT);
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004220 Inst.eraseFromParent();
4221 continue;
4222
4223 case AMDGPU::S_ANDN2_B64:
4224 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
4225 Inst.eraseFromParent();
4226 continue;
4227
4228 case AMDGPU::S_ORN2_B64:
4229 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004230 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004231 continue;
4232
4233 case AMDGPU::S_NOT_B64:
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004234 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004235 Inst.eraseFromParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004236 continue;
4237
Matt Arsenault8333e432014-06-10 19:18:24 +00004238 case AMDGPU::S_BCNT1_I32_B64:
4239 splitScalar64BitBCNT(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004240 Inst.eraseFromParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00004241 continue;
4242
Eugene Zelenko59e12822017-08-08 00:47:13 +00004243 case AMDGPU::S_BFE_I64:
Matt Arsenault94812212014-11-14 18:18:16 +00004244 splitScalar64BitBFE(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004245 Inst.eraseFromParent();
Matt Arsenault94812212014-11-14 18:18:16 +00004246 continue;
Matt Arsenault94812212014-11-14 18:18:16 +00004247
Marek Olsakbe047802014-12-07 12:19:03 +00004248 case AMDGPU::S_LSHL_B32:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004249 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00004250 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
4251 swapOperands(Inst);
4252 }
4253 break;
4254 case AMDGPU::S_ASHR_I32:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004255 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00004256 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
4257 swapOperands(Inst);
4258 }
4259 break;
4260 case AMDGPU::S_LSHR_B32:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004261 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsakbe047802014-12-07 12:19:03 +00004262 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
4263 swapOperands(Inst);
4264 }
4265 break;
Marek Olsak707a6d02015-02-03 21:53:01 +00004266 case AMDGPU::S_LSHL_B64:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004267 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00004268 NewOpcode = AMDGPU::V_LSHLREV_B64;
4269 swapOperands(Inst);
4270 }
4271 break;
4272 case AMDGPU::S_ASHR_I64:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004273 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00004274 NewOpcode = AMDGPU::V_ASHRREV_I64;
4275 swapOperands(Inst);
4276 }
4277 break;
4278 case AMDGPU::S_LSHR_B64:
Tom Stellard5bfbae52018-07-11 20:59:01 +00004279 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak707a6d02015-02-03 21:53:01 +00004280 NewOpcode = AMDGPU::V_LSHRREV_B64;
4281 swapOperands(Inst);
4282 }
4283 break;
Marek Olsakbe047802014-12-07 12:19:03 +00004284
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004285 case AMDGPU::S_ABS_I32:
4286 lowerScalarAbs(Worklist, Inst);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004287 Inst.eraseFromParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004288 continue;
4289
Tom Stellardbc4497b2016-02-12 23:45:29 +00004290 case AMDGPU::S_CBRANCH_SCC0:
4291 case AMDGPU::S_CBRANCH_SCC1:
4292 // Clear unused bits of vcc
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004293 BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
4294 AMDGPU::VCC)
4295 .addReg(AMDGPU::EXEC)
4296 .addReg(AMDGPU::VCC);
Tom Stellardbc4497b2016-02-12 23:45:29 +00004297 break;
4298
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004299 case AMDGPU::S_BFE_U64:
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004300 case AMDGPU::S_BFM_B64:
4301 llvm_unreachable("Moving this op to VALU not implemented");
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004302
4303 case AMDGPU::S_PACK_LL_B32_B16:
4304 case AMDGPU::S_PACK_LH_B32_B16:
Eugene Zelenko59e12822017-08-08 00:47:13 +00004305 case AMDGPU::S_PACK_HH_B32_B16:
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004306 movePackToVALU(Worklist, MRI, Inst);
4307 Inst.eraseFromParent();
4308 continue;
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004309
4310 case AMDGPU::S_XNOR_B32:
4311 lowerScalarXnor(Worklist, Inst);
4312 Inst.eraseFromParent();
4313 continue;
4314
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004315 case AMDGPU::S_NAND_B32:
4316 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
4317 Inst.eraseFromParent();
4318 continue;
4319
4320 case AMDGPU::S_NOR_B32:
4321 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
4322 Inst.eraseFromParent();
4323 continue;
4324
4325 case AMDGPU::S_ANDN2_B32:
4326 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
4327 Inst.eraseFromParent();
4328 continue;
4329
4330 case AMDGPU::S_ORN2_B32:
4331 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004332 Inst.eraseFromParent();
4333 continue;
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004334 }
Tom Stellarde0387202014-03-21 15:51:54 +00004335
Tom Stellard15834092014-03-21 15:51:57 +00004336 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
4337 // We cannot move this instruction to the VALU, so we should try to
4338 // legalize its operands instead.
Scott Linder823549a2018-10-08 18:47:01 +00004339 legalizeOperands(Inst, MDT);
Tom Stellard82166022013-11-13 23:36:37 +00004340 continue;
Tom Stellard15834092014-03-21 15:51:57 +00004341 }
Tom Stellard82166022013-11-13 23:36:37 +00004342
Tom Stellard82166022013-11-13 23:36:37 +00004343 // Use the new VALU Opcode.
4344 const MCInstrDesc &NewDesc = get(NewOpcode);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004345 Inst.setDesc(NewDesc);
Tom Stellard82166022013-11-13 23:36:37 +00004346
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00004347 // Remove any references to SCC. Vector instructions can't read from it, and
4348 // we're just about to add the implicit use / defs of VCC, and we don't want
4349 // both.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004350 for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
4351 MachineOperand &Op = Inst.getOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00004352 if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004353 Inst.RemoveOperand(i);
Tom Stellardbc4497b2016-02-12 23:45:29 +00004354 addSCCDefUsersToVALUWorklist(Inst, Worklist);
4355 }
Matt Arsenaultf0b1e3a2013-11-18 20:09:21 +00004356 }
4357
Matt Arsenault27cc9582014-04-18 01:53:18 +00004358 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
4359 // We are converting these to a BFE, so we need to add the missing
4360 // operands for the size and offset.
4361 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004362 Inst.addOperand(MachineOperand::CreateImm(0));
4363 Inst.addOperand(MachineOperand::CreateImm(Size));
Matt Arsenault27cc9582014-04-18 01:53:18 +00004364
Matt Arsenaultb5b51102014-06-10 19:18:21 +00004365 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
4366 // The VALU version adds the second operand to the result, so insert an
4367 // extra 0 operand.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004368 Inst.addOperand(MachineOperand::CreateImm(0));
Tom Stellard82166022013-11-13 23:36:37 +00004369 }
4370
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004371 Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
Tom Stellard82166022013-11-13 23:36:37 +00004372
Matt Arsenault78b86702014-04-18 05:19:26 +00004373 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004374 const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
Matt Arsenault78b86702014-04-18 05:19:26 +00004375 // If we need to move this to VGPRs, we need to unpack the second operand
4376 // back into the 2 separate ones for bit offset and width.
4377 assert(OffsetWidthOp.isImm() &&
4378 "Scalar BFE is only implemented for constant width and offset");
4379 uint32_t Imm = OffsetWidthOp.getImm();
4380
4381 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4382 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
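      // e.g. (sketch) Imm = 0x100008 unpacks to Offset = 8, BitWidth = 16.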
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004383 Inst.RemoveOperand(2); // Remove old immediate.
4384 Inst.addOperand(MachineOperand::CreateImm(Offset));
4385 Inst.addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault78b86702014-04-18 05:19:26 +00004386 }
4387
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004388 bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
Tom Stellardbc4497b2016-02-12 23:45:29 +00004389 unsigned NewDstReg = AMDGPU::NoRegister;
4390 if (HasDst) {
Matt Arsenault21a43822017-04-06 21:09:53 +00004391 unsigned DstReg = Inst.getOperand(0).getReg();
4392 if (TargetRegisterInfo::isPhysicalRegister(DstReg))
4393 continue;
4394
Tom Stellardbc4497b2016-02-12 23:45:29 +00004395 // Update the destination register class.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004396 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
Tom Stellardbc4497b2016-02-12 23:45:29 +00004397 if (!NewDstRC)
4398 continue;
Tom Stellard82166022013-11-13 23:36:37 +00004399
Tom Stellard0d162b12016-11-16 18:42:17 +00004400 if (Inst.isCopy() &&
4401 TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
4402 NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
4403 // Instead of creating a copy where src and dst are the same register
4404 // class, we just replace all uses of dst with src. These kinds of
4405 // copies interfere with the heuristics MachineSink uses to decide
4406 // whether or not to split a critical edge, since the pass assumes
4407 // that copies will end up as machine instructions and not be
4408 // eliminated.
4409 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
4410 MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
4411 MRI.clearKillFlags(Inst.getOperand(1).getReg());
4412 Inst.getOperand(0).setReg(DstReg);
Matt Arsenault69932e42018-03-19 14:07:15 +00004413
4414 // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
4415 // these are deleted later, but at -O0 it would leave a suspicious
4416 // looking illegal copy of an undef register.
4417 for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
4418 Inst.RemoveOperand(I);
4419 Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
Tom Stellard0d162b12016-11-16 18:42:17 +00004420 continue;
4421 }
4422
Tom Stellardbc4497b2016-02-12 23:45:29 +00004423 NewDstReg = MRI.createVirtualRegister(NewDstRC);
4424 MRI.replaceRegWith(DstReg, NewDstReg);
4425 }
Tom Stellard82166022013-11-13 23:36:37 +00004426
Tom Stellarde1a24452014-04-17 21:00:01 +00004427 // Legalize the operands
Scott Linder823549a2018-10-08 18:47:01 +00004428 legalizeOperands(Inst, MDT);
Tom Stellarde1a24452014-04-17 21:00:01 +00004429
Tom Stellardbc4497b2016-02-12 23:45:29 +00004430 if (HasDst)
4431 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard82166022013-11-13 23:36:37 +00004432 }
4433}
4434
Matt Arsenault84445dd2017-11-30 22:51:26 +00004435// Add/sub require special handling to deal with carry outs.
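// On subtargets with hasAddNoCarry() the scalar op can be retargeted directly
// to V_ADD_U32_e64 / V_SUB_U32_e64 (no carry out); otherwise this returns
// false and the caller falls through to the default VALU lowering.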
Scott Linder823549a2018-10-08 18:47:01 +00004436bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
4437 MachineDominatorTree *MDT) const {
Matt Arsenault84445dd2017-11-30 22:51:26 +00004438 if (ST.hasAddNoCarry()) {
4439 // Assume there is no user of scc since we don't select this in that case.
4440 // Since scc isn't used, it doesn't really matter if the i32 or u32 variant
4441 // is used.
4442
4443 MachineBasicBlock &MBB = *Inst.getParent();
4444 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4445
4446 unsigned OldDstReg = Inst.getOperand(0).getReg();
4447 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4448
4449 unsigned Opc = Inst.getOpcode();
4450 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
4451
4452 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
4453 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
4454
4455 assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
4456 Inst.RemoveOperand(3);
4457
4458 Inst.setDesc(get(NewOpc));
4459 Inst.addImplicitDefUseOperands(*MBB.getParent());
4460 MRI.replaceRegWith(OldDstReg, ResultReg);
Scott Linder823549a2018-10-08 18:47:01 +00004461 legalizeOperands(Inst, MDT);
Matt Arsenault84445dd2017-11-30 22:51:26 +00004462
4463 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4464 return true;
4465 }
4466
4467 return false;
4468}
4469
Alfred Huang5b270722017-07-14 17:56:55 +00004470void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004471 MachineInstr &Inst) const {
4472 MachineBasicBlock &MBB = *Inst.getParent();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004473 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4474 MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004475 DebugLoc DL = Inst.getDebugLoc();
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004476
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004477 MachineOperand &Dest = Inst.getOperand(0);
4478 MachineOperand &Src = Inst.getOperand(1);
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004479 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4480 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4481
Matt Arsenault84445dd2017-11-30 22:51:26 +00004482 unsigned SubOp = ST.hasAddNoCarry() ?
4483 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
4484
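  // Lower abs(x) as max(x, 0 - x): TmpReg = 0 - Src, ResultReg = max(Src, TmpReg).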
4485 BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
Marek Olsak7ed6b2f2015-11-25 21:22:45 +00004486 .addImm(0)
4487 .addReg(Src.getReg());
4488
4489 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
4490 .addReg(Src.getReg())
4491 .addReg(TmpReg);
4492
4493 MRI.replaceRegWith(Dest.getReg(), ResultReg);
4494 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4495}
4496
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004497void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
4498 MachineInstr &Inst) const {
4499 MachineBasicBlock &MBB = *Inst.getParent();
4500 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4501 MachineBasicBlock::iterator MII = Inst;
4502 const DebugLoc &DL = Inst.getDebugLoc();
4503
4504 MachineOperand &Dest = Inst.getOperand(0);
4505 MachineOperand &Src0 = Inst.getOperand(1);
4506 MachineOperand &Src1 = Inst.getOperand(2);
4507
Matt Arsenault0084adc2018-04-30 19:08:16 +00004508 if (ST.hasDLInsts()) {
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004509 unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4510 legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
4511 legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
4512
Matt Arsenault0084adc2018-04-30 19:08:16 +00004513 BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
4514 .add(Src0)
4515 .add(Src1);
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004516
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004517 MRI.replaceRegWith(Dest.getReg(), NewDest);
4518 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4519 } else {
4520 // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
4521 // invert either source and then perform the XOR. If either source is a
4522 // scalar register, then we can leave the inversion on the scalar unit to
4523 // achieve a better distribution of scalar and vector instructions.
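    //
    // e.g. with an SGPR Src0 (sketch):
    //   %Temp    = S_NOT_B32 %Src0
    //   %NewDest = S_XOR_B32 %Temp, %Src1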
4524 bool Src0IsSGPR = Src0.isReg() &&
4525 RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
4526 bool Src1IsSGPR = Src1.isReg() &&
4527 RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
4528 MachineInstr *Not = nullptr;
4529 MachineInstr *Xor = nullptr;
4530 unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4531 unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4532
4533 // Build a pair of scalar instructions and add them to the work list.
4534 // The next iteration over the work list will lower these to the vector
4535 // unit as necessary.
4536 if (Src0IsSGPR) {
4537 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4538 .add(Src0);
4539 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4540 .addReg(Temp)
4541 .add(Src1);
4542 } else if (Src1IsSGPR) {
4543 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4544 .add(Src1);
4545 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4546 .add(Src0)
4547 .addReg(Temp);
4548 } else {
4549 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
4550 .add(Src0)
4551 .add(Src1);
4552 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
4553 .addReg(Temp);
4554 Worklist.insert(Not);
4555 }
4556
4557 MRI.replaceRegWith(Dest.getReg(), NewDest);
4558
4559 Worklist.insert(Xor);
4560
4561 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
Matt Arsenault0084adc2018-04-30 19:08:16 +00004562 }
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004563}
4564
4565void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
4566 MachineInstr &Inst,
4567 unsigned Opcode) const {
4568 MachineBasicBlock &MBB = *Inst.getParent();
4569 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4570 MachineBasicBlock::iterator MII = Inst;
4571 const DebugLoc &DL = Inst.getDebugLoc();
4572
4573 MachineOperand &Dest = Inst.getOperand(0);
4574 MachineOperand &Src0 = Inst.getOperand(1);
4575 MachineOperand &Src1 = Inst.getOperand(2);
4576
4577 unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4578 unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4579
4580 MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
4581 .add(Src0)
4582 .add(Src1);
4583
4584 MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
4585 .addReg(Interm);
4586
4587 Worklist.insert(&Op);
4588 Worklist.insert(&Not);
4589
4590 MRI.replaceRegWith(Dest.getReg(), NewDest);
4591 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4592}
4593
4594void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
4595 MachineInstr &Inst,
4596 unsigned Opcode) const {
4597 MachineBasicBlock &MBB = *Inst.getParent();
4598 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4599 MachineBasicBlock::iterator MII = Inst;
4600 const DebugLoc &DL = Inst.getDebugLoc();
4601
4602 MachineOperand &Dest = Inst.getOperand(0);
4603 MachineOperand &Src0 = Inst.getOperand(1);
4604 MachineOperand &Src1 = Inst.getOperand(2);
4605
4606 unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4607 unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4608
4609 MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
4610 .add(Src1);
4611
4612 MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
4613 .add(Src0)
4614 .addReg(Interm);
4615
4616 Worklist.insert(&Not);
4617 Worklist.insert(&Op);
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004618
Matt Arsenault0084adc2018-04-30 19:08:16 +00004619 MRI.replaceRegWith(Dest.getReg(), NewDest);
4620 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
Konstantin Zhuravlyovca8946a2017-09-18 21:22:45 +00004621}
4622
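// Split a 64-bit scalar unary op into two 32-bit halves, roughly (sketch):
//   %Dest = S_NOT_B64 %Src0
// becomes
//   %DestSub0 = <32-bit Opcode> %Src0.sub0
//   %DestSub1 = <32-bit Opcode> %Src0.sub1
//   %Dest     = REG_SEQUENCE %DestSub0, sub0, %DestSub1, sub1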
Matt Arsenault689f3252014-06-09 16:36:31 +00004623void SIInstrInfo::splitScalar64BitUnaryOp(
Alfred Huang5b270722017-07-14 17:56:55 +00004624 SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004625 unsigned Opcode) const {
4626 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault689f3252014-06-09 16:36:31 +00004627 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4628
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004629 MachineOperand &Dest = Inst.getOperand(0);
4630 MachineOperand &Src0 = Inst.getOperand(1);
4631 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault689f3252014-06-09 16:36:31 +00004632
4633 MachineBasicBlock::iterator MII = Inst;
4634
4635 const MCInstrDesc &InstDesc = get(Opcode);
4636 const TargetRegisterClass *Src0RC = Src0.isReg() ?
4637 MRI.getRegClass(Src0.getReg()) :
4638 &AMDGPU::SGPR_32RegClass;
4639
4640 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4641
4642 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4643 AMDGPU::sub0, Src0SubRC);
4644
4645 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00004646 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4647 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00004648
Matt Arsenaultf003c382015-08-26 20:47:50 +00004649 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004650 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
Matt Arsenault689f3252014-06-09 16:36:31 +00004651
4652 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4653 AMDGPU::sub1, Src0SubRC);
4654
Matt Arsenaultf003c382015-08-26 20:47:50 +00004655 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004656 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
Matt Arsenault689f3252014-06-09 16:36:31 +00004657
Matt Arsenaultf003c382015-08-26 20:47:50 +00004658 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault689f3252014-06-09 16:36:31 +00004659 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4660 .addReg(DestSub0)
4661 .addImm(AMDGPU::sub0)
4662 .addReg(DestSub1)
4663 .addImm(AMDGPU::sub1);
4664
4665 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4666
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004667 Worklist.insert(&LoHalf);
4668 Worklist.insert(&HiHalf);
4669
Matt Arsenaultf003c382015-08-26 20:47:50 +00004670 // We don't need to legalizeOperands here because for a single operand, src0
4671 // will support any kind of input.
4672
4673 // Move all users of this moved value.
4674 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault689f3252014-06-09 16:36:31 +00004675}
4676
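// Split S_ADD_U64_PSEUDO / S_SUB_U64_PSEUDO into a 32-bit pair that threads
// the carry, roughly (sketch):
//   %DestSub0, %Carry = V_ADD_I32_e64  %Src0.sub0, %Src1.sub0
//   %DestSub1         = V_ADDC_U32_e64 %Src0.sub1, %Src1.sub1, %Carry
//   %Dest             = REG_SEQUENCE %DestSub0, sub0, %DestSub1, sub1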
Scott Linder823549a2018-10-08 18:47:01 +00004677void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
4678 MachineInstr &Inst,
4679 MachineDominatorTree *MDT) const {
Matt Arsenault301162c2017-11-15 21:51:43 +00004680 bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
4681
4682 MachineBasicBlock &MBB = *Inst.getParent();
4683 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4684
4685 unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4686 unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4687 unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4688
4689 unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4690 unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
4691
4692 MachineOperand &Dest = Inst.getOperand(0);
4693 MachineOperand &Src0 = Inst.getOperand(1);
4694 MachineOperand &Src1 = Inst.getOperand(2);
4695 const DebugLoc &DL = Inst.getDebugLoc();
4696 MachineBasicBlock::iterator MII = Inst;
4697
4698 const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
4699 const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
4700 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4701 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4702
4703 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4704 AMDGPU::sub0, Src0SubRC);
4705 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4706 AMDGPU::sub0, Src1SubRC);
4707
4708
4709 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4710 AMDGPU::sub1, Src0SubRC);
4711 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4712 AMDGPU::sub1, Src1SubRC);
4713
4714 unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
4715 MachineInstr *LoHalf =
4716 BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
4717 .addReg(CarryReg, RegState::Define)
4718 .add(SrcReg0Sub0)
4719 .add(SrcReg1Sub0);
4720
4721 unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4722 MachineInstr *HiHalf =
4723 BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
4724 .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
4725 .add(SrcReg0Sub1)
4726 .add(SrcReg1Sub1)
4727 .addReg(CarryReg, RegState::Kill);
4728
4729 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4730 .addReg(DestSub0)
4731 .addImm(AMDGPU::sub0)
4732 .addReg(DestSub1)
4733 .addImm(AMDGPU::sub1);
4734
4735 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4736
4737 // Try to legalize the operands in case we need to swap the order to keep it
4738 // valid.
Scott Linder823549a2018-10-08 18:47:01 +00004739 legalizeOperands(*LoHalf, MDT);
4740 legalizeOperands(*HiHalf, MDT);
Matt Arsenault301162c2017-11-15 21:51:43 +00004741
4742 // Move all users of this moved value.
4743 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4744}
4745
Scott Linder823549a2018-10-08 18:47:01 +00004746void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
4747 MachineInstr &Inst, unsigned Opcode,
4748 MachineDominatorTree *MDT) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004749 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004750 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4751
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004752 MachineOperand &Dest = Inst.getOperand(0);
4753 MachineOperand &Src0 = Inst.getOperand(1);
4754 MachineOperand &Src1 = Inst.getOperand(2);
4755 DebugLoc DL = Inst.getDebugLoc();
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004756
4757 MachineBasicBlock::iterator MII = Inst;
4758
4759 const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault684dc802014-03-24 20:08:13 +00004760 const TargetRegisterClass *Src0RC = Src0.isReg() ?
4761 MRI.getRegClass(Src0.getReg()) :
4762 &AMDGPU::SGPR_32RegClass;
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004763
Matt Arsenault684dc802014-03-24 20:08:13 +00004764 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
4765 const TargetRegisterClass *Src1RC = Src1.isReg() ?
4766 MRI.getRegClass(Src1.getReg()) :
4767 &AMDGPU::SGPR_32RegClass;
4768
4769 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
4770
4771 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4772 AMDGPU::sub0, Src0SubRC);
4773 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4774 AMDGPU::sub0, Src1SubRC);
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004775 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
4776 AMDGPU::sub1, Src0SubRC);
4777 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
4778 AMDGPU::sub1, Src1SubRC);
Matt Arsenault684dc802014-03-24 20:08:13 +00004779
4780 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenaultf003c382015-08-26 20:47:50 +00004781 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
4782 const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault684dc802014-03-24 20:08:13 +00004783
Matt Arsenaultf003c382015-08-26 20:47:50 +00004784 unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004785 MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Diana Picus116bbab2017-01-13 09:58:52 +00004786 .add(SrcReg0Sub0)
4787 .add(SrcReg1Sub0);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004788
Matt Arsenaultf003c382015-08-26 20:47:50 +00004789 unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004790 MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Diana Picus116bbab2017-01-13 09:58:52 +00004791 .add(SrcReg0Sub1)
4792 .add(SrcReg1Sub1);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004793
Matt Arsenaultf003c382015-08-26 20:47:50 +00004794 unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004795 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4796 .addReg(DestSub0)
4797 .addImm(AMDGPU::sub0)
4798 .addReg(DestSub1)
4799 .addImm(AMDGPU::sub1);
4800
4801 MRI.replaceRegWith(Dest.getReg(), FullDestReg);
4802
Graham Sellers04f7a4d2018-11-29 16:05:38 +00004803 Worklist.insert(&LoHalf);
4804 Worklist.insert(&HiHalf);
Matt Arsenaultf003c382015-08-26 20:47:50 +00004805
4806 // Move all users of this moved value.
4807 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenaultf35182c2014-03-24 20:08:05 +00004808}
4809
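// Sketch of the transform below: a 64-bit xnor is rewritten as
//   xnor(a, b) = xor(not(a), b)
// where S_NOT_B64 is applied to whichever operand is already an SGPR, and the
// resulting S_XOR_B64 is pushed onto the worklist so it can be legalized
// (e.g. split into 32-bit halves) afterwards.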
Graham Sellersba559ac2018-12-01 12:27:53 +00004810void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
4811 MachineInstr &Inst,
4812 MachineDominatorTree *MDT) const {
4813 MachineBasicBlock &MBB = *Inst.getParent();
4814 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4815
4816 MachineOperand &Dest = Inst.getOperand(0);
4817 MachineOperand &Src0 = Inst.getOperand(1);
4818 MachineOperand &Src1 = Inst.getOperand(2);
4819 const DebugLoc &DL = Inst.getDebugLoc();
4820
4821 MachineBasicBlock::iterator MII = Inst;
4822
4823 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
4824
4825 unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
4826
4827 MachineOperand* Op0;
4828 MachineOperand* Op1;
4829
4830 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
4831 Op0 = &Src0;
4832 Op1 = &Src1;
4833 } else {
4834 Op0 = &Src1;
4835 Op1 = &Src0;
4836 }
4837
4838 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
4839 .add(*Op0);
4840
4841 unsigned NewDest = MRI.createVirtualRegister(DestRC);
4842
4843 MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
4844 .addReg(Interm)
4845 .add(*Op1);
4846
4847 MRI.replaceRegWith(Dest.getReg(), NewDest);
4848
4849 Worklist.insert(&Xor);
4850}
4851
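// Rough sketch of the expansion below (register names are made up; the second
// V_BCNT operand acts as an accumulator):
//   %mid    = V_BCNT_U32_B32_e64 %src.sub0, 0
//   %result = V_BCNT_U32_B32_e64 %src.sub1, %mid
// i.e. the 64-bit population count is the sum of the two 32-bit counts.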
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004852void SIInstrInfo::splitScalar64BitBCNT(
Alfred Huang5b270722017-07-14 17:56:55 +00004853 SetVectorType &Worklist, MachineInstr &Inst) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004854 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault8333e432014-06-10 19:18:24 +00004855 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4856
4857 MachineBasicBlock::iterator MII = Inst;
Graham Sellersba559ac2018-12-01 12:27:53 +00004858 const DebugLoc &DL = Inst.getDebugLoc();
Matt Arsenault8333e432014-06-10 19:18:24 +00004859
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004860 MachineOperand &Dest = Inst.getOperand(0);
4861 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault8333e432014-06-10 19:18:24 +00004862
Marek Olsakc5368502015-01-15 18:43:01 +00004863 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault8333e432014-06-10 19:18:24 +00004864 const TargetRegisterClass *SrcRC = Src.isReg() ?
4865 MRI.getRegClass(Src.getReg()) :
4866 &AMDGPU::SGPR_32RegClass;
4867
4868 unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4869 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4870
4871 const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
4872
4873 MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4874 AMDGPU::sub0, SrcSubRC);
4875 MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
4876 AMDGPU::sub1, SrcSubRC);
4877
Diana Picus116bbab2017-01-13 09:58:52 +00004878 BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
Matt Arsenault8333e432014-06-10 19:18:24 +00004879
Diana Picus116bbab2017-01-13 09:58:52 +00004880 BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
Matt Arsenault8333e432014-06-10 19:18:24 +00004881
4882 MRI.replaceRegWith(Dest.getReg(), ResultReg);
4883
Matt Arsenault5e7f95e2015-08-26 20:48:04 +00004884 // We don't need to legalize operands here. src0 for either instruction can be
4885 // an SGPR, and the second input is unused or determined here.
4886 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault8333e432014-06-10 19:18:24 +00004887}
4888
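// Only the sign_extend_inreg form (S_BFE_I64 with offset 0, width <= 32) is
// handled below. Roughly, for width < 32 (illustrative register names):
//   %lo  = V_BFE_I32 %src.sub0, 0, width      ; sign-extend the field
//   %hi  = V_ASHRREV_I32_e32 31, %lo          ; replicate the sign bit
//   %dst = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
// For width == 32 the low half is reused unchanged and only the high half is
// produced with an arithmetic shift of the sign bit.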
Alfred Huang5b270722017-07-14 17:56:55 +00004889void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004890 MachineInstr &Inst) const {
4891 MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault94812212014-11-14 18:18:16 +00004892 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4893 MachineBasicBlock::iterator MII = Inst;
Graham Sellersba559ac2018-12-01 12:27:53 +00004894 const DebugLoc &DL = Inst.getDebugLoc();
Matt Arsenault94812212014-11-14 18:18:16 +00004895
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004896 MachineOperand &Dest = Inst.getOperand(0);
4897 uint32_t Imm = Inst.getOperand(2).getImm();
Matt Arsenault94812212014-11-14 18:18:16 +00004898 uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
4899 uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
4900
Matt Arsenault6ad34262014-11-14 18:40:49 +00004901 (void) Offset;
4902
Matt Arsenault94812212014-11-14 18:18:16 +00004903 // Only sext_inreg cases handled.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004904 assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
4905 Offset == 0 && "Not implemented");
Matt Arsenault94812212014-11-14 18:18:16 +00004906
4907 if (BitWidth < 32) {
4908 unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4909 unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4910 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4911
4912 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004913 .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
4914 .addImm(0)
4915 .addImm(BitWidth);
Matt Arsenault94812212014-11-14 18:18:16 +00004916
4917 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
4918 .addImm(31)
4919 .addReg(MidRegLo);
4920
4921 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4922 .addReg(MidRegLo)
4923 .addImm(AMDGPU::sub0)
4924 .addReg(MidRegHi)
4925 .addImm(AMDGPU::sub1);
4926
4927 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00004928 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00004929 return;
4930 }
4931
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00004932 MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault94812212014-11-14 18:18:16 +00004933 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4934 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
4935
4936 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
4937 .addImm(31)
4938 .addReg(Src.getReg(), 0, AMDGPU::sub0);
4939
4940 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4941 .addReg(Src.getReg(), 0, AMDGPU::sub0)
4942 .addImm(AMDGPU::sub0)
4943 .addReg(TmpReg)
4944 .addImm(AMDGPU::sub1);
4945
4946 MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault445833c2015-08-26 20:47:58 +00004947 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault94812212014-11-14 18:18:16 +00004948}
4949
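// Queue every user of DstReg that cannot accept a VGPR operand for later
// conversion to the VALU. Once an instruction has been queued, the remaining
// uses inside that same instruction are skipped so it is only visited once.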
Matt Arsenaultf003c382015-08-26 20:47:50 +00004950void SIInstrInfo::addUsersToMoveToVALUWorklist(
4951 unsigned DstReg,
4952 MachineRegisterInfo &MRI,
Alfred Huang5b270722017-07-14 17:56:55 +00004953 SetVectorType &Worklist) const {
Matt Arsenaultf003c382015-08-26 20:47:50 +00004954 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00004955 E = MRI.use_end(); I != E;) {
Matt Arsenaultf003c382015-08-26 20:47:50 +00004956 MachineInstr &UseMI = *I->getParent();
4957 if (!canReadVGPR(UseMI, I.getOperandNo())) {
Alfred Huang5b270722017-07-14 17:56:55 +00004958 Worklist.insert(&UseMI);
Matt Arsenault4c1e9ec2016-12-20 18:55:06 +00004959
4960 do {
4961 ++I;
4962 } while (I != E && I->getParent() == &UseMI);
4963 } else {
4964 ++I;
Matt Arsenaultf003c382015-08-26 20:47:50 +00004965 }
4966 }
4967}
4968
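// Sketch of the S_PACK_LL_B32_B16 lowering below (illustrative register
// names); the low 16 bits of each source are packed into one 32-bit VGPR:
//   %mask = V_MOV_B32_e32 0xffff
//   %lo   = V_AND_B32_e64 %mask, %src0
//   %dst  = V_LSHL_OR_B32 %src1, 16, %lo      ; (%src1 << 16) | %lo
// The LH and HH variants use V_BFI_B32 and V_AND_OR_B32 respectively.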
Alfred Huang5b270722017-07-14 17:56:55 +00004969void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004970 MachineRegisterInfo &MRI,
4971 MachineInstr &Inst) const {
4972 unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4973 MachineBasicBlock *MBB = Inst.getParent();
4974 MachineOperand &Src0 = Inst.getOperand(1);
4975 MachineOperand &Src1 = Inst.getOperand(2);
4976 const DebugLoc &DL = Inst.getDebugLoc();
4977
4978 switch (Inst.getOpcode()) {
4979 case AMDGPU::S_PACK_LL_B32_B16: {
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00004980 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4981 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004982
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00004983 // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
4984 // 0.
4985 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4986 .addImm(0xffff);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004987
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00004988 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
4989 .addReg(ImmReg, RegState::Kill)
4990 .add(Src0);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004991
Konstantin Zhuravlyovd24aeb22017-04-13 23:17:00 +00004992 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
4993 .add(Src1)
4994 .addImm(16)
4995 .addReg(TmpReg, RegState::Kill);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00004996 break;
4997 }
4998 case AMDGPU::S_PACK_LH_B32_B16: {
4999 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5000 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
5001 .addImm(0xffff);
5002 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
5003 .addReg(ImmReg, RegState::Kill)
5004 .add(Src0)
5005 .add(Src1);
5006 break;
5007 }
5008 case AMDGPU::S_PACK_HH_B32_B16: {
5009 unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5010 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5011 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
5012 .addImm(16)
5013 .add(Src0);
5014 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
Konstantin Zhuravlyov88938d42017-04-21 19:35:05 +00005015 .addImm(0xffff0000);
Matt Arsenaulteb522e62017-02-27 22:15:25 +00005016 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
5017 .add(Src1)
5018 .addReg(ImmReg, RegState::Kill)
5019 .addReg(TmpReg, RegState::Kill);
5020 break;
5021 }
5022 default:
5023 llvm_unreachable("unhandled s_pack_* instruction");
5024 }
5025
5026 MachineOperand &Dest = Inst.getOperand(0);
5027 MRI.replaceRegWith(Dest.getReg(), ResultReg);
5028 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
5029}
5030
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005031void SIInstrInfo::addSCCDefUsersToVALUWorklist(
Alfred Huang5b270722017-07-14 17:56:55 +00005032 MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
Tom Stellardbc4497b2016-02-12 23:45:29 +00005033 // This assumes that all the users of SCC are in the same block
5034 // as the SCC def.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +00005035 for (MachineInstr &MI :
Eugene Zelenko59e12822017-08-08 00:47:13 +00005036 make_range(MachineBasicBlock::iterator(SCCDefInst),
5037 SCCDefInst.getParent()->end())) {
Tom Stellardbc4497b2016-02-12 23:45:29 +00005038 // Exit if we find another SCC def.
Stanislav Mekhanoshin13d33712018-11-09 17:58:59 +00005039 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
Tom Stellardbc4497b2016-02-12 23:45:29 +00005040 return;
5041
Stanislav Mekhanoshin13d33712018-11-09 17:58:59 +00005042 if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
Alfred Huang5b270722017-07-14 17:56:55 +00005043 Worklist.insert(&MI);
Tom Stellardbc4497b2016-02-12 23:45:29 +00005044 }
5045}
5046
Matt Arsenaultba6aae72015-09-28 20:54:57 +00005047const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
5048 const MachineInstr &Inst) const {
5049 const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
5050
5051 switch (Inst.getOpcode()) {
5052 // For target instructions, getOpRegClass just returns the virtual register
5053 // class associated with the operand, so we need to find an equivalent VGPR
5054 // register class in order to move the instruction to the VALU.
5055 case AMDGPU::COPY:
5056 case AMDGPU::PHI:
5057 case AMDGPU::REG_SEQUENCE:
5058 case AMDGPU::INSERT_SUBREG:
Connor Abbott8c217d02017-08-04 18:36:49 +00005059 case AMDGPU::WQM:
Connor Abbott92638ab2017-08-04 18:36:52 +00005060 case AMDGPU::WWM:
Matt Arsenaultba6aae72015-09-28 20:54:57 +00005061 if (RI.hasVGPRs(NewDstRC))
5062 return nullptr;
5063
5064 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
5065 if (!NewDstRC)
5066 return nullptr;
5067 return NewDstRC;
5068 default:
5069 return NewDstRC;
5070 }
5071}
5072
Matt Arsenault6c067412015-11-03 22:30:15 +00005073// Find the one SGPR operand we are allowed to use.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005074unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005075 int OpIndices[3]) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005076 const MCInstrDesc &Desc = MI.getDesc();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005077
5078 // Find the one SGPR operand we are allowed to use.
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00005079 //
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005080 // First we need to consider the instruction's operand requirements before
5081 // legalizing. Some operands are required to be SGPRs, such as implicit uses
5082 // of VCC, but we are still bound by the constant bus requirement to only use
5083 // one.
5084 //
5085 // If the operand's class is an SGPR, we can never move it.
5086
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005087 unsigned SGPRReg = findImplicitSGPRRead(MI);
Matt Arsenaulte223ceb2015-10-21 21:15:01 +00005088 if (SGPRReg != AMDGPU::NoRegister)
5089 return SGPRReg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005090
5091 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005092 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005093
5094 for (unsigned i = 0; i < 3; ++i) {
5095 int Idx = OpIndices[i];
5096 if (Idx == -1)
5097 break;
5098
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005099 const MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault6c067412015-11-03 22:30:15 +00005100 if (!MO.isReg())
5101 continue;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005102
Matt Arsenault6c067412015-11-03 22:30:15 +00005103 // Is this operand statically required to be an SGPR based on the operand
5104 // constraints?
5105 const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
5106 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
5107 if (IsRequiredSGPR)
5108 return MO.getReg();
5109
5110 // If this could be a VGPR or an SGPR, check the dynamic register class.
5111 unsigned Reg = MO.getReg();
5112 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
5113 if (RI.isSGPRClass(RegRC))
5114 UsedSGPRs[i] = Reg;
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005115 }
5116
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005117 // We don't have a required SGPR operand, so we have a bit more freedom in
5118 // selecting operands to move.
5119
5120 // Try to select the most used SGPR. If an SGPR is equal to one of the
5121 // others, we choose that.
5122 //
5123 // e.g.
5124 // V_FMA_F32 v0, s0, s0, s0 -> No moves
5125 // V_FMA_F32 v0, s0, s1, s0 -> Move s1
5126
Matt Arsenault6c067412015-11-03 22:30:15 +00005127 // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
5128 // prefer those.
5129
Matt Arsenaultee522bf2014-09-26 17:55:06 +00005130 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
5131 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
5132 SGPRReg = UsedSGPRs[0];
5133 }
5134
5135 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
5136 if (UsedSGPRs[1] == UsedSGPRs[2])
5137 SGPRReg = UsedSGPRs[1];
5138 }
5139
5140 return SGPRReg;
5141}
5142
Tom Stellard6407e1e2014-08-01 00:32:33 +00005143MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenaultace5b762014-10-17 18:00:43 +00005144 unsigned OperandName) const {
Tom Stellard1aaad692014-07-21 16:55:33 +00005145 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
5146 if (Idx == -1)
5147 return nullptr;
5148
5149 return &MI.getOperand(Idx);
5150}
Tom Stellard794c8c02014-12-02 17:05:41 +00005151
5152uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
5153 uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard4694ed02015-06-26 21:58:42 +00005154 if (ST.isAmdHsaOS()) {
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005155 // Set ATC = 1. GFX9 doesn't have this bit.
Tom Stellard5bfbae52018-07-11 20:59:01 +00005156 if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005157 RsrcDataFormat |= (1ULL << 56);
Tom Stellard794c8c02014-12-02 17:05:41 +00005158
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005159 // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
5160 // BTW, it disables TC L2 and therefore decreases performance.
Tom Stellard5bfbae52018-07-11 20:59:01 +00005161 if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
Michel Danzerbeb79ce2016-03-16 09:10:35 +00005162 RsrcDataFormat |= (2ULL << 59);
Tom Stellard4694ed02015-06-26 21:58:42 +00005163 }
5164
Tom Stellard794c8c02014-12-02 17:05:41 +00005165 return RsrcDataFormat;
5166}
Marek Olsakd1a69a22015-09-29 23:37:32 +00005167
5168uint64_t SIInstrInfo::getScratchRsrcWords23() const {
5169 uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
5170 AMDGPU::RSRC_TID_ENABLE |
5171 0xffffffff; // Size;
5172
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005173 // GFX9 doesn't have ELEMENT_SIZE.
Tom Stellard5bfbae52018-07-11 20:59:01 +00005174 if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005175 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
5176 Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
5177 }
Matt Arsenault24ee0782016-02-12 02:40:47 +00005178
Marek Olsak5c7a61d2017-03-21 17:00:39 +00005179 // IndexStride = 64.
5180 Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
Matt Arsenault24ee0782016-02-12 02:40:47 +00005181
Marek Olsakd1a69a22015-09-29 23:37:32 +00005182 // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
5183 // Clear them unless we want a huge stride.
Tom Stellard5bfbae52018-07-11 20:59:01 +00005184 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Marek Olsakd1a69a22015-09-29 23:37:32 +00005185 Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
5186
5187 return Rsrc23;
5188}
Nicolai Haehnle02c32912016-01-13 16:10:10 +00005189
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005190bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
5191 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00005192
5193 return isSMRD(Opc);
5194}
5195
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00005196bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
5197 unsigned Opc = MI.getOpcode();
Nicolai Haehnle02c32912016-01-13 16:10:10 +00005198
5199 return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
5200}
Tom Stellard2ff72622016-01-28 16:04:37 +00005201
Matt Arsenault3354f422016-09-10 01:20:33 +00005202unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
5203 int &FrameIndex) const {
5204 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
5205 if (!Addr || !Addr->isFI())
5206 return AMDGPU::NoRegister;
5207
5208 assert(!MI.memoperands_empty() &&
Matt Arsenault0da63502018-08-31 05:49:54 +00005209 (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
Matt Arsenault3354f422016-09-10 01:20:33 +00005210
5211 FrameIndex = Addr->getIndex();
5212 return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
5213}
5214
5215unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
5216 int &FrameIndex) const {
5217 const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
5218 assert(Addr && Addr->isFI());
5219 FrameIndex = Addr->getIndex();
5220 return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
5221}
5222
5223unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
5224 int &FrameIndex) const {
Matt Arsenault3354f422016-09-10 01:20:33 +00005225 if (!MI.mayLoad())
5226 return AMDGPU::NoRegister;
5227
5228 if (isMUBUF(MI) || isVGPRSpill(MI))
5229 return isStackAccess(MI, FrameIndex);
5230
5231 if (isSGPRSpill(MI))
5232 return isSGPRStackAccess(MI, FrameIndex);
5233
5234 return AMDGPU::NoRegister;
5235}
5236
5237unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
5238 int &FrameIndex) const {
5239 if (!MI.mayStore())
5240 return AMDGPU::NoRegister;
5241
5242 if (isMUBUF(MI) || isVGPRSpill(MI))
5243 return isStackAccess(MI, FrameIndex);
5244
5245 if (isSGPRSpill(MI))
5246 return isSGPRStackAccess(MI, FrameIndex);
5247
5248 return AMDGPU::NoRegister;
5249}
5250
Matt Arsenault9ab1fa62017-10-04 22:59:12 +00005251unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
5252 unsigned Size = 0;
5253 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
5254 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
5255 while (++I != E && I->isInsideBundle()) {
5256 assert(!I->isBundle() && "No nested bundle!");
5257 Size += getInstSizeInBytes(*I);
5258 }
5259
5260 return Size;
5261}
5262
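// Worked example for the size computation below (illustrative): a VALU or
// SALU instruction with a 4-byte fixed encoding that also carries a
// non-inline 32-bit literal in one of its source operands is reported as
// 4 + 4 = 8 bytes, while the same instruction using only inline constants
// stays at its fixed 4-byte size.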
Matt Arsenault02458c22016-06-06 20:10:33 +00005263unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
5264 unsigned Opc = MI.getOpcode();
5265 const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
5266 unsigned DescSize = Desc.getSize();
5267
5268 // If we have a definitive size, we can use it. Otherwise we need to inspect
5269 // the operands to know the size.
Matt Arsenault0183c562018-07-27 09:15:03 +00005270 if (isFixedSize(MI))
5271 return DescSize;
5272
Matt Arsenault02458c22016-06-06 20:10:33 +00005273 // 4-byte instructions may have a 32-bit literal encoded after them. Check
5274 // operands that could ever be literals.
5275 if (isVALU(MI) || isSALU(MI)) {
5276 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
5277 if (Src0Idx == -1)
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005278 return DescSize; // No operands.
Matt Arsenault02458c22016-06-06 20:10:33 +00005279
Matt Arsenault4bd72362016-12-10 00:39:12 +00005280 if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005281 return DescSize + 4;
Matt Arsenault02458c22016-06-06 20:10:33 +00005282
5283 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5284 if (Src1Idx == -1)
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005285 return DescSize;
Matt Arsenault02458c22016-06-06 20:10:33 +00005286
Matt Arsenault4bd72362016-12-10 00:39:12 +00005287 if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005288 return DescSize + 4;
Matt Arsenault02458c22016-06-06 20:10:33 +00005289
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005290 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5291 if (Src2Idx == -1)
5292 return DescSize;
5293
5294 if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
5295 return DescSize + 4;
5296
5297 return DescSize;
Matt Arsenault02458c22016-06-06 20:10:33 +00005298 }
5299
5300 switch (Opc) {
5301 case TargetOpcode::IMPLICIT_DEF:
5302 case TargetOpcode::KILL:
5303 case TargetOpcode::DBG_VALUE:
Matt Arsenault02458c22016-06-06 20:10:33 +00005304 case TargetOpcode::EH_LABEL:
5305 return 0;
Matt Arsenault9ab1fa62017-10-04 22:59:12 +00005306 case TargetOpcode::BUNDLE:
5307 return getInstBundleSize(MI);
Matt Arsenault02458c22016-06-06 20:10:33 +00005308 case TargetOpcode::INLINEASM: {
5309 const MachineFunction *MF = MI.getParent()->getParent();
5310 const char *AsmStr = MI.getOperand(0).getSymbolName();
5311 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
5312 }
5313 default:
Nicolai Haehnle283b9952018-08-29 07:46:09 +00005314 return DescSize;
Matt Arsenault02458c22016-06-06 20:10:33 +00005315 }
5316}
5317
Tom Stellard6695ba02016-10-28 23:53:48 +00005318bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
5319 if (!isFLAT(MI))
5320 return false;
5321
5322 if (MI.memoperands_empty())
5323 return true;
5324
5325 for (const MachineMemOperand *MMO : MI.memoperands()) {
Matt Arsenault0da63502018-08-31 05:49:54 +00005326 if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
Tom Stellard6695ba02016-10-28 23:53:48 +00005327 return true;
5328 }
5329 return false;
5330}
5331
Jan Sjodina06bfe02017-05-15 20:18:37 +00005332bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
5333 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
5334}
5335
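// Sketch of the if-region conversion below: a terminating
//   SI_NON_UNIFORM_BRCOND_PSEUDO %cond, %endbb
// at the end of IfEntry is replaced (roughly) with an exec-mask based form:
//   IfEntry:  %saved:sreg_64 = SI_IF %cond, %endbb
//   IfEnd:    SI_END_CF %saved        ; inserted before the first non-PHI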
5336void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
5337 MachineBasicBlock *IfEnd) const {
5338 MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
5339 assert(TI != IfEntry->end());
5340
5341 MachineInstr *Branch = &(*TI);
5342 MachineFunction *MF = IfEntry->getParent();
5343 MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
5344
5345 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5346 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5347 MachineInstr *SIIF =
5348 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
5349 .add(Branch->getOperand(0))
5350 .add(Branch->getOperand(1));
5351 MachineInstr *SIEND =
5352 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
5353 .addReg(DstReg);
5354
5355 IfEntry->erase(TI);
5356 IfEntry->insert(IfEntry->end(), SIIF);
5357 IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
5358 }
5359}
5360
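// Sketch of the loop conversion below (made-up register names): the
// non-uniform back-edge branch in LoopEnd becomes
//   LoopEntry:  %mask = PHI [ 0, <other preds> ], [ %next, LoopEnd ]
//   LoopEnd:    %next = SI_IF_BREAK %mask, %cond
//               SI_LOOP %next, LoopEntry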
5361void SIInstrInfo::convertNonUniformLoopRegion(
5362 MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
5363 MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
5364 // We expect 2 terminators, one conditional and one unconditional.
5365 assert(TI != LoopEnd->end());
5366
5367 MachineInstr *Branch = &(*TI);
5368 MachineFunction *MF = LoopEnd->getParent();
5369 MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
5370
5371 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5372
5373 unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5374 unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5375 MachineInstrBuilder HeaderPHIBuilder =
5376 BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
5377 for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
5378 E = LoopEntry->pred_end();
5379 PI != E; ++PI) {
5380 if (*PI == LoopEnd) {
5381 HeaderPHIBuilder.addReg(BackEdgeReg);
5382 } else {
5383 MachineBasicBlock *PMBB = *PI;
5384 unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5385 materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
5386 ZeroReg, 0);
5387 HeaderPHIBuilder.addReg(ZeroReg);
5388 }
5389 HeaderPHIBuilder.addMBB(*PI);
5390 }
5391 MachineInstr *HeaderPhi = HeaderPHIBuilder;
5392 MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
5393 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
5394 .addReg(DstReg)
5395 .add(Branch->getOperand(0));
5396 MachineInstr *SILOOP =
5397 BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
5398 .addReg(BackEdgeReg)
5399 .addMBB(LoopEntry);
5400
5401 LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
5402 LoopEnd->erase(TI);
5403 LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
5404 LoopEnd->insert(LoopEnd->end(), SILOOP);
5405 }
5406}
5407
Tom Stellard2ff72622016-01-28 16:04:37 +00005408ArrayRef<std::pair<int, const char *>>
5409SIInstrInfo::getSerializableTargetIndices() const {
5410 static const std::pair<int, const char *> TargetIndices[] = {
5411 {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
5412 {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
5413 {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
5414 {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
5415 {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
5416 return makeArrayRef(TargetIndices);
5417}
Tom Stellardcb6ba622016-04-30 00:23:06 +00005418
5419/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
5420/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
5421ScheduleHazardRecognizer *
5422SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
5423 const ScheduleDAG *DAG) const {
5424 return new GCNHazardRecognizer(DAG->MF);
5425}
5426
5427/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
5428/// pass.
5429ScheduleHazardRecognizer *
5430SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
5431 return new GCNHazardRecognizer(MF);
5432}
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00005433
Matt Arsenault3f031e72017-07-02 23:21:48 +00005434std::pair<unsigned, unsigned>
5435SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5436 return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
5437}
5438
5439ArrayRef<std::pair<unsigned, const char *>>
5440SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5441 static const std::pair<unsigned, const char *> TargetFlags[] = {
5442 { MO_GOTPCREL, "amdgpu-gotprel" },
5443 { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
5444 { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
5445 { MO_REL32_LO, "amdgpu-rel32-lo" },
5446 { MO_REL32_HI, "amdgpu-rel32-hi" }
5447 };
5448
5449 return makeArrayRef(TargetFlags);
5450}
5451
Stanislav Mekhanoshin6ec3e3a2017-01-20 00:44:31 +00005452bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
5453 return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
5454 MI.modifiesRegister(AMDGPU::EXEC, &RI);
5455}
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00005456
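// getAddNoCarry returns a partially built add: on subtargets with a
// carry-less add it uses V_ADD_U32_e64 directly, otherwise V_ADD_I32_e64
// with a dead scratch carry register hinted to VCC. Illustrative use
// (caller-supplied names):
//   TII->getAddNoCarry(MBB, I, DL, DestReg)
//       .addReg(BaseReg)
//       .addImm(Offset);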
5457MachineInstrBuilder
5458SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
5459 MachineBasicBlock::iterator I,
5460 const DebugLoc &DL,
5461 unsigned DestReg) const {
Matt Arsenault686d5c72017-11-30 23:42:30 +00005462 if (ST.hasAddNoCarry())
5463 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00005464
Matt Arsenault686d5c72017-11-30 23:42:30 +00005465 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00005466 unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
Matt Arsenault686d5c72017-11-30 23:42:30 +00005467 MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
Stanislav Mekhanoshin86b0a542017-04-14 00:33:44 +00005468
5469 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
5470 .addReg(UnusedCarry, RegState::Define | RegState::Dead);
5471}
Marek Olsakce76ea02017-10-24 10:27:13 +00005472
5473bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
5474 switch (Opcode) {
5475 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
5476 case AMDGPU::SI_KILL_I1_TERMINATOR:
5477 return true;
5478 default:
5479 return false;
5480 }
5481}
5482
5483const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
5484 switch (Opcode) {
5485 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
5486 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
5487 case AMDGPU::SI_KILL_I1_PSEUDO:
5488 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
5489 default:
5490 llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
5491 }
5492}
Tom Stellard44b30b42018-05-22 02:03:23 +00005493
5494bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
5495 if (!isSMRD(MI))
5496 return false;
5497
5498 // Check that it is using a buffer resource.
5499 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
5500 if (Idx == -1) // e.g. s_memtime
5501 return false;
5502
5503 const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
5504 return RCID == AMDGPU::SReg_128RegClassID;
5505}
Tom Stellardc5a154d2018-06-28 23:47:12 +00005506
5507// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
5508enum SIEncodingFamily {
5509 SI = 0,
5510 VI = 1,
5511 SDWA = 2,
5512 SDWA9 = 3,
5513 GFX80 = 4,
5514 GFX9 = 5
5515};
5516
Tom Stellard5bfbae52018-07-11 20:59:01 +00005517static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
Tom Stellardc5a154d2018-06-28 23:47:12 +00005518 switch (ST.getGeneration()) {
Tom Stellard5bfbae52018-07-11 20:59:01 +00005519 default:
5520 break;
5521 case AMDGPUSubtarget::SOUTHERN_ISLANDS:
5522 case AMDGPUSubtarget::SEA_ISLANDS:
Tom Stellardc5a154d2018-06-28 23:47:12 +00005523 return SIEncodingFamily::SI;
Tom Stellard5bfbae52018-07-11 20:59:01 +00005524 case AMDGPUSubtarget::VOLCANIC_ISLANDS:
5525 case AMDGPUSubtarget::GFX9:
Tom Stellardc5a154d2018-06-28 23:47:12 +00005526 return SIEncodingFamily::VI;
5527 }
5528 llvm_unreachable("Unknown subtarget generation!");
5529}
5530
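// Brief sketch of the mapping below: the pseudo opcode is translated through
// AMDGPU::getMCOpcode for the subtarget's encoding family; for example, an
// SDWA pseudo resolves through SIEncodingFamily::SDWA9 on GFX9 and
// SIEncodingFamily::SDWA on VI. A table result of -1 means the opcode is
// already a native instruction; (uint16_t)-1 means it has no encoding here.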
5531int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
5532 SIEncodingFamily Gen = subtargetEncodingFamily(ST);
5533
5534 if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
Tom Stellard5bfbae52018-07-11 20:59:01 +00005535 ST.getGeneration() >= AMDGPUSubtarget::GFX9)
Tom Stellardc5a154d2018-06-28 23:47:12 +00005536 Gen = SIEncodingFamily::GFX9;
5537
5538 if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
Tom Stellard5bfbae52018-07-11 20:59:01 +00005539 Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
Tom Stellardc5a154d2018-06-28 23:47:12 +00005540 : SIEncodingFamily::SDWA;
5541 // Adjust the encoding family to GFX80 for D16 buffer instructions when the
5542 // subtarget has UnpackedD16VMem feature.
5543 // TODO: remove this when we discard GFX80 encoding.
5544 if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
5545 Gen = SIEncodingFamily::GFX80;
5546
5547 int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
5548
5549 // -1 means that Opcode is already a native instruction.
5550 if (MCOp == -1)
5551 return Opcode;
5552
5553 // (uint16_t)-1 means that Opcode is a pseudo instruction that has
5554 // no encoding in the given subtarget generation.
5555 if (MCOp == (uint16_t)-1)
5556 return -1;
5557
5558 return MCOp;
5559}
Valery Pykhtin3d9afa22018-11-30 14:21:56 +00005560
5561static
5562TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
5563 assert(RegOpnd.isReg());
5564 return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
5565 getRegSubRegPair(RegOpnd);
5566}
5567
5568TargetInstrInfo::RegSubRegPair
5569llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
5570 assert(MI.isRegSequence());
5571 for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
5572 if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
5573 auto &RegOp = MI.getOperand(1 + 2 * I);
5574 return getRegOrUndef(RegOp);
5575 }
5576 return TargetInstrInfo::RegSubRegPair();
5577}
5578
5579// Try to find the definition of reg:subreg in subreg-manipulation pseudos
5580// Following a subreg of reg:subreg isn't supported
5581static bool followSubRegDef(MachineInstr &MI,
5582 TargetInstrInfo::RegSubRegPair &RSR) {
5583 if (!RSR.SubReg)
5584 return false;
5585 switch (MI.getOpcode()) {
5586 default: break;
5587 case AMDGPU::REG_SEQUENCE:
5588 RSR = getRegSequenceSubReg(MI, RSR.SubReg);
5589 return true;
5590 // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg
5591 case AMDGPU::INSERT_SUBREG:
5592 if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
5593 // inserted the subreg we're looking for
5594 RSR = getRegOrUndef(MI.getOperand(2));
5595 else { // the subreg in the rest of the reg
5596 auto R1 = getRegOrUndef(MI.getOperand(1));
5597 if (R1.SubReg) // subreg of subreg isn't supported
5598 return false;
5599 RSR.Reg = R1.Reg;
5600 }
5601 return true;
5602 }
5603 return false;
5604}
5605
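// getVRegSubRegDef walks SSA def chains to find the instruction that actually
// defines a reg:subreg pair, looking through COPY / V_MOV_B32 and the
// subreg-manipulation pseudos handled by followSubRegDef above. Illustrative
// (made-up) chain:
//   %2:vreg_64  = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
//   %3:vgpr_32  = COPY %2.sub1
// A query for %3 looks through the COPY and the REG_SEQUENCE and returns the
// instruction that defines %1.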
5606MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
5607 MachineRegisterInfo &MRI) {
5608 assert(MRI.isSSA());
5609 if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
5610 return nullptr;
5611
5612 auto RSR = P;
5613 auto *DefInst = MRI.getVRegDef(RSR.Reg);
5614 while (auto *MI = DefInst) {
5615 DefInst = nullptr;
5616 switch (MI->getOpcode()) {
5617 case AMDGPU::COPY:
5618 case AMDGPU::V_MOV_B32_e32: {
5619 auto &Op1 = MI->getOperand(1);
5620 if (Op1.isReg() &&
5621 TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
5622 if (Op1.isUndef())
5623 return nullptr;
5624 RSR = getRegSubRegPair(Op1);
5625 DefInst = MRI.getVRegDef(RSR.Reg);
5626 }
5627 break;
5628 }
5629 default:
5630 if (followSubRegDef(*MI, RSR)) {
5631 if (!RSR.Reg)
5632 return nullptr;
5633 DefInst = MRI.getVRegDef(RSR.Reg);
5634 }
5635 }
5636 if (!DefInst)
5637 return MI;
5638 }
5639 return nullptr;
5640}