//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests
// for long branches.
static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                 cl::desc("Restrict range of branch instructions (DEBUG)"));

SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    RI(ST), ST(ST) {}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

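/// Count the operands of \p Node, ignoring any glue operands the scheduler
/// may have appended at the end of the operand list.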
static unsigned getNumOperandsNoGlue(SDNode *Node) {
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
    --N;
  return N;
}

/// Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
  unsigned Opc0 = N0->getMachineOpcode();
  unsigned Opc1 = N1->getMachineOpcode();

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)
    return true;

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))
    return false;

  // getNamedOperandIdx returns the index for the MachineInstr's operands,
  // which includes the result as the first operand. We are indexing into the
  // MachineSDNode's operands, so we need to skip the result operand to get
  // the real index.
  --Op0Idx;
  --Op1Idx;

  return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}

bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                                    AliasAnalysis *AA) const {
  // TODO: The generic check fails for VALU instructions that should be
  // rematerializable due to implicit reads of exec. We really want all of the
  // generic logic here except for that one exec check.
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
    // No implicit operands.
    return MI.getNumOperands() == MI.getDesc().getNumOperands();
  default:
    return false;
  }
}

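/// Returns true if \p Load0 and \p Load1 are loads from the same base
/// pointer, reporting their constant offsets in \p Offset0 and \p Offset1.
/// Only DS/DS, SMRD/SMRD, and buffer (MUBUF/MTBUF) pairs are analyzed;
/// any other pairing is conservatively rejected.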
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                          int64_t &Offset0,
                                          int64_t &Offset1) const {
  if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
    return false;

  unsigned Opc0 = Load0->getMachineOpcode();
  unsigned Opc1 = Load1->getMachineOpcode();

  // Make sure both are actually loads.
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
    return false;

  if (isDS(Opc0) && isDS(Opc1)) {
    // FIXME: Handle this case:
    if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
      return false;

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    // Skip read2 / write2 variants for simplicity.
    // TODO: We should report true if the used offsets are adjacent (excluding
    // the st64 versions).
    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)
      return false;

    // XXX - be careful of dataless loads
    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 =
        cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 =
        cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
    return true;
  }

  if (isSMRD(Opc0) && isSMRD(Opc1)) {
    // Skip time and cache invalidation instructions.
    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
      return false;

    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));

    // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
      return false;

    const ConstantSDNode *Load0Offset =
        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
    const ConstantSDNode *Load1Offset =
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)
      return false;

    Offset0 = Load0Offset->getZExtValue();
    Offset1 = Load1Offset->getZExtValue();
    return true;
  }

  // MUBUF and MTBUF can access the same addresses.
  if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
    // MUBUF and MTBUF have vaddr at different indices.
    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
      return false;

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)
      return false;

    // getNamedOperandIdx returns the index for MachineInstrs. Since they
    // include the output in the operand list, but SDNodes don't, we need to
    // subtract the index by one.
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    SDValue Off0 = Load0->getOperand(OffIdx0);
    SDValue Off1 = Load1->getOperand(OffIdx1);

    // The offset might be a FrameIndexSDNode.
    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
      return false;

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
    return true;
  }

  return false;
}

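/// The ST64 forms of read2/write2 encode their offsets in units of
/// 64 elements rather than 1, so callers scale the element size by 64 when
/// converting those offsets to bytes.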
static bool isStride64(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
    return true;
  default:
    return false;
  }
}

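/// Decompose a DS, MUBUF/MTBUF, SMRD, or FLAT memory access into a base
/// register operand (\p BaseOp) and an immediate offset (\p Offset).
/// Returns false for addressing modes this analysis cannot represent, such
/// as a register soffset on buffer accesses or two FLAT offsets.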
bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
                                          const MachineOperand *&BaseOp,
                                          int64_t &Offset,
                                          const TargetRegisterInfo *TRI) const {
  unsigned Opc = LdSt.getOpcode();

  if (isDS(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (OffsetImm) {
      // Normal, single offset LDS instruction.
      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
      // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
      // report that here?
      if (!BaseOp)
        return false;

      Offset = OffsetImm->getImm();
      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                                "operands of type register.");
      return true;
    }

    // The 2 offset instructions use offset0 and offset1 instead. We can treat
    // these as a load with a single offset if the 2 offsets are consecutive.
    // We will use this for some partially aligned loads.
    const MachineOperand *Offset0Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset0);
    const MachineOperand *Offset1Imm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset1);

    uint8_t Offset0 = Offset0Imm->getImm();
    uint8_t Offset1 = Offset1Imm->getImm();

    if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
      // Each of these offsets is in element sized units, so we need to
      // convert to bytes of the individual reads.

      unsigned EltSize;
      if (LdSt.mayLoad())
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
      else {
        assert(LdSt.mayStore());
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
      }

      if (isStride64(Opc))
        EltSize *= 64;

      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
      Offset = EltSize * Offset0;
      assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                                "operands of type register.");
      return true;
    }

    return false;
  }

  if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
    const MachineOperand *SOffset =
        getNamedOperand(LdSt, AMDGPU::OpName::soffset);
    if (SOffset && SOffset->isReg())
      return false;

    const MachineOperand *AddrReg =
        getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (!AddrReg)
      return false;

    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    BaseOp = AddrReg;
    Offset = OffsetImm->getImm();

    if (SOffset) // soffset can be an inline immediate.
      Offset += SOffset->getImm();

    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  if (isSMRD(LdSt)) {
    const MachineOperand *OffsetImm =
        getNamedOperand(LdSt, AMDGPU::OpName::offset);
    if (!OffsetImm)
      return false;

    const MachineOperand *SBaseReg =
        getNamedOperand(LdSt, AMDGPU::OpName::sbase);
    BaseOp = SBaseReg;
    Offset = OffsetImm->getImm();
    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  if (isFLAT(LdSt)) {
    const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
    if (VAddr) {
      // Can't analyze 2 offsets.
      if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
        return false;

      BaseOp = VAddr;
    } else {
      // Scratch instructions have either vaddr or saddr.
      BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
    }

    Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
    assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                              "operands of type register.");
    return true;
  }

  return false;
}

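/// Return true when \p MI1 and \p MI2 are known to access the same base
/// pointer: either their base operands are identical, or their single memory
/// operands resolve to the same underlying IR object.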
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
                                  const MachineOperand &BaseOp1,
                                  const MachineInstr &MI2,
                                  const MachineOperand &BaseOp2) {
  // Support only base operands with base registers.
  // Note: this could be extended to support FI operands.
  if (!BaseOp1.isReg() || !BaseOp2.isReg())
    return false;

  if (BaseOp1.isIdenticalTo(BaseOp2))
    return true;

  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
    return false;

  auto MO1 = *MI1.memoperands_begin();
  auto MO2 = *MI2.memoperands_begin();
  if (MO1->getAddrSpace() != MO2->getAddrSpace())
    return false;

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)
    return false;
  const MachineFunction &MF = *MI1.getParent()->getParent();
  const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
  Base1 = GetUnderlyingObject(Base1, DL);
  Base2 = GetUnderlyingObject(Base2, DL);

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
    return false;

  return Base1 == Base2;
}

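/// Called by the scheduler to decide whether two memory operations should be
/// kept adjacent. Clustering is capped by the number of bytes the cluster
/// would load: with the 16 byte threshold below, four dword loads
/// (4 * 4 bytes) may cluster, but a fifth would push the estimate over the
/// budget.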
bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
                                      const MachineOperand &BaseOp2,
                                      unsigned NumLoads) const {
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();

  if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
    return false;

  const MachineOperand *FirstDst = nullptr;
  const MachineOperand *SecondDst = nullptr;

  if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
      (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
      (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
    const unsigned MaxGlobalLoadCluster = 6;
    if (NumLoads > MaxGlobalLoadCluster)
      return false;

    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
    if (!FirstDst)
      FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
    if (!SecondDst)
      SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
  } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
    FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
    SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
  }

  if (!FirstDst || !SecondDst)
    return false;

  // Try to limit clustering based on the total number of bytes loaded
  // rather than the number of instructions. This is done to help reduce
  // register pressure. The method used is somewhat inexact, though,
  // because it assumes that all loads in the cluster will load the
  // same number of bytes as FirstLdSt.

  // The unit of this value is bytes.
  // FIXME: This needs finer tuning.
  unsigned LoadClusterThreshold = 16;

  const MachineRegisterInfo &MRI =
      FirstLdSt.getParent()->getParent()->getRegInfo();

  const unsigned Reg = FirstDst->getReg();

  const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
                                         ? MRI.getRegClass(Reg)
                                         : RI.getPhysRegClass(Reg);

  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}

// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have at most 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

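// Emit an "illegal SGPR to VGPR copy" diagnostic, then lower the copy to the
// SI_ILLEGAL_COPY pseudo so code generation can continue past the error.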
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) {
  MachineFunction *MF = MBB.getParent();
  DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
                                        "illegal SGPR to VGPR copy",
                                        DL, DS_Error);
  LLVMContext &C = MF->getFunction().getContext();
  C.diagnose(IllegalCopy);

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
}

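// Copies wider than 32 bits are split into equal-sized sub-register moves.
// When the destination overlaps the source, the sub-registers are visited in
// the direction (Forward or backward) that reads each source piece before it
// can be clobbered.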
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              const DebugLoc &DL, unsigned DestReg,
                              unsigned SrcReg, bool KillSrc) const {
  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);

  if (RC == &AMDGPU::VGPR_32RegClass) {
    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
      BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
          .addImm(-1)
          .addImm(0);
      return;
    }

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
          .addReg(SrcReg, getKillRegState(KillSrc));
      } else {
        // FIXME: Hack until VReg_1 removed.
        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
          .addImm(0)
          .addReg(SrcReg, getKillRegState(KillSrc));
      }

      return;
    }

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }

    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AMDGPU::SCC) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RC)) {
    // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
    if (!(RI.getRegSizeInBits(*RC) % 64)) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }

    if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
      return;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
  bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    unsigned SubIdx;
    if (Forward)
      SubIdx = SubIndices[Idx];
    else
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx));

    if (Idx == 0)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);

    bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
    Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit);
  }
}

int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode.
  NewOpc = AMDGPU::getCommuteRev(Opcode);
  if (NewOpc != -1)
    // Check if the commuted (REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  // Try to map commuted to original opcode.
  NewOpc = AMDGPU::getCommuteOrig(Opcode);
  if (NewOpc != -1)
    // Check if the original (non-REV) opcode exists on the target.
    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;

  return Opcode;
}

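// Materialize \p Value into \p DestReg using the cheapest mov available for
// its register class. Wider classes are written sub-register by
// sub-register: the lowest piece receives the value and the remaining pieces
// are zeroed.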
void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       const DebugLoc &DL, unsigned DestReg,
                                       int64_t Value) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
      .addImm(Value);
    return;
  }

  if (RegClass == &AMDGPU::VGPR_32RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
      .addImm(Value);
    return;
  }
  if (RegClass == &AMDGPU::VReg_64RegClass) {
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
      .addImm(Value);
    return;
  }

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isSGPRClass(RegClass)) {
    if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      EltSize = 8;
    } else {
      Opcode = AMDGPU::S_MOV_B32;
      EltSize = 4;
    }
  }

  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;

    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);
  }
}

const TargetRegisterClass *
SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
  return &AMDGPU::VGPR_32RegClass;
}

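// Lower a select of a VGPR_32 value into V_CNDMASK_B32, first materializing
// the condition (a plain register, SCC, VCC, or EXEC compare) into an
// SReg_64 lane mask that the VALU instruction can consume.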
void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     const DebugLoc &DL, unsigned DstReg,
                                     ArrayRef<MachineOperand> Cond,
                                     unsigned TrueReg,
                                     unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
    BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
      .add(Cond[0]);
    BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)
      .addReg(FalseReg)
      .addImm(0)
      .addReg(TrueReg)
      .addReg(SReg);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(FalseReg)
        .addImm(0)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::SCC_FALSE: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(FalseReg)
        .addImm(0)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::VCCNZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(FalseReg)
        .addImm(0)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::VCCZ: {
      MachineOperand RegOp = Cond[1];
      RegOp.setImplicit(false);
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
        .add(RegOp);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(TrueReg)
        .addImm(0)
        .addReg(FalseReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::EXECNZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(-1)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(FalseReg)
        .addImm(0)
        .addReg(TrueReg)
        .addReg(SReg);
      break;
    }
    case SIInstrInfo::EXECZ: {
      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
        .addImm(0);
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
        .addImm(0)
        .addImm(-1);
      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .addReg(FalseReg)
        .addImm(0)
        .addReg(TrueReg)
        .addReg(SReg);
      llvm_unreachable("Unhandled branch predicate EXECZ");
      break;
    }
    default:
      llvm_unreachable("invalid branch predicate");
    }
  } else {
    llvm_unreachable("Can only handle Cond size 1 or 2");
  }
}

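// Emit a V_CMP of \p SrcReg against an immediate and return the SReg_64
// register that receives the per-lane comparison result.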
unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL,
                               unsigned SrcReg, int Value) const {
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
    .addImm(Value)
    .addReg(SrcReg);

  return Reg;
}

unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  }
  return AMDGPU::COPY;
}

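// Map a spill size in bytes to the matching SI_SPILL_*_SAVE pseudo; these
// pseudos are lowered to real scratch accesses later, once frame offsets
// are known.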
static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_S64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_S96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_S128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_S160_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_S256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_S512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_V64_SAVE;
  case 12:
    return AMDGPU::SI_SPILL_V96_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_V128_SAVE;
  case 20:
    return AMDGPU::SI_SPILL_V160_SAVE;
  case 32:
    return AMDGPU::SI_SPILL_V256_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_V512_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const DebugLoc &DL = MBB.findDebugLoc(MI);

  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               Size, Align);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for spilling SGPRs.
    const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));

    // The SGPR spill/restore instructions only work on numbered SGPRs, so we
    // need to make sure we are using the correct register class.
    if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    // Add the scratch resource registers as implicit uses because we may end
    // up needing them, and need to ensure that the reserved registers are
    // correctly handled.
    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
      .addReg(SrcReg, getKillRegState(isKill)) // data
      .addFrameIndex(FrameIndex)               // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);

    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
  MFI->setHasSpilledVGPRs();
  BuildMI(MBB, MI, DL, get(Opcode))
    .addReg(SrcReg, getKillRegState(isKill)) // data
    .addFrameIndex(FrameIndex)               // addr
    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
    .addImm(0)                               // offset
    .addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_S32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_S64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_S96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_S128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_S160_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_S256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_S512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_V32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_V64_RESTORE;
  case 12:
    return AMDGPU::SI_SPILL_V96_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_V128_RESTORE;
  case 20:
    return AMDGPU::SI_SPILL_V160_RESTORE;
  case 32:
    return AMDGPU::SI_SPILL_V256_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_V512_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const DebugLoc &DL = MBB.findDebugLoc(MI);
  unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
  unsigned Size = FrameInfo.getObjectSize(FrameIndex);
  unsigned SpillSize = TRI->getSpillSize(*RC);

  MachinePointerInfo PtrInfo
    = MachinePointerInfo::getFixedStack(*MF, FrameIndex);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, Size, Align);

  if (RI.isSGPRClass(RC)) {
    MFI->setHasSpilledSGPRs();

    // FIXME: Maybe this should not include a memoperand because it will be
    // lowered to non-memory instructions.
    const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
    if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
    }

    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
    MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
      .addFrameIndex(FrameIndex) // addr
      .addMemOperand(MMO)
      .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);

    if (ST.hasScalarStores()) {
      // m0 is used for offset to scalar stores if used to spill.
      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
    }

    return;
  }

  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
    .addFrameIndex(FrameIndex)        // vaddr
    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
    .addImm(0)                        // offset
    .addMemOperand(MMO);
}

/// \param FrameOffset Offset in bytes of the FrameIndex being spilled
unsigned SIInstrInfo::calculateLDSSpillAddress(
    MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
    unsigned FrameOffset, unsigned Size) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const DebugLoc &DL = MBB.findDebugLoc(MI);
  unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
  unsigned WavefrontSize = ST.getWavefrontSize();

  unsigned TIDReg = MFI->getTIDReg();
  if (!MFI->hasCalculatedTID()) {
    MachineBasicBlock &Entry = MBB.getParent()->front();
    MachineBasicBlock::iterator Insert = Entry.front();
    const DebugLoc &DL = Insert->getDebugLoc();

    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
                                   *MF);
    if (TIDReg == AMDGPU::NoRegister)
      return TIDReg;

    if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
        WorkGroupSize > WavefrontSize) {
      unsigned TIDIGXReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
      unsigned TIDIGYReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
      unsigned TIDIGZReg
        = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
      unsigned InputPtrReg =
          MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
      for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
        if (!Entry.isLiveIn(Reg))
          Entry.addLiveIn(Reg);
      }

      RS->enterBasicBlock(Entry);
      // FIXME: Can we scavenge an SReg_64 and access the subregs?
      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Z);
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
        .addReg(InputPtrReg)
        .addImm(SI::KernelInputOffsets::NGROUPS_Y);

      // NGROUPS.X * NGROUPS.Y
      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
        .addReg(STmp1)
        .addReg(STmp0);
      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
        .addReg(STmp1)
        .addReg(TIDIGXReg);
      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
        .addReg(STmp0)
        .addReg(TIDIGYReg)
        .addReg(TIDReg);
      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
      getAddNoCarry(Entry, Insert, DL, TIDReg)
        .addReg(TIDReg)
        .addReg(TIDIGZReg)
        .addImm(0); // clamp bit
    } else {
      // Get the wave id.
      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addImm(0);

      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
              TIDReg)
        .addImm(-1)
        .addReg(TIDReg);
    }

    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
            TIDReg)
      .addImm(2)
      .addReg(TIDReg);
    MFI->setTIDReg(TIDReg);
  }

  // Add FrameIndex to LDS offset.
  unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
  getAddNoCarry(MBB, MI, DL, TmpReg)
    .addImm(LDSOffset)
    .addReg(TIDReg)
    .addImm(0); // clamp bit

  return TmpReg;
}

Tom Stellardd37630e2016-04-07 14:47:07 +00001152void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
1153 MachineBasicBlock::iterator MI,
Nicolai Haehnle87323da2015-12-17 16:46:42 +00001154 int Count) const {
Tom Stellard341e2932016-05-02 18:02:24 +00001155 DebugLoc DL = MBB.findDebugLoc(MI);
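  // s_nop's immediate encodes (wait states - 1) and saturates at 7, so Count
  // wait states are covered with ceil(Count / 8) s_nop instructions.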
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
        .addImm(Arg);
  }
}

void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  insertWaitStates(MBB, MI, 1);
}

void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
  auto MF = MBB.getParent();
  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {
        BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0);
      } else {
        BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG));
      }
    }
  }
}

unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return 1; // FIXME: Do wait states equal cycles?

  case AMDGPU::S_NOP:
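    // The immediate operand encodes (wait states - 1).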
    return MI.getOperand(0).getImm() + 1;
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI.getOpcode()) {
  default: return TargetInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::S_MOV_B64_term:
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;

  case AMDGPU::S_XOR_B64_term:
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_XOR_B64));
    break;

  case AMDGPU::S_OR_B64_term:
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_OR_B64));
    break;

  case AMDGPU::S_ANDN2_B64_term:
    // This is only a terminator to get the correct spill code placement during
    // register allocation.
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
    break;

  case AMDGPU::V_MOV_B64_PSEUDO: {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    const MachineOperand &SrcOp = MI.getOperand(1);
    // FIXME: Will this work for 64-bit floating point immediates?
    assert(!SrcOp.isFPImm());
    if (SrcOp.isImm()) {
      APInt Imm(64, SrcOp.getImm());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
          .addImm(Imm.getLoBits(32).getZExtValue())
          .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
          .addImm(Imm.getHiBits(32).getZExtValue())
          .addReg(Dst, RegState::Implicit | RegState::Define);
    } else {
      assert(SrcOp.isReg());
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
          .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
          .addReg(Dst, RegState::Implicit | RegState::Define);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
          .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
          .addReg(Dst, RegState::Implicit | RegState::Define);
    }
    MI.eraseFromParent();
    break;
  }
  case AMDGPU::V_SET_INACTIVE_B32: {
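    // Flip EXEC so the V_MOV below writes only the currently inactive lanes,
    // then flip it back to restore the original mask.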
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC);
    MI.eraseFromParent();
    break;
  }
  case AMDGPU::V_SET_INACTIVE_B64: {
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC);
    MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
                                 MI.getOperand(0).getReg())
                             .add(MI.getOperand(2));
    expandPostRAPseudo(*Copy);
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC);
    MI.eraseFromParent();
    break;
  }
  case AMDGPU::V_MOVRELD_B32_V1:
  case AMDGPU::V_MOVRELD_B32_V2:
  case AMDGPU::V_MOVRELD_B32_V4:
  case AMDGPU::V_MOVRELD_B32_V8:
  case AMDGPU::V_MOVRELD_B32_V16: {
    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
    unsigned VecReg = MI.getOperand(0).getReg();
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

    MachineInstr *MovRel =
        BuildMI(MBB, MI, DL, MovRelDesc)
            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
            .add(MI.getOperand(2))
            .addReg(VecReg, RegState::ImplicitDefine)
            .addReg(VecReg,
                    RegState::Implicit | (IsUndef ? RegState::Undef : 0));

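    // Tie the implicit def of VecReg to its implicit use so the register
    // allocator keeps the vector in one register across the partial write.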
    const int ImpDefIdx =
        MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
    const int ImpUseIdx = ImpDefIdx + 1;
    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);

    MI.eraseFromParent();
    break;
  }
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    MachineFunction &MF = *MBB.getParent();
    unsigned Reg = MI.getOperand(0).getReg();
    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    // Create a bundle so these instructions won't be re-ordered by the
    // post-RA scheduler.
    MIBundleBuilder Bundler(MBB, MI);
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));

    // Add 32-bit offset from this instruction to the start of the
    // constant data.
    Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
                       .addReg(RegLo)
                       .add(MI.getOperand(1)));

    MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                                  .addReg(RegHi);
    if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
      MIB.addImm(0);
    else
      MIB.add(MI.getOperand(2));

    Bundler.append(MIB);
    finalizeBundle(MBB, Bundler.begin());

    MI.eraseFromParent();
    break;
  }
  case AMDGPU::ENTER_WWM: {
    // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
    // WWM is entered.
    MI.setDesc(get(AMDGPU::S_OR_SAVEEXEC_B64));
    break;
  }
  case AMDGPU::EXIT_WWM: {
    // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
    // WWM is exited.
    MI.setDesc(get(AMDGPU::S_MOV_B64));
    break;
  }
  case TargetOpcode::BUNDLE: {
    if (!MI.mayLoad())
      return false;

1352 // If it is a load it must be a memory clause
    for (MachineBasicBlock::instr_iterator I = MI.getIterator();
         I->isBundledWithSucc(); ++I) {
      I->unbundleFromSucc();
      for (MachineOperand &MO : I->operands())
        if (MO.isReg())
          MO.setIsInternalRead(false);
    }

    MI.eraseFromParent();
    break;
  }
  }
  return true;
}

bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
                                      MachineOperand &Src0,
                                      unsigned Src0OpName,
                                      MachineOperand &Src1,
                                      unsigned Src1OpName) const {
  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
  if (!Src0Mods)
    return false;

  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
  assert(Src1Mods &&
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);
  return true;
}

static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
                                             MachineOperand &RegOp,
                                             MachineOperand &NonRegOp) {
  unsigned Reg = RegOp.getReg();
  unsigned SubReg = RegOp.getSubReg();
  bool IsKill = RegOp.isKill();
  bool IsDead = RegOp.isDead();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
    RegOp.ChangeToImmediate(NonRegOp.getImm());
  else if (NonRegOp.isFI())
    RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
  else
    return nullptr;

  NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef,
                            IsDebug);
  NonRegOp.setSubReg(SubReg);

  return &MI;
}

MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                  unsigned Src0Idx,
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  int CommutedOpcode = commuteOpcode(Opc);
  if (CommutedOpcode == -1)
    return nullptr;

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);

  MachineInstr *CommutedMI = nullptr;
  if (Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0)) {
      // Be sure to copy the source modifiers to the right place.
      CommutedMI
        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
    }

  } else if (Src0.isReg() && !Src1.isReg()) {
    // src0 should always be able to support any operand type, so no need to
    // check operand legality.
    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
  } else if (!Src0.isReg() && Src1.isReg()) {
    if (isOperandLegal(MI, Src1Idx, &Src0))
      CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
  } else {
    // FIXME: Found two non-register operands to commute. This does happen.
    return nullptr;
  }

  if (CommutedMI) {
    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
                        Src1, AMDGPU::OpName::src1_modifiers);

    CommutedMI->setDesc(get(CommutedOpcode));
  }

  return CommutedMI;
}

// This needs to be implemented because the source modifiers may be inserted
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
}

bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())
    return false;

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  if (Src0Idx == -1)
    return false;

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  if (Src1Idx == -1)
    return false;

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                        int64_t BrOffset) const {
  // BranchRelaxation should never have to check s_setpc_b64 because its dest
  // block is unanalyzable.
  assert(BranchOp != AMDGPU::S_SETPC_B64);

  // Convert to dwords.
  BrOffset /= 4;

  // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
  // from the next instruction.
  BrOffset -= 1;

  return isIntN(BranchOffsetBits, BrOffset);
}

MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
  const MachineInstr &MI) const {
  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
    // This would be a difficult analysis to perform, but can always be legal so
    // there's no need to analyze it.
    return nullptr;
  }

  return MI.getOperand(0).getMBB();
}

unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                           MachineBasicBlock &DestBB,
                                           const DebugLoc &DL,
                                           int64_t BrOffset,
                                           RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // FIXME: Virtual register workaround for RegScavenger not working with empty
  // blocks.
  unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  auto I = MBB.end();

  // We need to compute the offset relative to the instruction immediately after
  // s_getpc_b64. Insert pc arithmetic code before last terminator.
  MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);

  // TODO: Handle > 32-bit block address.
  if (BrOffset >= 0) {
    BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
        .addReg(PCReg, RegState::Define, AMDGPU::sub0)
        .addReg(PCReg, 0, AMDGPU::sub0)
        .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
    BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
        .addReg(PCReg, RegState::Define, AMDGPU::sub1)
        .addReg(PCReg, 0, AMDGPU::sub1)
        .addImm(0);
  } else {
    // Backwards branch.
    BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
        .addReg(PCReg, RegState::Define, AMDGPU::sub0)
        .addReg(PCReg, 0, AMDGPU::sub0)
        .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
    BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
        .addReg(PCReg, RegState::Define, AMDGPU::sub1)
        .addReg(PCReg, 0, AMDGPU::sub1)
        .addImm(0);
  }

  // Insert the indirect branch after the other terminator.
  BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
      .addReg(PCReg);

  // FIXME: If spilling is necessary, this will fail because this scavenger has
  // no emergency stack slots. It is non-trivial to spill in this situation,
  // because the restore code needs to be specially placed after the
  // jump. BranchRelaxation then needs to be made aware of the newly inserted
  // block.
  //
  // If a spill is needed for the pc register pair, we need to insert a spill
  // restore block right before the destination block, and insert a short branch
  // into the old destination block's fallthrough predecessor.
  // e.g.:
  //
  // s_cbranch_scc0 skip_long_branch:
  //
  // long_branch_bb:
  //   spill s[8:9]
  //   s_getpc_b64 s[8:9]
  //   s_add_u32 s8, s8, restore_bb
  //   s_addc_u32 s9, s9, 0
  //   s_setpc_b64 s[8:9]
  //
  // skip_long_branch:
  //   foo;
  //
  // .....
  //
  // dest_bb_fallthrough_predecessor:
  //   bar;
  //   s_branch dest_bb
  //
  // restore_bb:
  //   restore s[8:9]
  //   fallthrough dest_bb
  //
  // dest_bb:
  //   buzz;

  RS->enterBasicBlockEnd(MBB);
  unsigned Scav = RS->scavengeRegisterBackwards(
      AMDGPU::SReg_64RegClass,
      MachineBasicBlock::iterator(GetPC), false, 0);
  MRI.replaceRegWith(PCReg, Scav);
  MRI.clearVirtRegs();
  RS->setRegUsed(Scav);

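  // Size of the emitted sequence: s_getpc_b64 (4 bytes), s_add_u32 with a
  // 32-bit literal offset (8 bytes), s_addc_u32 (4 bytes) and s_setpc_b64
  // (4 bytes).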
  return 4 + 8 + 4 + 4;
}

unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  switch (Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
  default:
    llvm_unreachable("invalid branch predicate");
  }
}

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
    return SCC_FALSE;
  case AMDGPU::S_CBRANCH_SCC1:
    return SCC_TRUE;
  case AMDGPU::S_CBRANCH_VCCNZ:
    return VCCNZ;
  case AMDGPU::S_CBRANCH_VCCZ:
    return VCCZ;
  case AMDGPU::S_CBRANCH_EXECNZ:
    return EXECNZ;
  case AMDGPU::S_CBRANCH_EXECZ:
    return EXECZ;
  default:
    return INVALID_BR;
  }
}

bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    // Unconditional Branch
    TBB = I->getOperand(0).getMBB();
    return false;
  }

  MachineBasicBlock *CondBB = nullptr;

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));
  } else {
    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)
      return true;

    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(Pred));
    Cond.push_back(I->getOperand(1)); // Save the branch register.
  }
  ++I;

  if (I == MBB.end()) {
    // Conditional branch followed by fall-through.
    TBB = CondBB;
    return false;
  }

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = CondBB;
    FBB = I->getOperand(0).getMBB();
    return false;
  }

  return true;
}

bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
  auto E = MBB.end();
  if (I == E)
    return false;

  // Skip over instructions that are artificial terminators for special
  // exec management.
  while (I != E && !I->isBranch() && !I->isReturn() &&
         I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
    switch (I->getOpcode()) {
    case AMDGPU::SI_MASK_BRANCH:
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
      break;
    case AMDGPU::SI_IF:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
      // FIXME: It's messy that these need to be considered here at all.
      return true;
    default:
      llvm_unreachable("unexpected non-branch terminator inst");
    }

    ++I;
  }

  if (I == E)
    return false;

  if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
    return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);

  ++I;

  // TODO: Should be able to treat as fallthrough?
  if (I == MBB.end())
    return true;

  if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
    return true;

  MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();

  // Specifically handle the case where the conditional branch is to the same
  // destination as the mask branch. e.g.
  //
  // si_mask_branch BB8
  // s_cbranch_execz BB8
  // s_cbranch BB9
  //
  // This is required to understand divergent loops which may need the branches
  // to be relaxed.
  if (TBB != MaskBrDest || Cond.empty())
    return true;

  auto Pred = Cond[0].getImm();
  return (Pred != EXECZ && Pred != EXECNZ);
}

unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                   int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();

  unsigned Count = 0;
  unsigned RemovedSize = 0;
  while (I != MBB.end()) {
    MachineBasicBlock::iterator Next = std::next(I);
    if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
      I = Next;
      continue;
    }

    RemovedSize += getInstSizeInBytes(*I);
    I->eraseFromParent();
    ++Count;
    I = Next;
  }

  if (BytesRemoved)
    *BytesRemoved = RemovedSize;

  return Count;
}

// Copy the flags onto the implicit condition register operand.
static void preserveCondRegFlags(MachineOperand &CondReg,
                                 const MachineOperand &OrigCond) {
  CondReg.setIsUndef(OrigCond.isUndef());
  CondReg.setIsKill(OrigCond.isKill());
}

unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *TBB,
                                   MachineBasicBlock *FBB,
                                   ArrayRef<MachineOperand> Cond,
                                   const DebugLoc &DL,
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
    BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
        .addMBB(TBB);
    if (BytesAdded)
      *BytesAdded = 4;
    return 1;
  }

  if (Cond.size() == 1 && Cond[0].isReg()) {
    BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
        .add(Cond[0])
        .addMBB(TBB);
    return 1;
  }

  assert(TBB && Cond[0].isImm());

  unsigned Opcode
    = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (!FBB) {
    MachineInstr *CondBr =
      BuildMI(&MBB, DL, get(Opcode))
      .addMBB(TBB);

    // Copy the flags onto the implicit condition register operand.
    preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);

    if (BytesAdded)
      *BytesAdded = 4;
    return 1;
  }

  assert(TBB && FBB);

  MachineInstr *CondBr =
    BuildMI(&MBB, DL, get(Opcode))
    .addMBB(TBB);
  BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
      .addMBB(FBB);

  preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}

bool SIInstrInfo::reverseBranchCondition(
  SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond.size() != 2) {
    return true;
  }

  if (Cond[0].isImm()) {
    Cond[0].setImm(-Cond[0].getImm());
    return false;
  }

  return true;
}

bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                  ArrayRef<MachineOperand> Cond,
                                  unsigned TrueReg, unsigned FalseReg,
                                  int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
  case VCCNZ:
  case VCCZ: {
    const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
    assert(MRI.getRegClass(FalseReg) == RC);

    int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
    CondCycles = TrueCycles = FalseCycles = NumInsts; // ???

    // Limit to equal cost for branch vs. N v_cndmask_b32s.
    return !RI.isSGPRClass(RC) && NumInsts <= 6;
  }
  case SCC_TRUE:
  case SCC_FALSE: {
    // FIXME: We could insert for VGPRs if we could replace the original compare
    // with a vector one.
    const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
    assert(MRI.getRegClass(FalseReg) == RC);

    int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;

    // Sizes that are a multiple of 8 bytes (64 bits) can use s_cselect_b64.
    if (NumInsts % 2 == 0)
      NumInsts /= 2;

    CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
    return RI.isSGPRClass(RC);
  }
  default:
    return false;
  }
}

void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, const DebugLoc &DL,
                               unsigned DstReg, ArrayRef<MachineOperand> Cond,
                               unsigned TrueReg, unsigned FalseReg) const {
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);
    std::swap(TrueReg, FalseReg);
  }

  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    unsigned SelOp = Pred == SCC_TRUE ?
      AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;

    // Instruction's operands are backwards from what is expected.
    MachineInstr *Select =
      BuildMI(MBB, I, DL, get(SelOp), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg);

    preserveCondRegFlags(Select->getOperand(3), Cond[1]);
    return;
  }

  if (DstSize == 64 && Pred == SCC_TRUE) {
    MachineInstr *Select =
      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
      .addReg(FalseReg)
      .addReg(TrueReg);

    preserveCondRegFlags(Select->getOperand(3), Cond[1]);
    return;
  }

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  // 64-bit select is only available for SALU.
  // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit.
  if (Pred == SCC_TRUE) {
    if (NElts % 2) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
    } else {
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;
      NElts /= 2;
    }
  }

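  // Otherwise build the result as a REG_SEQUENCE of per-element selects, one
  // v_cndmask_b32 / s_cselect per 32-bit (or 64-bit) slice.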
  MachineInstrBuilder MIB = BuildMI(
    MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  I = MIB->getIterator();

  SmallVector<unsigned, 8> Regs;
  for (int Idx = 0; Idx != NElts; ++Idx) {
    unsigned DstElt = MRI.createVirtualRegister(EltRC);
    Regs.push_back(DstElt);

    unsigned SubIdx = SubIndices[Idx];

    MachineInstr *Select =
      BuildMI(MBB, I, DL, get(SelOp), DstElt)
      .addReg(FalseReg, 0, SubIdx)
      .addReg(TrueReg, 0, SubIdx);
    preserveCondRegFlags(Select->getOperand(3), Cond[1]);

    MIB.addReg(DstElt)
       .addImm(SubIdx);
  }
}

bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO: {
    // If there are additional implicit register operands, this may be used for
    // register indexing so the source register operand isn't simply copied.
    unsigned NumOps = MI.getDesc().getNumOperands() +
                      MI.getDesc().getNumImplicitUses();

    return MI.getNumOperands() == NumOps;
  }
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
    unsigned Kind) const {
  switch (Kind) {
  case PseudoSourceValue::Stack:
  case PseudoSourceValue::FixedStack:
    return AMDGPUAS::PRIVATE_ADDRESS;
  case PseudoSourceValue::ConstantPool:
  case PseudoSourceValue::GOT:
  case PseudoSourceValue::JumpTable:
  case PseudoSourceValue::GlobalValueCallEntry:
  case PseudoSourceValue::ExternalSymbolCallEntry:
  case PseudoSourceValue::TargetCustom:
    return AMDGPUAS::CONSTANT_ADDRESS;
  }
  return AMDGPUAS::FLAT_ADDRESS;
}

static void removeModOperands(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src0_modifiers);
  int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src1_modifiers);
  int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                              AMDGPU::OpName::src2_modifiers);

  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
}

bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                unsigned Reg, MachineRegisterInfo *MRI) const {
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  switch (DefMI.getOpcode()) {
  default:
    return false;
  case AMDGPU::S_MOV_B64:
    // TODO: We could fold 64-bit immediates, but this gets complicated
    // when there are sub-registers.
    return false;

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
    break;
  }

  const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
  assert(ImmOp);
  // FIXME: We could handle FrameIndex values here.
  if (!ImmOp->isImm())
    return false;

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
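    // Rewrite a plain COPY of the immediate into a mov of the immediate,
    // picking the opcode for the destination register bank.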
    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    UseMI.setDesc(get(NewOpc));
    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
    return true;
  }

  if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
    // Don't fold if we are using source or output modifiers. The new VOP2
    // instructions don't have them.
    if (hasAnyModifiersSet(UseMI))
      return false;

    // If this is a free constant, there's no reason to do this.
    // TODO: We could fold this here instead of letting SIFoldOperands do it
    // later.
    MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);

    // Any src operand can be used for the legality check.
    if (isInlineConstant(UseMI, *Src0, *ImmOp))
      return false;

    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
    MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
    MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);

    // Multiplied part is the constant: Use v_madmk_{f16, f32}.
    // We should only expect these to be on src0 due to canonicalizations.
    if (Src0->isReg() && Src0->getReg() == Reg) {
      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
        return false;

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
        return false;

      // We need to swap operands 0 and 1 since madmk constant is at operand 1.

      const int64_t Imm = ImmOp->getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI.RemoveOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
      UseMI.RemoveOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));

      unsigned Src1Reg = Src1->getReg();
      unsigned Src1SubReg = Src1->getSubReg();
      Src0->setReg(Src1Reg);
      Src0->setSubReg(Src1SubReg);
      Src0->setIsKill(Src1->isKill());

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      Src1->ChangeToImmediate(Imm);

      removeModOperands(UseMI);
      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI.eraseFromParent();

      return true;
    }

    // Added part is the constant: Use v_madak_{f16, f32}.
    if (Src2->isReg() && Src2->getReg() == Reg) {
      // Not allowed to use constant bus for another operand.
      // We can however allow an inline immediate as src0.
      bool Src0Inlined = false;
      if (Src0->isReg()) {
        // Try to inline constant if possible.
        // If the def is a move of an immediate and this is its only use,
        // folding it saves a VGPR.
        MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
        if (Def && Def->isMoveImmediate() &&
            isInlineConstant(Def->getOperand(1)) &&
            MRI->hasOneUse(Src0->getReg())) {
          Src0->ChangeToImmediate(Def->getOperand(1).getImm());
          Src0Inlined = true;
        } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
                    RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
                   (RI.isVirtualRegister(Src0->getReg()) &&
                    RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
          return false;
        // VGPR is okay as Src0 - fallthrough
      }

      if (Src1->isReg() && !Src0Inlined) {
        // We have one slot for inlinable constant so far - try to fill it
        MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
        if (Def && Def->isMoveImmediate() &&
            isInlineConstant(Def->getOperand(1)) &&
            MRI->hasOneUse(Src1->getReg()) &&
            commuteInstruction(UseMI)) {
          Src0->ChangeToImmediate(Def->getOperand(1).getImm());
        } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
                    RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
                   (RI.isVirtualRegister(Src1->getReg()) &&
                    RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
          return false;
        // VGPR is okay as Src1 - fallthrough
      }

      const int64_t Imm = ImmOp->getImm();

      // FIXME: This would be a lot easier if we could return a new instruction
      // instead of having to modify in place.

      // Remove these first since they are at the end.
      UseMI.RemoveOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
      UseMI.RemoveOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      // ChangeToImmediate adds Src2 back to the instruction.
      Src2->ChangeToImmediate(Imm);

      // These come before src2.
      removeModOperands(UseMI);
      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));

      bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
      if (DeleteDef)
        DefMI.eraseFromParent();

      return true;
    }
  }

  return false;
}

static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                               const MachineInstr &MIb) const {
  const MachineOperand *BaseOp0, *BaseOp1;
  int64_t Offset0, Offset1;

  if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
      getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
    if (!BaseOp0->isIdenticalTo(*BaseOp1))
      return false;

    if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
      // FIXME: Handle ds_read2 / ds_write2.
      return false;
    }
    unsigned Width0 = (*MIa.memoperands_begin())->getSize();
    unsigned Width1 = (*MIb.memoperands_begin())->getSize();
    if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
      return true;
    }
  }

  return false;
}

bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                                  const MachineInstr &MIb,
                                                  AliasAnalysis *AA) const {
  assert((MIa.mayLoad() || MIa.mayStore()) &&
         "MIa must load from or modify a memory location");
  assert((MIb.mayLoad() || MIb.mayStore()) &&
         "MIb must load from or modify a memory location");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects())
    return false;

  // XXX - Can we relax this between address spaces?
  if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // TODO: Should we check the address space from the MachineMemOperand? That
  // would allow us to distinguish objects we know don't alias based on the
  // underlying address space, even if it was lowered to a different one,
  // e.g. private accesses lowered to use MUBUF instructions on a scratch
  // buffer.
  if (isDS(MIa)) {
    if (isDS(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb) || isSegmentSpecificFLAT(MIb);
  }

  if (isMUBUF(MIa) || isMTBUF(MIa)) {
    if (isMUBUF(MIb) || isMTBUF(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb) && !isSMRD(MIb);
  }

  if (isSMRD(MIa)) {
    if (isSMRD(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
  }

  if (isFLAT(MIa)) {
    if (isFLAT(MIb))
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return false;
  }

  return false;
}

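// If MO is a virtual register uniquely defined by a v_mov_b32 of an
// immediate, return that immediate; otherwise return 0. Note that a foldable
// immediate of 0 is therefore indistinguishable from failure.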
static int64_t getFoldableImm(const MachineOperand *MO) {
  if (!MO->isReg())
    return 0;
  const MachineFunction *MF = MO->getParent()->getParent()->getParent();
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  auto Def = MRI.getUniqueVRegDef(MO->getReg());
  if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
      Def->getOperand(1).isImm())
    return Def->getOperand(1).getImm();
  return 0;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                 MachineInstr &MI,
                                                 LiveVariables *LV) const {
  unsigned Opc = MI.getOpcode();
  bool IsF16 = false;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;

  switch (Opc) {
  default:
    return nullptr;
  case AMDGPU::V_MAC_F16_e64:
    IsF16 = true;
    LLVM_FALLTHROUGH;
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
    break;
  case AMDGPU::V_MAC_F16_e32:
    IsF16 = true;
    LLVM_FALLTHROUGH;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
    if (!Src0->isReg() && !Src0->isImm())
      return nullptr;

    if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
      return nullptr;

    break;
  }
  }

  const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
  const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
  const MachineOperand *Src0Mods =
    getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
  const MachineOperand *Src1Mods =
    getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
  const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
  const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);

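  // Prefer folding an immediate into the two-operand v_madak/v_madmk forms
  // before falling back to the generic three-address v_mad/v_fma below.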
Matt Arsenault0084adc2018-04-30 19:08:16 +00002358 if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
Matt Arsenaultc3172872017-09-14 20:54:29 +00002359 // If we have an SGPR input, we will violate the constant bus restriction.
Matt Arsenaultfdcdd882017-09-21 00:45:59 +00002360 (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
Stanislav Mekhanoshin710da422017-09-11 17:13:57 +00002361 if (auto Imm = getFoldableImm(Src2)) {
2362 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2363 get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2364 .add(*Dst)
2365 .add(*Src0)
2366 .add(*Src1)
2367 .addImm(Imm);
2368 }
2369 if (auto Imm = getFoldableImm(Src1)) {
2370 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2371 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2372 .add(*Dst)
2373 .add(*Src0)
2374 .addImm(Imm)
2375 .add(*Src2);
2376 }
2377 if (auto Imm = getFoldableImm(Src0)) {
2378 if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
2379 AMDGPU::OpName::src0), Src1))
2380 return BuildMI(*MBB, MI, MI.getDebugLoc(),
2381 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2382 .add(*Dst)
2383 .add(*Src1)
2384 .addImm(Imm)
2385 .add(*Src2);
2386 }
2387 }
2388
Matt Arsenault0084adc2018-04-30 19:08:16 +00002389 assert((!IsFMA || !IsF16) && "fmac only expected with f32");
2390 unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
2391 (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
2392 return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
Diana Picus116bbab2017-01-13 09:58:52 +00002393 .add(*Dst)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002394 .addImm(Src0Mods ? Src0Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002395 .add(*Src0)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002396 .addImm(Src1Mods ? Src1Mods->getImm() : 0)
Diana Picus116bbab2017-01-13 09:58:52 +00002397 .add(*Src1)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002398 .addImm(0) // Src mods
Diana Picus116bbab2017-01-13 09:58:52 +00002399 .add(*Src2)
Matt Arsenault3cb9ff82017-03-11 05:40:40 +00002400 .addImm(Clamp ? Clamp->getImm() : 0)
2401 .addImm(Omod ? Omod->getImm() : 0);
Tom Stellarddb5a11f2015-07-13 15:47:57 +00002402}
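
// Illustrative sketch (assumed, simplified MIR; the register numbers and the
// literal are hypothetical): when src2 is defined by a plain move of an
// immediate, the folding above can rewrite
//   %2:vgpr_32 = V_MOV_B32_e32 1077936128   ; 0x40400000 == 3.0f
//   %3:vgpr_32 = V_MAC_F32_e32 %0, %1, %2   ; src2 tied to the dst
// into the three-address form
//   %3:vgpr_32 = V_MADAK_F32 %0, %1, 1077936128
// folding the literal directly into the multiply-add. Only the opcode
// rewrite mirrors the code above; the surrounding MIR is a sketch.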

// It's not generally safe to move VALU instructions across these, since a
// VALU instruction moved past them would start using its register operand as
// an indexed base rather than reading the register directly.
// XXX - Why isn't hasSideEffects sufficient for these?
static bool changesVGPRIndexingMode(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:
    return true;
  default:
    return false;
  }
}
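
// Illustrative sketch (assumed, simplified assembly; exact operand syntax
// may differ): inside an indexed-access region such as
//   S_SET_GPR_IDX_ON $sgpr2, 1
//   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $m0
//   S_SET_GPR_IDX_OFF
// the V_MOV reads $vgpr0 relative to the active index, so hoisting it above
// the S_SET_GPR_IDX_ON would change its meaning; hence these opcodes act as
// scheduling boundaries below.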

bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                       const MachineBasicBlock *MBB,
                                       const MachineFunction &MF) const {
  // XXX - Do we want the SP check in the base implementation?

  // Target-independent instructions do not have an implicit-use of EXEC, even
  // when they operate on VGPRs. Treating EXEC modifications as scheduling
  // boundaries prevents incorrect movements of such instructions.
  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
         MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         changesVGPRIndexingMode(MI);
}

bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_GWS_INIT ||
         Opcode == AMDGPU::DS_GWS_SEMA_V ||
         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
         Opcode == AMDGPU::DS_GWS_SEMA_P ||
         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
         Opcode == AMDGPU::DS_GWS_BARRIER;
}

bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(
    const MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();

  if (MI.mayStore() && isSMRD(MI))
    return true; // scalar store or atomic

  // These instructions cause shader I/O that may cause hardware lockups
  // when executed with an empty EXEC mask.
  //
  // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
  // EXEC = 0, but checking for that case here seems not worth it
  // given the typical code patterns.
  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
      Opcode == AMDGPU::DS_ORDERED_COUNT)
    return true;

  if (MI.isInlineAsm())
    return true; // conservative assumption

  // These are like SALU instructions in terms of effects, so it's questionable
  // whether we should return true for those.
  //
  // However, executing them with EXEC = 0 causes them to operate on undefined
  // data, which we avoid by returning true here.
  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
    return true;

  return false;
}
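
// Illustrative note (assumption about hardware behavior, summarized from the
// comments above): scalar instructions still execute when EXEC = 0, so a
// sequence like
//   s_mov_b64 exec, 0
//   s_sendmsg sendmsg(MSG_INTERRUPT)
// would still send the message. Callers use
// hasUnwantedEffectsWhenEXECEmpty() to avoid creating such situations.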

bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI,
                              const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return false;

  // This won't read exec if this is an SGPR->SGPR copy.
  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
      return true;

    // Make sure this isn't copying exec as a normal operand
    return MI.readsRegister(AMDGPU::EXEC, &RI);
  }

  // Be conservative with any unhandled generic opcodes.
  if (!isTargetSpecificOpcode(MI.getOpcode()))
    return true;

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  switch (Imm.getBitWidth()) {
  case 32:
    return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  case 64:
    return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  case 16:
    return ST.has16BitInsts() &&
           AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
                                        ST.hasInv2PiInlineImm());
  default:
    llvm_unreachable("invalid bitwidth");
  }
}
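
// Worked examples for the 32-bit case (values per the GCN inline-constant
// encoding; treat as illustrative): the integers -16..64, +/-0.5, +/-1.0,
// +/-2.0, +/-4.0, and (on subtargets with hasInv2PiInlineImm()) 1/(2*pi) are
// inline constants. So the bit pattern 0x3f800000 (1.0f) is inlinable, while
// 0x40490fdb (pi as a float) is not and must be emitted as a literal, which
// occupies the constant bus.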

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
                                   uint8_t OperandType) const {
  if (!MO.isImm() ||
      OperandType < AMDGPU::OPERAND_SRC_FIRST ||
      OperandType > AMDGPU::OPERAND_SRC_LAST)
    return false;

  // MachineOperand provides no way to tell the true operand size, since it
  // only records a 64-bit value. We need to know the size to determine if a
  // 32-bit floating point immediate bit pattern is legal for an integer
  // immediate. It would be for any 32-bit integer operand, but would not be
  // for a 64-bit one.

  int64_t Imm = MO.getImm();
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
    int32_t Trunc = static_cast<int32_t>(Imm);
    return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
  }
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return AMDGPU::isInlinableLiteral64(MO.getImm(),
                                        ST.hasInv2PiInlineImm());
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      // A few special case instructions have 16-bit operands on subtargets
      // where 16-bit instructions are not legal.
      // TODO: Do the 32-bit immediates work? We shouldn't really need to
      // handle constants in these cases.
      int16_t Trunc = static_cast<int16_t>(Imm);
      return ST.has16BitInsts() &&
             AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
    }

    return false;
  }
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    if (isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);
      return ST.has16BitInsts() &&
             AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
    }
    if (!(Imm & 0xffff)) {
      return ST.has16BitInsts() &&
             AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
    }
    uint32_t Trunc = static_cast<uint32_t>(Imm);
    return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand type");
  }
}

bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
                                        const MCOperandInfo &OpInfo) const {
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
    return false;
  case MachineOperand::MO_Immediate:
    return !isInlineConstant(MO, OpInfo);
  case MachineOperand::MO_FrameIndex:
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_ExternalSymbol:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_MCSymbol:
    return true;
  default:
    llvm_unreachable("unexpected operand type");
  }
}

static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                                    const MachineOperand &MO) const {
  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];

  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());

  if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
    return true;

  if (OpInfo.RegClass < 0)
    return false;

  if (MO.isImm() && isInlineConstant(MO, OpInfo))
    return RI.opCanUseInlineConstant(OpInfo.OperandType);

  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
}

bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
  int Op32 = AMDGPU::getVOPe32(Opcode);
  if (Op32 == -1)
    return false;

  return pseudoToMCOpcode(Op32) != -1;
}

bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
  // The src0_modifiers operand is present on all instructions
  // that have modifiers.

  return AMDGPU::getNamedOperandIdx(Opcode,
                                    AMDGPU::OpName::src0_modifiers) != -1;
}

bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
                                  unsigned OpName) const {
  const MachineOperand *Mods = getNamedOperand(MI, OpName);
  return Mods && Mods->getImm();
}

bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
         hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
         hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
         hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
         hasModifiersSet(MI, AMDGPU::OpName::omod);
}

bool SIInstrInfo::canShrink(const MachineInstr &MI,
                            const MachineRegisterInfo &MRI) const {
  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
  // Can't shrink instruction with three operands.
  // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
  // a special case for it. It can only be shrunk if the third operand
  // is vcc, and src0_modifiers and src1_modifiers are not set.
  // We should handle this the same way we handle vopc, by adding
  // a register allocation hint pre-regalloc and then doing the shrinking
  // post-regalloc.
  if (Src2) {
    switch (MI.getOpcode()) {
    default: return false;

    case AMDGPU::V_ADDC_U32_e64:
    case AMDGPU::V_SUBB_U32_e64:
    case AMDGPU::V_SUBBREV_U32_e64: {
      const MachineOperand *Src1
        = getNamedOperand(MI, AMDGPU::OpName::src1);
      if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
        return false;
      // Additional verification is needed for sdst/src2.
      return true;
    }
    case AMDGPU::V_MAC_F32_e64:
    case AMDGPU::V_MAC_F16_e64:
    case AMDGPU::V_FMAC_F32_e64:
      if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
          hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
        return false;
      break;

    case AMDGPU::V_CNDMASK_B32_e64:
      break;
    }
  }

  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
               hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
    return false;

  // We don't need to check src0, all input types are legal, so just make sure
  // src0 isn't using any modifiers.
  if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
    return false;

  // Can it be shrunk to a valid 32-bit opcode?
  if (!hasVALU32BitEncoding(MI.getOpcode()))
    return false;

  // Check output modifiers
  return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
         !hasModifiersSet(MI, AMDGPU::OpName::clamp);
}

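// Illustrative sketch (assumed assembly): canShrink() is what permits
// rewriting a modifier-free VOP3 encoding, e.g.
//   v_add_f32_e64 v0, v1, v2
// into the smaller VOP2 encoding
//   v_add_f32_e32 v0, v1, v2
// whereas a form using output modifiers, such as
//   v_add_f32_e64 v0, v1, v2 clamp
// must stay in the 64-bit encoding.
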
// Set VCC operand with all flags from \p Orig, except for setting it as
// implicit.
static void copyFlagsToImplicitVCC(MachineInstr &MI,
                                   const MachineOperand &Orig) {
  for (MachineOperand &Use : MI.implicit_operands()) {
    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
      Use.setIsUndef(Orig.isUndef());
      Use.setIsKill(Orig.isKill());
      return;
    }
  }
}

MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
                                           unsigned Op32) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstrBuilder Inst32 =
    BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));

  // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
  // For VOPC instructions, this is replaced by an implicit def of vcc.
  int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
  if (Op32DstIdx != -1) {
    // dst
    Inst32.add(MI.getOperand(0));
  } else {
    assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
           "Unexpected case");
  }

  Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));

  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1)
    Inst32.add(*Src1);

  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);

  if (Src2) {
    int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
    if (Op32Src2Idx != -1) {
      Inst32.add(*Src2);
    } else {
      // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
      // replaced with an implicit read of vcc. This was already added
      // during the initial BuildMI, so find it to preserve the flags.
      copyFlagsToImplicitVCC(*Inst32, *Src2);
    }
  }

  return Inst32;
}
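
// Illustrative usage sketch (assumed caller, modeled on how a shrinking pass
// might drive this helper; not part of this file):
//   if (TII->canShrink(MI, MRI)) {
//     int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
//     if (Op32 != -1) {
//       MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
//       // ... transfer any remaining state, then:
//       MI.eraseFromParent();
//     }
//   }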

bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                  const MachineOperand &MO,
                                  const MCOperandInfo &OpInfo) const {
  // Literal constants use the constant bus.
  //if (isLiteralConstantLike(MO, OpInfo))
  //  return true;
  if (MO.isImm())
    return !isInlineConstant(MO, OpInfo);

  if (!MO.isReg())
    return true; // Misc other operands like FrameIndex

  if (!MO.isUse())
    return false;

  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));

  // FLAT_SCR is just an SGPR pair.
  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
    return true;

  // EXEC register uses the constant bus.
  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
    return true;

  // SGPRs use the constant bus
  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
          (!MO.isImplicit() &&
           (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
            AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
}
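
// Worked example of the constant bus rule tracked here (assumed assembly,
// simplified): on these subtargets a VALU instruction may read at most one
// SGPR or one literal through the constant bus, so
//   v_add_f32_e32 v0, s0, v1   ; legal: one SGPR read
//   v_add_f32_e64 v0, s0, s1   ; illegal: two distinct SGPR reads
// and the second form must first copy one source into a VGPR (see the
// legalizeOperands* helpers below).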

static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
  for (const MachineOperand &MO : MI.implicit_operands()) {
    // We only care about reads.
    if (MO.isDef())
      continue;

    switch (MO.getReg()) {
    case AMDGPU::VCC:
    case AMDGPU::M0:
    case AMDGPU::FLAT_SCR:
      return MO.getReg();

    default:
      break;
    }
  }

  return AMDGPU::NoRegister;
}

static bool shouldReadExec(const MachineInstr &MI) {
  if (SIInstrInfo::isVALU(MI)) {
    switch (MI.getOpcode()) {
    case AMDGPU::V_READLANE_B32:
    case AMDGPU::V_READLANE_B32_si:
    case AMDGPU::V_READLANE_B32_vi:
    case AMDGPU::V_WRITELANE_B32:
    case AMDGPU::V_WRITELANE_B32_si:
    case AMDGPU::V_WRITELANE_B32_vi:
      return false;
    }

    return true;
  }

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
      SIInstrInfo::isSALU(MI) ||
      SIInstrInfo::isSMRD(MI))
    return false;

  return true;
}

static bool isSubRegOf(const SIRegisterInfo &TRI,
                       const MachineOperand &SuperVec,
                       const MachineOperand &SubReg) {
  if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
    return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
         SubReg.getReg() == SuperVec.getReg();
}

bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI.getOpcode();
  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
    return true;

  const MachineFunction *MF = MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF->getRegInfo();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  if (MI.isInlineAsm()) {
    // Verify register classes for inlineasm constraints.
    for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
         I != E; ++I) {
      const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
      if (!RC)
        continue;

      const MachineOperand &Op = MI.getOperand(I);
      if (!Op.isReg())
        continue;

      unsigned Reg = Op.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";
        return false;
      }
    }

    return true;
  }

  // Make sure the register classes are correct.
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    if (MI.getOperand(i).isFPImm()) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
      return false;
    }

    int RegClass = Desc.OpInfo[i].RegClass;

    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER:
      if (MI.getOperand(i).isImm()) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
      break;
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
      const MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }
      break;
    }
    case MCOI::OPERAND_IMMEDIATE:
    case AMDGPU::OPERAND_KIMM32:
      // Check if this operand is an immediate.
      // FrameIndex operands will be replaced by immediates, so they are
      // allowed.
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      LLVM_FALLTHROUGH;
    default:
      continue;
    }

    if (!MI.getOperand(i).isReg())
      continue;

    if (RegClass != -1) {
      unsigned Reg = MI.getOperand(i).getReg();
      if (Reg == AMDGPU::NoRegister ||
          TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify SDWA
  if (isSDWA(MI)) {
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";
      return false;
    }

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    const int OpIndices[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1)
        continue;
      const MachineOperand &MO = MI.getOperand(OpIdx);

      if (!ST.hasSDWAScalar()) {
        // Only VGPRs on VI
        if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
          return false;
        }
      } else {
        // No immediates on GFX9
        if (!MO.isReg()) {
          ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
          return false;
        }
      }
    }

    if (!ST.hasSDWAOmod()) {
      // No omod allowed on VI
      const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
      if (OMod != nullptr &&
          (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in SDWA instructions on VI";
        return false;
      }
    }

    uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        // Only vcc allowed as dst on VI for VOPC
        const MachineOperand &Dst = MI.getOperand(DstIdx);
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
          return false;
        }
      } else if (!ST.hasSDWAOutModsVOPC()) {
        // No clamp allowed on GFX9 for VOPC
        const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on GFX9";
          return false;
        }

        // No omod allowed on GFX9 for VOPC
        const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on GFX9";
          return false;
        }
      }
    }

    const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
    if (DstUnused && DstUnused->isImm() &&
        DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
      const MachineOperand &Dst = MI.getOperand(DstIdx);
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
        return false;
      }

      const MachineOperand &TiedMO =
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
      if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) {
        ErrInfo =
            "Dst register should be tied to implicit use of preserved register";
        return false;
      } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";
        return false;
      }
    }
  }

  // Verify MIMG
  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
    // Ensure that the return type used is large enough for all the options
    // being used. TFE/LWE require an extra result register.
    const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
    if (DMask) {
      uint64_t DMaskImm = DMask->getImm();
      uint32_t RegCount =
          isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
      const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
      const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
      const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);

      // Adjust for packed 16-bit values
      if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
        RegCount >>= 1;

      // Adjust if using LWE or TFE
      if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
        RegCount += 1;

      const uint32_t DstIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
      const MachineOperand &Dst = MI.getOperand(DstIdx);
      if (Dst.isReg()) {
        const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
        uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
        if (RegCount > DstSize) {
          ErrInfo = "MIMG instruction returns too many registers for dst "
                    "register class";
          return false;
        }
      }
    }
  }

  // Verify VOP*. Ignore multiple sgpr operands on writelane.
  if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
      && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
    // Only look at the true operands. Only a real operand can use the constant
    // bus, and we don't want to check pseudo-operands like the source modifier
    // flags.
    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    unsigned ConstantBusCount = 0;
    unsigned LiteralCount = 0;

    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
      ++ConstantBusCount;

    unsigned SGPRUsed = findImplicitSGPRRead(MI);
    if (SGPRUsed != AMDGPU::NoRegister)
      ++ConstantBusCount;

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1)
        break;
      const MachineOperand &MO = MI.getOperand(OpIdx);
      if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
        if (MO.isReg()) {
          if (MO.getReg() != SGPRUsed)
            ++ConstantBusCount;
          SGPRUsed = MO.getReg();
        } else {
          ++ConstantBusCount;
          ++LiteralCount;
        }
      }
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }

    if (isVOP3(MI) && LiteralCount) {
      ErrInfo = "VOP3 instruction uses literal";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI.getOperand(Src0Idx);
    const MachineOperand &Src1 = MI.getOperand(Src1Idx);
    const MachineOperand &Src2 = MI.getOperand(Src2Idx);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  if (isSOPK(MI)) {
    int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
    if (sopkIsZext(MI)) {
      if (!isUInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
        return false;
      }
    } else {
      if (!isInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";
        return false;
      }
    }
  }

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps = Desc.getNumOperands() +
                                  Desc.getNumImplicitUses();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    // Allow additional implicit operands. This allows a fixup done by the
    // post-RA scheduler where the main implicit operand is killed and
    // implicit-defs are added for sub-registers that remain live after this
    // instruction.
    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";
      return false;
    }

    const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (IsDst) {
      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";
        return false;
      }

      unsigned UseOpIdx;
      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";
        return false;
      }
    }

    const MachineOperand &Src0 = MI.getOperand(Src0Idx);
    const MachineOperand &ImpUse
      = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
    if (!ImpUse.isReg() || !ImpUse.isUse() ||
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";
      return false;
    }
  }

  // Make sure we aren't losing exec uses in the td files. This mostly requires
  // being careful when using let Uses to try to add other use registers.
  if (shouldReadExec(MI)) {
    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";
      return false;
    }
  }

  if (isSMRD(MI)) {
    if (MI.mayStore()) {
      // The register offset form of scalar stores may only use m0 as the
      // soffset register.
      const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
      if (Soff && Soff->getReg() != AMDGPU::M0) {
        ErrInfo = "scalar stores must use m0 as offset register";
        return false;
      }
    }
  }

  if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
    const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";
      return false;
    }
  }

  const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
  if (DppCt) {
    using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
      return false;
    }
  }

  return true;
}
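
// Illustrative note (assumed invocation; the flags are standard llc options):
// these checks run under the machine verifier, e.g.
//   llc -march=amdgcn -verify-machineinstrs < test.ll
// and a failed check surfaces the ErrInfo string set above in the verifier's
// diagnostic output.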

unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::WWM: return AMDGPU::WWM;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
  // FIXME: These are not consistently handled, and selected when the carry is
  // used.
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64
                           : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
  }
  llvm_unreachable(
      "Unexpected scalar opcode without corresponding vector one!");
}
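
// Illustrative sketch (assumed, simplified MIR; register classes are
// hypothetical): getVALUOp() drives the SALU->VALU rewrite when an SALU
// result must instead live in VGPRs, e.g.
//   %2:sreg_32 = S_AND_B32 %0:sreg_32, %1:sreg_32
// is re-encoded (with operands legalized separately) as
//   %2:vgpr_32 = V_AND_B32_e64 %0, %1, implicit $exec
// The opcode mapping itself is exactly the switch above.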

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1) {
    unsigned Reg = MI.getOperand(OpNo).getReg();

    if (TargetRegisterInfo::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return RI.getPhysRegClass(Reg);
  }

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineBasicBlock *MBB = MI.getParent();
  MachineOperand &MO = MI.getOperand(OpIdx);
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg())
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = AMDGPU::S_MOV_B32;

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
    VRC = &AMDGPU::VReg_64RegClass;
  else
    VRC = &AMDGPU::VGPR_32RegClass;

  unsigned Reg = MRI.createVirtualRegister(VRC);
  DebugLoc DL = MBB->findDebugLoc(I);
  BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
  MO.ChangeToRegister(Reg, false);
}
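
// Illustrative before/after (assumed, simplified MIR) for the helper above:
// given an operand that is illegal in its current form, e.g. an SGPR source
// where only VGPRs are accepted,
//   %2:vgpr_32 = V_ADD_F32_e32 %0:sreg_32, %1:vgpr_32
// the operand is rewritten through a freshly created VGPR:
//   %3:vgpr_32 = COPY %0:sreg_32
//   %2:vgpr_32 = V_ADD_F32_e32 %3, %1:vgpr_32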

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
    BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
        .addReg(SuperReg.getReg(), 0, SubIdx);
    return SubReg;
  }

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
      .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());

  BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
      .addReg(NewSuperReg, 0, SubIdx);

  return SubReg;
}

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
    MachineBasicBlock::iterator MII,
    MachineRegisterInfo &MRI,
    MachineOperand &Op,
    const TargetRegisterClass *SuperRC,
    unsigned SubIdx,
    const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}
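
// Worked example for the immediate path above: splitting the 64-bit
// immediate 0x123456789ABCDEF0 yields
//   sub0 -> 0x9ABCDEF0 (low 32 bits, truncation)
//   sub1 -> 0x12345678 (high 32 bits, from Op.getImm() >> 32)
// matching how 64-bit operations are decomposed into 32-bit halves.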

// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
  assert(Inst.getNumExplicitOperands() == 3);
  MachineOperand Op1 = Inst.getOperand(1);
  Inst.RemoveOperand(1);
  Inst.addOperand(Op1);
}

bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
                                    const MCOperandInfo &OpInfo,
                                    const MachineOperand &MO) const {
  if (!MO.isReg())
    return false;

  unsigned Reg = MO.getReg();
  const TargetRegisterClass *RC =
      TargetRegisterInfo::isVirtualRegister(Reg) ?
      MRI.getRegClass(Reg) :
      RI.getPhysRegClass(Reg);

  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
  RC = TRI->getSubRegClass(RC, MO.getSubReg());

  // In order to be legal, the common sub-class must be equal to the
  // class of the current operand. For example:
  //
  // v_mov_b32 s0 ; Operand defined as vsrc_b32
  //              ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
  //
  // s_sendmsg 0, s0 ; Operand defined as m0reg
  //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL

  return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
}

bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                                     const MCOperandInfo &OpInfo,
                                     const MachineOperand &MO) const {
  if (MO.isReg())
    return isLegalRegOperand(MRI, OpInfo, MO);

  // Handle non-register types that are treated like immediates.
  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
  return true;
}

bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                                 const MachineOperand *MO) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &InstDesc = MI.getDesc();
  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
  const TargetRegisterClass *DefinedRC =
      OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
  if (!MO)
    MO = &MI.getOperand(OpIdx);

  if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
    RegSubRegPair SGPRUsed;
    if (MO->isReg())
      SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (i == OpIdx)
        continue;
      const MachineOperand &Op = MI.getOperand(i);
      if (Op.isReg()) {
        if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
            usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
          return false;
        }
      } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
        return false;
      }
    }
  }

  if (MO->isReg()) {
    assert(DefinedRC);
    return isLegalRegOperand(MRI, OpInfo, *MO);
  }

  // Handle non-register types that are treated like immediates.
  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());

  if (!DefinedRC) {
    // This operand expects an immediate.
    return true;
  }

  return isImmOperandLegal(MI, OpIdx, *MO);
}

void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
                                       MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MCInstrDesc &InstrDesc = get(Opc);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);

  // If there is an implicit SGPR use such as the VCC use for
  // v_addc_u32/v_subb_u32, we may only have one constant bus use.
  //
  // Note we do not need to worry about literal constants here. They are
  // disabled for the operand type for instructions because they will always
  // violate the one constant bus use rule.
  bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
  if (HasImplicitSGPR) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    MachineOperand &Src0 = MI.getOperand(Src0Idx);

    if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
      legalizeOpWithMove(MI, Src0Idx);
  }

  // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
  // both the value to write (src0) and the lane select (src1). Fix up non-SGPR
  // src0/src1 with V_READFIRSTLANE.
  if (Opc == AMDGPU::V_WRITELANE_B32) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    MachineOperand &Src0 = MI.getOperand(Src0Idx);
    const DebugLoc &DL = MI.getDebugLoc();
    if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
          .add(Src0);
      Src0.ChangeToRegister(Reg, false);
    }
    if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      const DebugLoc &DL = MI.getDebugLoc();
      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
          .add(Src1);
      Src1.ChangeToRegister(Reg, false);
    }
    return;
  }

  // VOP2 src0 instructions support all operand types, so we don't need to
  // check their legality. If src1 is already legal, we don't need to do
  // anything.
  if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
    return;

  // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
  // the lane select. Fix up using V_READFIRSTLANE, since we assume that the
  // lane select is uniform.
  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
      RI.isVGPR(MRI, Src1.getReg())) {
    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    const DebugLoc &DL = MI.getDebugLoc();
    BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
        .add(Src1);
    Src1.ChangeToRegister(Reg, false);
    return;
  }

  // We do not use commuteInstruction here because it is too aggressive and
  // will commute whenever possible. We only want to commute here if it
  // improves legality. This can be called a fairly large number of times so
  // don't waste compile time pointlessly swapping and checking legality again.
  if (HasImplicitSGPR || !MI.isCommutable()) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  MachineOperand &Src0 = MI.getOperand(Src0Idx);

  // If src0 can be used as src1, commuting will make the operands legal.
  // Otherwise we have to give up and insert a move.
  //
  // TODO: Other immediate-like operand kinds could be commuted if there was a
  // MachineOperand::ChangeTo* for them.
  if ((!Src1.isImm() && !Src1.isReg()) ||
      !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  int CommutedOpc = commuteOpcode(MI);
  if (CommutedOpc == -1) {
    legalizeOpWithMove(MI, Src1Idx);
    return;
  }

  MI.setDesc(get(CommutedOpc));

  unsigned Src0Reg = Src0.getReg();
  unsigned Src0SubReg = Src0.getSubReg();
  bool Src0Kill = Src0.isKill();

  if (Src1.isImm())
    Src0.ChangeToImmediate(Src1.getImm());
  else if (Src1.isReg()) {
    Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
    Src0.setSubReg(Src1.getSubReg());
  } else
    llvm_unreachable("Should only have register or immediate operands");

  Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
  Src1.setSubReg(Src0SubReg);
}
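
// Illustrative sketch (editorial, not from the original source; MIR operand
// details are approximate): the commute fix-up above turns, e.g.,
//   %2:vgpr_32 = V_ADD_I32_e32 %0:vgpr_32, %1:sgpr_32, implicit-def $vcc
// (illegal: the VOP2 src1 slot cannot take an SGPR) into
//   %2:vgpr_32 = V_ADD_I32_e32 %1:sgpr_32, %0:vgpr_32, implicit-def $vcc
// avoiding the V_MOV_B32 copy that legalizeOpWithMove would otherwise insert.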

// Legalize VOP3 operands. Because all operand types are supported for any
// operand, and since literal constants are not allowed and should never be
// seen, we only need to worry about inserting copies if we use multiple SGPR
// operands.
void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
                                       MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();

  int VOP3Idx[3] = {
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
  };

  // Find the one SGPR operand we are allowed to use.
  unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = VOP3Idx[i];
    if (Idx == -1)
      break;
    MachineOperand &MO = MI.getOperand(Idx);

    // We should never see a VOP3 instruction with an illegal immediate
    // operand.
    if (!MO.isReg())
      continue;

    if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
      continue; // VGPRs are legal

    if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
      SGPRReg = MO.getReg();
      // We can use one SGPR in each VOP3 instruction.
      continue;
    }

    // If we make it this far, then the operand is not legal and we must
    // legalize it.
    legalizeOpWithMove(MI, Idx);
  }
}
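
// Illustrative sketch (editorial, not from the original source; modifier
// operands omitted): a VOP3 such as
//   %3:vgpr_32 = V_MAD_F32 %0:sgpr_32, %1:sgpr_32, %2:vgpr_32
// reads two distinct SGPRs and so exceeds the single constant-bus slot;
// the loop above moves one SGPR source into a VGPR first:
//   %4:vgpr_32 = V_MOV_B32_e32 %1:sgpr_32
//   %3:vgpr_32 = V_MAD_F32 %0:sgpr_32, %4:vgpr_32, %2:vgpr_32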

unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
                                         MachineRegisterInfo &MRI) const {
  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
  unsigned DstReg = MRI.createVirtualRegister(SRC);
  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;

  if (SubRegs == 1) {
    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
        .addReg(SrcReg);
    return DstReg;
  }

  SmallVector<unsigned, 8> SRegs;
  for (unsigned i = 0; i < SubRegs; ++i) {
    unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
    SRegs.push_back(SGPR);
  }

  MachineInstrBuilder MIB =
      BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
              get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addReg(SRegs[i]);
    MIB.addImm(RI.getSubRegFromChannel(i));
  }
  return DstReg;
}
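
// Illustrative sketch (editorial, not from the original source): for a
// 64-bit value in a VGPR pair, the loop above reads one 32-bit channel at a
// time and reassembles an SGPR pair, roughly:
//   %lo:sgpr_32 = V_READFIRSTLANE_B32 %v.sub0
//   %hi:sgpr_32 = V_READFIRSTLANE_B32 %v.sub1
//   %s:sreg_64  = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1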

void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
                                       MachineInstr &MI) const {
  // If the pointer is stored in VGPRs, then we need to move it to
  // SGPRs using v_readfirstlane. This is safe because we only select
  // loads with uniform pointers to SMRD instructions, so we know the
  // pointer value is uniform.
  MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
    SBase->setReg(SGPR);
  }
  MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
  if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
    unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
    SOff->setReg(SGPR);
  }
}
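
// Illustrative sketch (editorial, not from the original source; the opcode
// shown is hypothetical for this situation): an SMRD whose uniform base
// pointer was left in a VGPR pair, e.g. the sbase operand of an
// S_LOAD_DWORD_IMM, is rewritten through readlaneVGPRToSGPR so the operand
// becomes an sreg_64 built from two V_READFIRSTLANE_B32s, as above.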

void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                                         MachineBasicBlock::iterator I,
                                         const TargetRegisterClass *DstRC,
                                         MachineOperand &Op,
                                         MachineRegisterInfo &MRI,
                                         const DebugLoc &DL) const {
  unsigned OpReg = Op.getReg();
  unsigned OpSubReg = Op.getSubReg();

  const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
      RI.getRegClassForReg(MRI, OpReg), OpSubReg);

  // Check if operand is already the correct register class.
  if (DstRC == OpRC)
    return;

  unsigned DstReg = MRI.createVirtualRegister(DstRC);
  MachineInstr *Copy =
      BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);

  Op.setReg(DstReg);
  Op.setSubReg(0);

  MachineInstr *Def = MRI.getVRegDef(OpReg);
  if (!Def)
    return;

  // Try to eliminate the copy if it is copying an immediate value.
  if (Def->isMoveImmediate())
    FoldImmediate(*Copy, *Def, OpReg, &MRI);
}

// Emit the actual waterfall loop, executing the wrapped instruction for each
// unique value of \p Rsrc across all lanes. In the best case we execute 1
// iteration, in the worst case we execute 64 (once per lane).
static void
emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
                          MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
                          const DebugLoc &DL, MachineOperand &Rsrc) {
  MachineBasicBlock::iterator I = LoopBB.begin();

  unsigned VRsrc = Rsrc.getReg();
  unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());

  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

  // Beginning of the loop, read the next Rsrc variant.
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
      .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
      .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
      .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
      .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);

  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
      .addReg(SRsrcSub0)
      .addImm(AMDGPU::sub0)
      .addReg(SRsrcSub1)
      .addImm(AMDGPU::sub1)
      .addReg(SRsrcSub2)
      .addImm(AMDGPU::sub2)
      .addReg(SRsrcSub3)
      .addImm(AMDGPU::sub3);

  // Update Rsrc operand to use the SGPR Rsrc.
  Rsrc.setReg(SRsrc);
  Rsrc.setIsKill(true);

  // Identify all lanes with identical Rsrc operands in their VGPRs.
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
      .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
      .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
      .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
      .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
      .addReg(CondReg0)
      .addReg(CondReg1);

  MRI.setSimpleHint(SaveExec, AndCond);

  // Update EXEC to matching lanes, saving original to SaveExec.
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
      .addReg(AndCond, RegState::Kill);

  // The original instruction is here; we insert the terminators after it.
  I = LoopBB.end();

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
      .addReg(AMDGPU::EXEC)
      .addReg(SaveExec);
  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
}
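
// Illustrative sketch (editorial, not from the original source): per
// iteration the loop behaves roughly like
//   s_rsrc = readfirstlane(v_rsrc)         // pick one lane's descriptor
//   mask   = (s_rsrc == v_rsrc) per lane   // lanes sharing that value
//   exec   = exec & mask, saving old exec  // run just those lanes
//   <wrapped instruction using s_rsrc>
//   exec   = exec ^ saved_exec             // retire the finished lanes
//   s_cbranch_execnz loop                  // repeat until all lanes done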

// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
// with SGPRs by iterating over all unique values across all lanes.
static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
                              MachineOperand &Rsrc, MachineDominatorTree *MDT) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator I(&MI);
  const DebugLoc &DL = MI.getDebugLoc();

  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Save the EXEC mask.
  BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
      .addReg(AMDGPU::EXEC);

  // Killed uses in the instruction we are waterfalling around will be
  // incorrect due to the added control-flow.
  for (auto &MO : MI.uses()) {
    if (MO.isReg() && MO.isUse()) {
      MRI.clearKillFlags(MO.getReg());
    }
  }

  // To insert the loop we need to split the block. Move everything after this
  // point to a new block, and insert a new empty block between the two.
  MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;

  MF.insert(MBBI, LoopBB);
  MF.insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(RemainderBB);

  // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
  MachineBasicBlock::iterator J = I++;
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
  LoopBB->splice(LoopBB->begin(), &MBB, J);

  MBB.addSuccessor(LoopBB);

  // Update dominators. We know that MBB immediately dominates LoopBB, that
  // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
  // dominates all of the successors transferred to it from MBB that MBB used
  // to dominate.
  if (MDT) {
    MDT->addNewBlock(LoopBB, &MBB);
    MDT->addNewBlock(RemainderBB, LoopBB);
    for (auto &Succ : RemainderBB->successors()) {
      if (MDT->dominates(&MBB, Succ)) {
        MDT->changeImmediateDominator(Succ, RemainderBB);
      }
    }
  }

  emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);

  // Restore the EXEC mask.
  MachineBasicBlock::iterator First = RemainderBB->begin();
  BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
      .addReg(SaveExec);
}
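
// Illustrative sketch (editorial, not from the original source): the
// resulting CFG is
//   MBB -> LoopBB -> RemainderBB -> (original successors of MBB)
//            ^  |
//            +--+   backedge taken while any lane's Rsrc is unhandled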

// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
static std::tuple<unsigned, unsigned>
extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the ptr from the resource descriptor.
  unsigned RsrcPtr =
      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  // Create an empty resource descriptor.
  unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

  // Zero64 = 0
  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
      .addImm(0);

  // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
      .addImm(RsrcDataFormat & 0xFFFFFFFF);

  // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
      .addImm(RsrcDataFormat >> 32);

  // NewSRsrc = {Zero64, SRsrcFormat}
  BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
      .addReg(Zero64)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(SRsrcFormatLo)
      .addImm(AMDGPU::sub2)
      .addReg(SRsrcFormatHi)
      .addImm(AMDGPU::sub3);

  return std::make_tuple(RsrcPtr, NewSRsrc);
}
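
// Illustrative sketch (editorial, not from the original source; field
// interpretation is approximate): the replacement descriptor built above is
// roughly
//   NewSRsrc = { base = 0 (sub0_sub1), default data format (sub2, sub3) }
// so a later ADDR64 access computes address = 0 + vaddr, with the real base
// pointer folded into vaddr by the caller.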

void SIInstrInfo::legalizeOperands(MachineInstr &MI,
                                   MachineDominatorTree *MDT) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Legalize VOP2
  if (isVOP2(MI) || isVOPC(MI)) {
    legalizeOperandsVOP2(MRI, MI);
    return;
  }

  // Legalize VOP3
  if (isVOP3(MI)) {
    legalizeOperandsVOP3(MRI, MI);
    return;
  }

  // Legalize SMRD
  if (isSMRD(MI)) {
    legalizeOperandsSMRD(MRI, MI);
    return;
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI.getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
      if (!MI.getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI.getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      MachineOperand &Op = MI.getOperand(I);
      if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
        continue;

      // MI is a PHI instruction.
      MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
      MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();

      // Avoid creating no-op copies with the same src and dst reg class. These
      // confuse some of the machine passes.
      legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
    }
  }

  // REG_SEQUENCE doesn't really require operand legalization, but if one has a
  // VGPR dest type and SGPR sources, insert copies so all operands are
  // VGPRs. This seems to help operand folding / the register coalescer.
  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
    MachineBasicBlock *MBB = MI.getParent();
    const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
    if (RI.hasVGPRs(DstRC)) {
      // Update all the operands so they are VGPR register classes. These may
      // not be the same register class because REG_SEQUENCE supports mixing
      // subregister index types e.g. sub0_sub1 + sub2 + sub3
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        MachineOperand &Op = MI.getOperand(I);
        if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
          continue;

        const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
        const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
        if (VRC == OpRC)
          continue;

        legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
        Op.setIsKill();
      }
    }

    return;
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Src0 = MI.getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock *MBB = MI.getParent();
      MachineOperand &Op = MI.getOperand(1);
      legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
    }
    return;
  }

  // Legalize SI_INIT_M0
  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
    MachineOperand &Src = MI.getOperand(0);
    if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
      Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
    return;
  }

  // Legalize MIMG and MUBUF/MTBUF for shaders.
  //
  // Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
  // scratch memory access. In both cases, the legalization never involves
  // conversion to the addr64 form.
  if (isMIMG(MI) ||
      (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
       (isMUBUF(MI) || isMTBUF(MI)))) {
    MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
      SRsrc->setReg(SGPR);
    }

    MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
      unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
      SSamp->setReg(SGPR);
    }
    return;
  }

  // Legalize MUBUF* instructions.
  int RsrcIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
  if (RsrcIdx != -1) {
    // We have a MUBUF instruction.
    MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
    unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
    if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
                             RI.getRegClass(RsrcRC))) {
      // The operands are legal.
      // FIXME: We may need to legalize operands besides srsrc.
      return;
    }

    // Legalize a VGPR Rsrc.
    //
    // If the instruction is _ADDR64, we can avoid a waterfall by extracting
    // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
    // a zero-value SRsrc.
    //
    // If the instruction is _OFFSET (both idxen and offen disabled), and we
    // support ADDR64 instructions, we can convert to ADDR64 and do the same as
    // above.
    //
    // Otherwise we are on non-ADDR64 hardware, and/or we have
    // idxen/offen/bothen and we fall back to a waterfall loop.

    MachineBasicBlock &MBB = *MI.getParent();

    MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
      // This is already an ADDR64 instruction so we need to add the pointer
      // extracted from the resource descriptor to the current value of VAddr.
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

      unsigned RsrcPtr, NewSRsrc;
      std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);

      // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
      DebugLoc DL = MI.getDebugLoc();
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
          .addReg(RsrcPtr, 0, AMDGPU::sub0)
          .addReg(VAddr->getReg(), 0, AMDGPU::sub0);

      // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
          .addReg(RsrcPtr, 0, AMDGPU::sub1)
          .addReg(VAddr->getReg(), 0, AMDGPU::sub1);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
          .addReg(NewVAddrLo)
          .addImm(AMDGPU::sub0)
          .addReg(NewVAddrHi)
          .addImm(AMDGPU::sub1);

      VAddr->setReg(NewVAddr);
      Rsrc->setReg(NewSRsrc);
    } else if (!VAddr && ST.hasAddr64()) {
      // This instruction is the _OFFSET variant, so we need to convert it to
      // ADDR64.
      assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
             < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
             "FIXME: Need to emit flat atomics here");

      unsigned RsrcPtr, NewSRsrc;
      std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);

      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
      MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
      MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());

      // Atomics with return have an additional tied operand and are
      // missing some of the special bits.
      MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
      MachineInstr *Addr64;

      if (!VDataIn) {
        // Regular buffer load / store.
        MachineInstrBuilder MIB =
            BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
                .add(*VData)
                .addReg(NewVAddr)
                .addReg(NewSRsrc)
                .add(*SOffset)
                .add(*Offset);

        // Atomics do not have this operand.
        if (const MachineOperand *GLC =
                getNamedOperand(MI, AMDGPU::OpName::glc)) {
          MIB.addImm(GLC->getImm());
        }

        MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));

        if (const MachineOperand *TFE =
                getNamedOperand(MI, AMDGPU::OpName::tfe)) {
          MIB.addImm(TFE->getImm());
        }

        MIB.cloneMemRefs(MI);
        Addr64 = MIB;
      } else {
        // Atomics with return.
        Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
                     .add(*VData)
                     .add(*VDataIn)
                     .addReg(NewVAddr)
                     .addReg(NewSRsrc)
                     .add(*SOffset)
                     .add(*Offset)
                     .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
                     .cloneMemRefs(MI);
      }

      MI.removeFromParent();

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
          .addReg(RsrcPtr, 0, AMDGPU::sub0)
          .addImm(AMDGPU::sub0)
          .addReg(RsrcPtr, 0, AMDGPU::sub1)
          .addImm(AMDGPU::sub1);
    } else {
      // This is another variant; legalize Rsrc with a waterfall loop from
      // VGPRs to SGPRs.
      loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
    }
  }
}
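
// Illustrative sketch (editorial, not from the original source; operand
// lists are abbreviated): the _OFFSET to _ADDR64 rewrite above turns roughly
//   BUFFER_LOAD_DWORD_OFFSET %vdata, %vrsrc, %soffset, %offset
// into
//   (%ptr, %zero_rsrc) = extractRsrcPtr(%vrsrc)
//   BUFFER_LOAD_DWORD_ADDR64 %vdata, %ptr, %zero_rsrc, %soffset, %offset
// where the 64-bit base now travels in vaddr and the descriptor base is 0.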

void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
                             MachineDominatorTree *MDT) const {
  SetVectorType Worklist;
  Worklist.insert(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr &Inst = *Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst.getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst.getOpcode();
    unsigned NewOpcode = getVALUOp(Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      break;
    case AMDGPU::S_ADD_U64_PSEUDO:
    case AMDGPU::S_SUB_U64_PSEUDO:
      splitScalar64BitAddSub(Worklist, Inst, MDT);
      Inst.eraseFromParent();
      continue;
    case AMDGPU::S_ADD_I32:
    case AMDGPU::S_SUB_I32:
      // FIXME: The u32 versions currently selected use the carry.
      if (moveScalarAddSub(Worklist, Inst, MDT))
        continue;

      // Default handling
      break;
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_NAND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_NOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_XNOR_B64:
      if (ST.hasDLInsts())
        splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
      else
        splitScalar64BitXnor(Worklist, Inst, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_ANDN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_ORN2_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_BFE_I64:
      splitScalar64BitBFE(Worklist, Inst);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_LSHL_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B32:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHL_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHLREV_B64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_ASHR_I64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
      break;

    case AMDGPU::S_ABS_I32:
      lowerScalarAbs(Worklist, Inst);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_CBRANCH_SCC0:
    case AMDGPU::S_CBRANCH_SCC1:
      // Clear unused bits of vcc
      BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
              AMDGPU::VCC)
          .addReg(AMDGPU::EXEC)
          .addReg(AMDGPU::VCC);
      break;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");

    case AMDGPU::S_PACK_LL_B32_B16:
    case AMDGPU::S_PACK_LH_B32_B16:
    case AMDGPU::S_PACK_HH_B32_B16:
      movePackToVALU(Worklist, MRI, Inst);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_XNOR_B32:
      lowerScalarXnor(Worklist, Inst);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_NAND_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_NOR_B32:
      splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_ANDN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst.eraseFromParent();
      continue;

    case AMDGPU::S_ORN2_B32:
      splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst.eraseFromParent();
      continue;
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst, MDT);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst.setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.
    for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst.getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
        // Only propagate through live-def of SCC.
        if (Op.isDef() && !Op.isDead())
          addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
        Inst.RemoveOperand(i);
      }
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst.addOperand(MachineOperand::CreateImm(0));
      Inst.addOperand(MachineOperand::CreateImm(Size));

    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst.addOperand(MachineOperand::CreateImm(0));
    }

    Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
      Inst.RemoveOperand(2); // Remove old immediate.
      Inst.addOperand(MachineOperand::CreateImm(Offset));
      Inst.addOperand(MachineOperand::CreateImm(BitWidth));
    }

    bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
    unsigned NewDstReg = AMDGPU::NoRegister;
    if (HasDst) {
      unsigned DstReg = Inst.getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(DstReg))
        continue;

      // Update the destination register class.
      const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
      if (!NewDstRC)
        continue;

      if (Inst.isCopy() &&
          TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
          NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
        // Instead of creating a copy where src and dst are the same register
        // class, we just replace all uses of dst with src. These kinds of
        // copies interfere with the heuristics MachineSink uses to decide
        // whether or not to split a critical edge, since the pass assumes
        // that copies will end up as machine instructions and not be
        // eliminated.
        addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
        MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
        MRI.clearKillFlags(Inst.getOperand(1).getReg());
        Inst.getOperand(0).setReg(DstReg);

        // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
        // these are deleted later, but at -O0 it would leave a suspicious
        // looking illegal copy of an undef register.
        for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
          Inst.RemoveOperand(I);
        Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
        continue;
      }

      NewDstReg = MRI.createVirtualRegister(NewDstRC);
      MRI.replaceRegWith(DstReg, NewDstReg);
    }

    // Legalize the operands
    legalizeOperands(Inst, MDT);

    if (HasDst)
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
  }
}
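
// Illustrative sketch (editorial, not from the original source): moving
//   %2:sreg_32 = S_SEXT_I32_I8 %1:sreg_32
// to the VALU rewrites it as a bitfield extract with an explicit
// offset/width pair,
//   %2:vgpr_32 = V_BFE_I32 %1, 0, 8
// and every SALU user of %2 is pushed onto the worklist to be moved next.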

// Add/sub require special handling to deal with carry outs.
bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                                   MachineDominatorTree *MDT) const {
  if (ST.hasAddNoCarry()) {
    // Assume there is no user of scc since we don't select this in that case.
    // Since scc isn't used, it doesn't really matter if the i32 or u32 variant
    // is used.

    MachineBasicBlock &MBB = *Inst.getParent();
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

    unsigned OldDstReg = Inst.getOperand(0).getReg();
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    unsigned Opc = Inst.getOpcode();
    assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);

    unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
      AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;

    assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
    Inst.RemoveOperand(3);

    Inst.setDesc(get(NewOpc));
    Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
    Inst.addImplicitDefUseOperands(*MBB.getParent());
    MRI.replaceRegWith(OldDstReg, ResultReg);
    legalizeOperands(Inst, MDT);

    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    return true;
  }

  return false;
}
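
// Illustrative sketch (editorial, not from the original source): on a
// subtarget with carry-less adds,
//   %0:sreg_32 = S_ADD_I32 %a, %b, implicit-def dead $scc
// becomes
//   %0:vgpr_32 = V_ADD_U32_e64 %a, %b, 0   // trailing 0 is the clamp bit
// with the dead SCC def simply dropped.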

void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
                                 MachineInstr &Inst) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src = Inst.getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  unsigned SubOp = ST.hasAddNoCarry() ?
    AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;

  BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
      .addImm(0)
      .addReg(Src.getReg());

  BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
      .addReg(Src.getReg())
      .addReg(TmpReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
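
// Illustrative sketch (editorial, not from the original source): the
// lowering above implements abs(x) = max(x, 0 - x), i.e.
//   %tmp:vgpr_32    = V_SUB_I32_e32 0, %x
//   %result:vgpr_32 = V_MAX_I32_e64 %x, %tmp
// using two VALU instructions in place of the single scalar S_ABS_I32.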

void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
                                  MachineInstr &Inst) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);

  if (ST.hasDLInsts()) {
    unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
    legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
        .add(Src0)
        .add(Src1);

    MRI.replaceRegWith(Dest.getReg(), NewDest);
    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
  } else {
    // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
    // invert either source and then perform the XOR. If either source is a
    // scalar register, then we can leave the inversion on the scalar unit to
    // achieve a better distribution of scalar and vector instructions.
    bool Src0IsSGPR = Src0.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
    bool Src1IsSGPR = Src1.isReg() &&
                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
    MachineInstr *Not = nullptr;
    MachineInstr *Xor = nullptr;
    unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

    // Build a pair of scalar instructions and add them to the work list.
    // The next iteration over the work list will lower these to the vector
    // unit as necessary.
    if (Src0IsSGPR) {
      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
                .add(Src0);
      Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
                .addReg(Temp)
                .add(Src1);
    } else if (Src1IsSGPR) {
      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
                .add(Src1);
      Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
                .add(Src0)
                .addReg(Temp);
    } else {
      Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
                .add(Src0)
                .add(Src1);
      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
                .addReg(Temp);
      Worklist.insert(Not);
    }

    MRI.replaceRegWith(Dest.getReg(), NewDest);

    Worklist.insert(Xor);

    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
  }
}
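
// Illustrative sketch (editorial, not from the original source): without
// V_XNOR, lowering
//   %d:sreg_32 = S_XNOR_B32 %s:sreg_32, %v_src
// with an SGPR src0 emits
//   %t = S_NOT_B32 %s          // stays on the SALU
//   %d = S_XOR_B32 %t, %v_src  // re-queued; becomes V_XOR_B32 if needed
// so at most the XOR has to migrate to the vector unit.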

void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
                                      MachineInstr &Inst,
                                      unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);

  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
                          .add(Src0)
                          .add(Src1);

  MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
                           .addReg(Interm);

  Worklist.insert(&Op);
  Worklist.insert(&Not);

  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}

void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
                                     MachineInstr &Inst,
                                     unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);

  unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
                           .add(Src1);

  MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
                          .add(Src0)
                          .addReg(Interm);

  Worklist.insert(&Not);
  Worklist.insert(&Op);

  MRI.replaceRegWith(Dest.getReg(), NewDest);
  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}

void SIInstrInfo::splitScalar64BitUnaryOp(
    SetVectorType &Worklist, MachineInstr &Inst,
    unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  DebugLoc DL = Inst.getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
      .addReg(DestSub0)
      .addImm(AMDGPU::sub0)
      .addReg(DestSub1)
      .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);

  // We don't need to legalizeOperands here because for a single operand, src0
  // will support any kind of input.

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
4733
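// Expand S_ADD_U64_PSEUDO/S_SUB_U64_PSEUDO into a pair of 32-bit VALU
// operations chained through a carry register. Roughly, for the add case
// (illustrative registers):
//   v_add_i32_e64  d.lo, carry, a.lo, b.lo
//   v_addc_u32_e64 d.hi, dead,  a.hi, b.hi, carry
//   d = REG_SEQUENCE d.lo, sub0, d.hi, sub1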
void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
                                         MachineInstr &Inst,
                                         MachineDominatorTree *MDT) const {
  bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);

  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
  const DebugLoc &DL = Inst.getDebugLoc();
  MachineBasicBlock::iterator MII = Inst;

  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
  MachineInstr *LoHalf =
    BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
    .addReg(CarryReg, RegState::Define)
    .add(SrcReg0Sub0)
    .add(SrcReg1Sub0)
    .addImm(0); // clamp bit

  unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
  MachineInstr *HiHalf =
    BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
    .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
    .add(SrcReg0Sub1)
    .add(SrcReg1Sub1)
    .addReg(CarryReg, RegState::Kill)
    .addImm(0); // clamp bit

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  legalizeOperands(*LoHalf, MDT);
  legalizeOperands(*HiHalf, MDT);

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}

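// Split a 64-bit scalar binary operation into two 32-bit operations on the
// sub0/sub1 halves, e.g. for S_AND_B64 (a rough sketch; Opcode is expected to
// be the corresponding 32-bit form):
//   s_and_b64 d, a, b  ->  s_and_b32 d.lo, a.lo, b.lo
//                          s_and_b32 d.hi, a.hi, b.hi
//                          d = REG_SEQUENCE d.lo, sub0, d.hi, sub1
// Both halves are queued so the worklist can move them to the VALU.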
void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
                                           MachineInstr &Inst, unsigned Opcode,
                                           MachineDominatorTree *MDT) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
  DebugLoc DL = Inst.getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);
  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
                              .add(SrcReg0Sub0)
                              .add(SrcReg1Sub0);

  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
                              .add(SrcReg0Sub1)
                              .add(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);

  // Move all users of this moved value.
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}

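// Lower S_XNOR_B64 as xor(not(x), y) rather than not(xor(x, y)): inverting
// whichever source is known to be an SGPR first gives the S_NOT a chance to
// remain a scalar operation. Roughly:
//   s_xnor_b64 d, a, b  ->  s_not_b64 t, a
//                           s_xor_b64 d, t, b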
void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
                                       MachineInstr &Inst,
                                       MachineDominatorTree *MDT) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());

  unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  MachineOperand *Op0;
  MachineOperand *Op1;

  if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
    Op0 = &Src0;
    Op1 = &Src1;
  } else {
    Op0 = &Src1;
    Op1 = &Src0;
  }

  BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
    .add(*Op0);

  unsigned NewDest = MRI.createVirtualRegister(DestRC);

  MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
    .addReg(Interm)
    .add(*Op1);

  MRI.replaceRegWith(Dest.getReg(), NewDest);

  Worklist.insert(&Xor);
}

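// Lower a 64-bit scalar bit count by counting each 32-bit half and summing
// the results through V_BCNT_U32_B32's accumulator operand. Roughly:
//   s_bcnt1_i32_b64 d, s  ->  v_bcnt_u32_b32 t, s.lo, 0
//                             v_bcnt_u32_b32 d, s.hi, t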
void SIInstrInfo::splitScalar64BitBCNT(
    SetVectorType &Worklist, MachineInstr &Inst) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  MachineOperand &Src = Inst.getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);

  BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  // We don't need to legalize operands here. src0 for either instruction can
  // be an SGPR, and the second input is unused or determined here.
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

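// Lower the sext_inreg cases of S_BFE_I64. For widths below 32 the field is
// sign-extended out of the low half and the sign is then broadcast into the
// high half; a rough sketch (illustrative registers):
//   v_bfe_i32     d.lo, s.lo, 0, width
//   v_ashrrev_i32 d.hi, 31, d.lo
// For a width of exactly 32 only the high-half shift is needed.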
void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
                                      MachineInstr &Inst) const {
  MachineBasicBlock &MBB = *Inst.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineBasicBlock::iterator MII = Inst;
  const DebugLoc &DL = Inst.getDebugLoc();

  MachineOperand &Dest = Inst.getOperand(0);
  uint32_t Imm = Inst.getOperand(2).getImm();
  uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
  uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

  (void) Offset;

  // Only sext_inreg cases handled.
  assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
         Offset == 0 && "Not implemented");

  if (BitWidth < 32) {
    unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
      .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
      .addImm(0)
      .addImm(BitWidth);

    BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
      .addImm(31)
      .addReg(MidRegLo);

    BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
      .addReg(MidRegLo)
      .addImm(AMDGPU::sub0)
      .addReg(MidRegHi)
      .addImm(AMDGPU::sub1);

    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
    return;
  }

  MachineOperand &Src = Inst.getOperand(1);
  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);

  BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
    .addImm(31)
    .addReg(Src.getReg(), 0, AMDGPU::sub0);

  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
    .addReg(Src.getReg(), 0, AMDGPU::sub0)
    .addImm(AMDGPU::sub0)
    .addReg(TmpReg)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

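// Queue every user of DstReg that reads it through an operand which cannot
// take a VGPR. Once an instruction is queued, the iterator is advanced past
// any remaining uses inside that same instruction so it is only visited once.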
void SIInstrInfo::addUsersToMoveToVALUWorklist(
    unsigned DstReg,
    MachineRegisterInfo &MRI,
    SetVectorType &Worklist) const {
  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
         E = MRI.use_end(); I != E;) {
    MachineInstr &UseMI = *I->getParent();

    unsigned OpNo = 0;

    switch (UseMI.getOpcode()) {
    case AMDGPU::COPY:
    case AMDGPU::WQM:
    case AMDGPU::WWM:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::PHI:
    case AMDGPU::INSERT_SUBREG:
      break;
    default:
      OpNo = I.getOperandNo();
      break;
    }

    if (!RI.hasVGPRs(getOpRegClass(UseMI, OpNo))) {
      Worklist.insert(&UseMI);

      do {
        ++I;
      } while (I != E && I->getParent() == &UseMI);
    } else {
      ++I;
    }
  }
}

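// Expand the scalar S_PACK_* pseudos into VALU bit manipulation. As one
// example, a rough sketch of the S_PACK_LL_B32_B16 case handled below
// (illustrative registers):
//   v_mov_b32     t0, 0xffff
//   v_and_b32     t1, t0, src0
//   v_lshl_or_b32 dst, src1, 16, t1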
void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
                                 MachineRegisterInfo &MRI,
                                 MachineInstr &Inst) const {
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineBasicBlock *MBB = Inst.getParent();
  MachineOperand &Src0 = Inst.getOperand(1);
  MachineOperand &Src1 = Inst.getOperand(2);
  const DebugLoc &DL = Inst.getDebugLoc();

  switch (Inst.getOpcode()) {
  case AMDGPU::S_PACK_LL_B32_B16: {
    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
    // 0.
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
      .addImm(0xffff);

    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
      .addReg(ImmReg, RegState::Kill)
      .add(Src0);

    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
      .add(Src1)
      .addImm(16)
      .addReg(TmpReg, RegState::Kill);
    break;
  }
  case AMDGPU::S_PACK_LH_B32_B16: {
    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
      .addImm(0xffff);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
      .addReg(ImmReg, RegState::Kill)
      .add(Src0)
      .add(Src1);
    break;
  }
  case AMDGPU::S_PACK_HH_B32_B16: {
    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
      .addImm(16)
      .add(Src0);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
      .addImm(0xffff0000);
    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
      .add(Src1)
      .addReg(ImmReg, RegState::Kill)
      .addReg(TmpReg, RegState::Kill);
    break;
  }
  default:
    llvm_unreachable("unhandled s_pack_* instruction");
  }

  MachineOperand &Dest = Inst.getOperand(0);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}

void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                               MachineInstr &SCCDefInst,
                                               SetVectorType &Worklist) const {
  // Ensure that def inst defines SCC, which is still live.
  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
         !Op.isDead() && Op.getParent() == &SCCDefInst);
  // This assumes that all the users of SCC are in the same block
  // as the SCC def.
  for (MachineInstr &MI : // Skip the def inst itself.
       make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
                  SCCDefInst.getParent()->end())) {
    // Check if SCC is used first.
    if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
      Worklist.insert(&MI);
    // Exit if we find another SCC def.
    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
      return;
  }
}

const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
    const MachineInstr &Inst) const {
  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);

  switch (Inst.getOpcode()) {
  // For target instructions, getOpRegClass just returns the virtual register
  // class associated with the operand, so we need to find an equivalent VGPR
  // register class in order to move the instruction to the VALU.
  case AMDGPU::COPY:
  case AMDGPU::PHI:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
  case AMDGPU::WQM:
  case AMDGPU::WWM:
    if (RI.hasVGPRs(NewDstRC))
      return nullptr;

    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
    if (!NewDstRC)
      return nullptr;
    return NewDstRC;
  default:
    return NewDstRC;
  }
}

// Find the one SGPR operand we are allowed to use.
unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
                                   int OpIndices[3]) const {
  const MCInstrDesc &Desc = MI.getDesc();

  // Find the one SGPR operand we are allowed to use.
  //
  // First we need to consider the instruction's operand requirements before
  // legalizing. Some operands are required to be SGPRs, such as implicit uses
  // of VCC, but we are still bound by the constant bus requirement to only use
  // one.
  //
  // If the operand's class is an SGPR, we can never move it.

  unsigned SGPRReg = findImplicitSGPRRead(MI);
  if (SGPRReg != AMDGPU::NoRegister)
    return SGPRReg;

  unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    if (Idx == -1)
      break;

    const MachineOperand &MO = MI.getOperand(Idx);
    if (!MO.isReg())
      continue;

    // Is this operand statically required to be an SGPR based on the operand
    // constraints?
    const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
    if (IsRequiredSGPR)
      return MO.getReg();

    // If this could be a VGPR or an SGPR, check the dynamic register class.
    unsigned Reg = MO.getReg();
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
      UsedSGPRs[i] = Reg;
  }

  // We don't have a required SGPR operand, so we have a bit more freedom in
  // selecting operands to move.

  // Try to select the most used SGPR. If an SGPR is equal to one of the
  // others, we choose that.
  //
  // e.g.
  // V_FMA_F32 v0, s0, s0, s0 -> No moves
  // V_FMA_F32 v0, s0, s1, s0 -> Move s1

  // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
  // prefer those.

  if (UsedSGPRs[0] != AMDGPU::NoRegister) {
    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[0];
  }

  if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
  }

  return SGPRReg;
}

MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                             unsigned OperandName) const {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  if (Idx == -1)
    return nullptr;

  return &MI.getOperand(Idx);
}

uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
  if (ST.isAmdHsaOS()) {
    // Set ATC = 1. GFX9 doesn't have this bit.
    if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      RsrcDataFormat |= (1ULL << 56);

    // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
    // Note that this disables the TC L2 cache and therefore decreases
    // performance.
    if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
      RsrcDataFormat |= (2ULL << 59);
  }

  return RsrcDataFormat;
}

uint64_t SIInstrInfo::getScratchRsrcWords23() const {
  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
                    AMDGPU::RSRC_TID_ENABLE |
                    0xffffffff; // Size

  // GFX9 doesn't have ELEMENT_SIZE.
  if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
    Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
  }

  // IndexStride = 64.
  Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;

  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
  // Clear them unless we want a huge stride.
  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;

  return Rsrc23;
}

bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();

  return isSMRD(Opc);
}

bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();

  return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
}

unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
                                    int &FrameIndex) const {
  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
  if (!Addr || !Addr->isFI())
    return AMDGPU::NoRegister;

  assert(!MI.memoperands_empty() &&
         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  FrameIndex = Addr->getIndex();
  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
}

unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
                                        int &FrameIndex) const {
  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
  assert(Addr && Addr->isFI());
  FrameIndex = Addr->getIndex();
  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
}

unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  if (!MI.mayLoad())
    return AMDGPU::NoRegister;

  if (isMUBUF(MI) || isVGPRSpill(MI))
    return isStackAccess(MI, FrameIndex);

  if (isSGPRSpill(MI))
    return isSGPRStackAccess(MI, FrameIndex);

  return AMDGPU::NoRegister;
}

unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                         int &FrameIndex) const {
  if (!MI.mayStore())
    return AMDGPU::NoRegister;

  if (isMUBUF(MI) || isVGPRSpill(MI))
    return isStackAccess(MI, FrameIndex);

  if (isSGPRSpill(MI))
    return isSGPRStackAccess(MI, FrameIndex);

  return AMDGPU::NoRegister;
}

unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }

  return Size;
}

unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
  unsigned DescSize = Desc.getSize();

  // If we have a definitive size, we can use it. Otherwise we need to inspect
  // the operands to know the size.
  if (isFixedSize(MI))
    return DescSize;

  // 4-byte instructions may have a 32-bit literal encoded after them. Check
  // operands that could ever be literals.
  if (isVALU(MI) || isSALU(MI)) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if (Src0Idx == -1)
      return DescSize; // No operands.

    if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
      return DescSize + 4;

    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    if (Src1Idx == -1)
      return DescSize;

    if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
      return DescSize + 4;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx == -1)
      return DescSize;

    if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
      return DescSize + 4;

    return DescSize;
  }

  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleSize(MI);
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR: {
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  }
  default:
    return DescSize;
  }
}

bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
  if (!isFLAT(MI))
    return false;

  if (MI.memoperands_empty())
    return true;

  for (const MachineMemOperand *MMO : MI.memoperands()) {
    if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
      return true;
  }
  return false;
}

bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
  return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
}

void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                            MachineBasicBlock *IfEnd) const {
  MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
  assert(TI != IfEntry->end());

  MachineInstr *Branch = &(*TI);
  MachineFunction *MF = IfEntry->getParent();
  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();

  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    MachineInstr *SIIF =
        BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
            .add(Branch->getOperand(0))
            .add(Branch->getOperand(1));
    MachineInstr *SIEND =
        BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
            .addReg(DstReg);

    IfEntry->erase(TI);
    IfEntry->insert(IfEntry->end(), SIIF);
    IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
  }
}

void SIInstrInfo::convertNonUniformLoopRegion(
    MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
  MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
  // We expect 2 terminators, one conditional and one unconditional.
  assert(TI != LoopEnd->end());

  MachineInstr *Branch = &(*TI);
  MachineFunction *MF = LoopEnd->getParent();
  MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();

  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    MachineInstrBuilder HeaderPHIBuilder =
        BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
    for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
                                          E = LoopEntry->pred_end();
         PI != E; ++PI) {
      if (*PI == LoopEnd) {
        HeaderPHIBuilder.addReg(BackEdgeReg);
      } else {
        MachineBasicBlock *PMBB = *PI;
        unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
                             ZeroReg, 0);
        HeaderPHIBuilder.addReg(ZeroReg);
      }
      HeaderPHIBuilder.addMBB(*PI);
    }
    MachineInstr *HeaderPhi = HeaderPHIBuilder;
    MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
                                      get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
                                  .addReg(DstReg)
                                  .add(Branch->getOperand(0));
    MachineInstr *SILOOP =
        BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
            .addReg(BackEdgeReg)
            .addMBB(LoopEntry);

    LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
    LoopEnd->erase(TI);
    LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
    LoopEnd->insert(LoopEnd->end(), SILOOP);
  }
}

ArrayRef<std::pair<int, const char *>>
SIInstrInfo::getSerializableTargetIndices() const {
  static const std::pair<int, const char *> TargetIndices[] = {
      {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
      {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
  return makeArrayRef(TargetIndices);
}

/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
ScheduleHazardRecognizer *
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                                const ScheduleDAG *DAG) const {
  return new GCNHazardRecognizer(DAG->MF);
}

/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
/// pass.
ScheduleHazardRecognizer *
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
  return new GCNHazardRecognizer(MF);
}

std::pair<unsigned, unsigned>
SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
}

ArrayRef<std::pair<unsigned, const char *>>
SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  static const std::pair<unsigned, const char *> TargetFlags[] = {
    { MO_GOTPCREL, "amdgpu-gotprel" },
    { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
    { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
    { MO_REL32_LO, "amdgpu-rel32-lo" },
    { MO_REL32_HI, "amdgpu-rel32-hi" }
  };

  return makeArrayRef(TargetFlags);
}

bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
         MI.modifiesRegister(AMDGPU::EXEC, &RI);
}

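// Return a partially built 32-bit add that cannot produce a visible carry:
// V_ADD_U32_e64 on subtargets that have a no-carry add, otherwise
// V_ADD_I32_e64 with a dead carry-out register that is hinted towards VCC.
// The caller appends the actual source operands.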
MachineInstrBuilder
SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I,
                           const DebugLoc &DL,
                           unsigned DestReg) const {
  if (ST.hasAddNoCarry())
    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);

  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);

  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
           .addReg(UnusedCarry, RegState::Define | RegState::Dead);
}

bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
  case AMDGPU::SI_KILL_I1_TERMINATOR:
    return true;
  default:
    return false;
  }
}

const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
    return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
  case AMDGPU::SI_KILL_I1_PSEUDO:
    return get(AMDGPU::SI_KILL_I1_TERMINATOR);
  default:
    llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
  }
}

bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
  if (!isSMRD(MI))
    return false;

  // Check that it is using a buffer resource.
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
  if (Idx == -1) // e.g. s_memtime
    return false;

  const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
  return RCID == AMDGPU::SReg_128RegClassID;
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td.
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5,
  GFX10 = 6,
  SDWA10 = 7
};

static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
  switch (ST.getGeneration()) {
  default:
    break;
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;
  case AMDGPUSubtarget::GFX10:
    return SIEncodingFamily::GFX10;
  }
  llvm_unreachable("Unknown subtarget generation!");
}

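// Map a pseudo opcode to the concrete MC opcode for the subtarget's encoding
// family as chosen above, with a few overrides (GFX9 renames, SDWA variants,
// and the unpacked-D16 GFX80 buffer forms) applied first.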
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;
  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
  // subtarget has the UnpackedD16VMem feature.
  // TODO: remove this when we discard GFX80 encoding.
  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
    Gen = SIEncodingFamily::GFX80;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}

static
TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
  assert(RegOpnd.isReg());
  return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
                             getRegSubRegPair(RegOpnd);
}

TargetInstrInfo::RegSubRegPair
llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
  assert(MI.isRegSequence());
  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
      auto &RegOp = MI.getOperand(1 + 2 * I);
      return getRegOrUndef(RegOp);
    }
  return TargetInstrInfo::RegSubRegPair();
}

// Try to find the definition of reg:subreg in subreg-manipulation pseudos.
// Following a subreg of reg:subreg isn't supported.
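// E.g. (illustrative) for RSR = %x:sub1 and
//   %x = REG_SEQUENCE %a, sub0, %b, sub1
// RSR is narrowed to plain %b.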
static bool followSubRegDef(MachineInstr &MI,
                            TargetInstrInfo::RegSubRegPair &RSR) {
  if (!RSR.SubReg)
    return false;
  switch (MI.getOpcode()) {
  default: break;
  case AMDGPU::REG_SEQUENCE:
    RSR = getRegSequenceSubReg(MI, RSR.SubReg);
    return true;
  // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg.
  case AMDGPU::INSERT_SUBREG:
    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
      // Inserted the subreg we're looking for.
      RSR = getRegOrUndef(MI.getOperand(2));
    else { // The subreg is in the rest of the reg.
      auto R1 = getRegOrUndef(MI.getOperand(1));
      if (R1.SubReg) // Subreg of subreg isn't supported.
        return false;
      RSR.Reg = R1.Reg;
    }
    return true;
  }
  return false;
}

MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                                     MachineRegisterInfo &MRI) {
  assert(MRI.isSSA());
  if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
    return nullptr;

  auto RSR = P;
  auto *DefInst = MRI.getVRegDef(RSR.Reg);
  while (auto *MI = DefInst) {
    DefInst = nullptr;
    switch (MI->getOpcode()) {
    case AMDGPU::COPY:
    case AMDGPU::V_MOV_B32_e32: {
      auto &Op1 = MI->getOperand(1);
      if (Op1.isReg() &&
          TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
        if (Op1.isUndef())
          return nullptr;
        RSR = getRegSubRegPair(Op1);
        DefInst = MRI.getVRegDef(RSR.Reg);
      }
      break;
    }
    default:
      if (followSubRegDef(*MI, RSR)) {
        if (!RSR.Reg)
          return nullptr;
        DefInst = MRI.getVRegDef(RSR.Reg);
      }
    }
    if (!DefInst)
      return MI;
  }
  return nullptr;
}

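// Scan forward from VReg's definition and report whether EXEC stays constant
// until VReg's last use; all uses must be in the defining block for this
// conservative check to succeed.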
bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
                                               MachineRegisterInfo &MRI) {
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();

  auto *DefI = MRI.getVRegDef(VReg);
  auto *BB = DefI->getParent();

  DenseSet<MachineInstr*> Uses;
  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
    auto *I = Use.getParent();
    if (I->getParent() != BB)
      return false;
    Uses.insert(I);
  }

  auto E = BB->end();
  for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
    Uses.erase(&*I);
    // Don't check the last use.
    if (Uses.empty() || I->modifiesRegister(AMDGPU::EXEC, TRI))
      break;
  }
  return Uses.empty();
}