Blame - llvm/lib/Target/AMDGPU/SIInstrInfo.cpp - toolchain/llvm-project

blob: 48793fcda94d8fd5f5c6b0d45c5bf220051b15b0 [file] [log] [blame]

Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1	//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	10	/// SI Implementation of TargetInstrInfo.
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	11	//
				12	//===----------------------------------------------------------------------===//
				13
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	14	#include "SIInstrInfo.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	15	#include "AMDGPU.h"
				16	#include "AMDGPUSubtarget.h"
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	17	#include "GCNHazardRecognizer.h"
Tom Stellard	16a9a20	2013-08-14 23:24:17 +0000	[diff] [blame]	18	#include "SIDefines.h"
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	19	#include "SIMachineFunctionInfo.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	20	#include "SIRegisterInfo.h"
Tom Stellard	44b30b4	2018-05-22 02:03:23 +0000	[diff] [blame]	21	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	22	#include "Utils/AMDGPUBaseInfo.h"
				23	#include "llvm/ADT/APInt.h"
				24	#include "llvm/ADT/ArrayRef.h"
				25	#include "llvm/ADT/SmallVector.h"
				26	#include "llvm/ADT/StringRef.h"
				27	#include "llvm/ADT/iterator_range.h"
				28	#include "llvm/Analysis/AliasAnalysis.h"
				29	#include "llvm/Analysis/MemoryLocation.h"
Stanislav Mekhanoshin	7fe9a5d	2017-09-13 22:20:47 +0000	[diff] [blame]	30	#include "llvm/Analysis/ValueTracking.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	31	#include "llvm/CodeGen/MachineBasicBlock.h"
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	32	#include "llvm/CodeGen/MachineDominators.h"
Tom Stellard	c5cf2f0	2014-08-21 20:40:54 +0000	[diff] [blame]	33	#include "llvm/CodeGen/MachineFrameInfo.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	34	#include "llvm/CodeGen/MachineFunction.h"
				35	#include "llvm/CodeGen/MachineInstr.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	36	#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	37	#include "llvm/CodeGen/MachineInstrBundle.h"
				38	#include "llvm/CodeGen/MachineMemOperand.h"
				39	#include "llvm/CodeGen/MachineOperand.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	40	#include "llvm/CodeGen/MachineRegisterInfo.h"
Chandler Carruth	6bda14b	2017-06-06 11:49:48 +0000	[diff] [blame]	41	#include "llvm/CodeGen/RegisterScavenging.h"
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	42	#include "llvm/CodeGen/ScheduleDAG.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	43	#include "llvm/CodeGen/SelectionDAGNodes.h"
David Blaikie	b3bde2e	2017-11-17 01:07:10 +0000	[diff] [blame]	44	#include "llvm/CodeGen/TargetOpcodes.h"
				45	#include "llvm/CodeGen/TargetRegisterInfo.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	46	#include "llvm/IR/DebugLoc.h"
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	47	#include "llvm/IR/DiagnosticInfo.h"
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	48	#include "llvm/IR/Function.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	49	#include "llvm/IR/InlineAsm.h"
				50	#include "llvm/IR/LLVMContext.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	51	#include "llvm/MC/MCInstrDesc.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	52	#include "llvm/Support/Casting.h"
				53	#include "llvm/Support/CommandLine.h"
				54	#include "llvm/Support/Compiler.h"
				55	#include "llvm/Support/ErrorHandling.h"
David Blaikie	13e77db	2018-03-23 23:58:25 +0000	[diff] [blame]	56	#include "llvm/Support/MachineValueType.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	57	#include "llvm/Support/MathExtras.h"
				58	#include "llvm/Target/TargetMachine.h"
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	59	#include <cassert>
				60	#include <cstdint>
				61	#include <iterator>
				62	#include <utility>
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	63
				64	using namespace llvm;
				65
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	66	#define GET_INSTRINFO_CTOR_DTOR
				67	#include "AMDGPUGenInstrInfo.inc"
				68
				69	namespace llvm {
				70	namespace AMDGPU {
				71	#define GET_D16ImageDimIntrinsics_IMPL
				72	#define GET_ImageDimIntrinsicTable_IMPL
				73	#define GET_RsrcIntrinsics_IMPL
				74	#include "AMDGPUGenSearchableTables.inc"
				75	}
				76	}
				77
				78
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	79	// Must be at least 4 to be able to branch over minimum unconditional branch
				80	// code. This is only for making it possible to write reasonably small tests for
				81	// long branches.
				82	static cl::opt<unsigned>
				83	BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
				84	cl::desc("Restrict range of branch instructions (DEBUG)"));
				85
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	86	SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	87	: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
				88	RI(ST), ST(ST) {}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	89
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	90	//===----------------------------------------------------------------------===//
				91	// TargetInstrInfo callbacks
				92	//===----------------------------------------------------------------------===//
				93
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	94	static unsigned getNumOperandsNoGlue(SDNode *Node) {
				95	unsigned N = Node->getNumOperands();
				96	while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
				97	--N;
				98	return N;
				99	}
				100
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	101	/// Returns true if both nodes have the same value for the given
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	102	/// operand \p Op, or if both nodes do not have this operand.
				103	static bool nodesHaveSameOperandValue(SDNode N0, SDNode N1, unsigned OpName) {
				104	unsigned Opc0 = N0->getMachineOpcode();
				105	unsigned Opc1 = N1->getMachineOpcode();
				106
				107	int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
				108	int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
				109
				110	if (Op0Idx == -1 && Op1Idx == -1)
				111	return true;
				112
				113
				114	if ((Op0Idx == -1 && Op1Idx != -1) \|\|
				115	(Op1Idx == -1 && Op0Idx != -1))
				116	return false;
				117
				118	// getNamedOperandIdx returns the index for the MachineInstr's operands,
				119	// which includes the result as the first operand. We are indexing into the
				120	// MachineSDNode's operands, so we need to skip the result operand to get
				121	// the real index.
				122	--Op0Idx;
				123	--Op1Idx;
				124
Tom Stellard	b8b8413	2014-09-03 15:22:39 +0000	[diff] [blame]	125	return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	126	}
				127
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	128	bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
Matt Arsenault	a48b866	2015-04-23 23:34:48 +0000	[diff] [blame]	129	AliasAnalysis *AA) const {
				130	// TODO: The generic check fails for VALU instructions that should be
				131	// rematerializable due to implicit reads of exec. We really want all of the
				132	// generic logic for this except for this.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	133	switch (MI.getOpcode()) {
Matt Arsenault	a48b866	2015-04-23 23:34:48 +0000	[diff] [blame]	134	case AMDGPU::V_MOV_B32_e32:
				135	case AMDGPU::V_MOV_B32_e64:
Matt Arsenault	80f766a	2015-09-10 01:23:28 +0000	[diff] [blame]	136	case AMDGPU::V_MOV_B64_PSEUDO:
Matt Arsenault	cba0c6d	2019-02-04 22:26:21 +0000	[diff] [blame]	137	// No implicit operands.
				138	return MI.getNumOperands() == MI.getDesc().getNumOperands();
Matt Arsenault	a48b866	2015-04-23 23:34:48 +0000	[diff] [blame]	139	default:
				140	return false;
				141	}
				142	}
				143
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	144	bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode Load0, SDNode Load1,
				145	int64_t &Offset0,
				146	int64_t &Offset1) const {
				147	if (!Load0->isMachineOpcode() \|\| !Load1->isMachineOpcode())
				148	return false;
				149
				150	unsigned Opc0 = Load0->getMachineOpcode();
				151	unsigned Opc1 = Load1->getMachineOpcode();
				152
				153	// Make sure both are actually loads.
				154	if (!get(Opc0).mayLoad() \|\| !get(Opc1).mayLoad())
				155	return false;
				156
				157	if (isDS(Opc0) && isDS(Opc1)) {
Tom Stellard	20fa0be	2014-10-07 21:09:20 +0000	[diff] [blame]	158
				159	// FIXME: Handle this case:
				160	if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
				161	return false;
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	162
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	163	// Check base reg.
Matt Arsenault	07f904b	2019-03-08 20:30:50 +0000	[diff] [blame]	164	if (Load0->getOperand(0) != Load1->getOperand(0))
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	165	return false;
				166
Matt Arsenault	972c12a	2014-09-17 17:48:32 +0000	[diff] [blame]	167	// Skip read2 / write2 variants for simplicity.
				168	// TODO: We should report true if the used offsets are adjacent (excluded
				169	// st64 versions).
Matt Arsenault	bbc59d8	2019-03-27 15:41:00 +0000	[diff] [blame]	170	int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
				171	int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
				172	if (Offset0Idx == -1 \|\| Offset1Idx == -1)
Matt Arsenault	972c12a	2014-09-17 17:48:32 +0000	[diff] [blame]	173	return false;
				174
Matt Arsenault	bbc59d8	2019-03-27 15:41:00 +0000	[diff] [blame]	175	// XXX - be careful of datalesss loads
				176	// getNamedOperandIdx returns the index for MachineInstrs. Since they
				177	// include the output in the operand list, but SDNodes don't, we need to
				178	// subtract the index by one.
				179	Offset0Idx -= get(Opc0).NumDefs;
				180	Offset1Idx -= get(Opc1).NumDefs;
				181	Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
				182	Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	183	return true;
				184	}
				185
				186	if (isSMRD(Opc0) && isSMRD(Opc1)) {
Nicolai Haehnle	ef44978	2017-04-24 16:53:52 +0000	[diff] [blame]	187	// Skip time and cache invalidation instructions.
				188	if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 \|\|
				189	AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
				190	return false;
				191
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	192	assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
				193
				194	// Check base reg.
				195	if (Load0->getOperand(0) != Load1->getOperand(0))
				196	return false;
				197
Tom Stellard	f0a575f	2015-03-23 16:06:01 +0000	[diff] [blame]	198	const ConstantSDNode *Load0Offset =
				199	dyn_cast<ConstantSDNode>(Load0->getOperand(1));
				200	const ConstantSDNode *Load1Offset =
				201	dyn_cast<ConstantSDNode>(Load1->getOperand(1));
				202
				203	if (!Load0Offset \|\| !Load1Offset)
				204	return false;
				205
Tom Stellard	f0a575f	2015-03-23 16:06:01 +0000	[diff] [blame]	206	Offset0 = Load0Offset->getZExtValue();
				207	Offset1 = Load1Offset->getZExtValue();
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	208	return true;
				209	}
				210
				211	// MUBUF and MTBUF can access the same addresses.
				212	if ((isMUBUF(Opc0) \|\| isMTBUF(Opc0)) && (isMUBUF(Opc1) \|\| isMTBUF(Opc1))) {
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	213
				214	// MUBUF and MTBUF have vaddr at different indices.
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	215	if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) \|\|
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	216	!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) \|\|
Tom Stellard	b8b8413	2014-09-03 15:22:39 +0000	[diff] [blame]	217	!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	218	return false;
				219
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	220	int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
				221	int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
				222
				223	if (OffIdx0 == -1 \|\| OffIdx1 == -1)
				224	return false;
				225
				226	// getNamedOperandIdx returns the index for MachineInstrs. Since they
Matt Arsenault	07f904b	2019-03-08 20:30:50 +0000	[diff] [blame]	227	// include the output in the operand list, but SDNodes don't, we need to
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	228	// subtract the index by one.
Matt Arsenault	28f97f1	2019-03-27 16:12:29 +0000	[diff] [blame]	229	OffIdx0 -= get(Opc0).NumDefs;
				230	OffIdx1 -= get(Opc1).NumDefs;
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	231
				232	SDValue Off0 = Load0->getOperand(OffIdx0);
				233	SDValue Off1 = Load1->getOperand(OffIdx1);
				234
				235	// The offset might be a FrameIndexSDNode.
				236	if (!isa<ConstantSDNode>(Off0) \|\| !isa<ConstantSDNode>(Off1))
				237	return false;
				238
				239	Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
				240	Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
Matt Arsenault	c10853f	2014-08-06 00:29:43 +0000	[diff] [blame]	241	return true;
				242	}
				243
				244	return false;
				245	}
				246
Matt Arsenault	2e99112	2014-09-10 23:26:16 +0000	[diff] [blame]	247	static bool isStride64(unsigned Opc) {
				248	switch (Opc) {
				249	case AMDGPU::DS_READ2ST64_B32:
				250	case AMDGPU::DS_READ2ST64_B64:
				251	case AMDGPU::DS_WRITE2ST64_B32:
				252	case AMDGPU::DS_WRITE2ST64_B64:
				253	return true;
				254	default:
				255	return false;
				256	}
				257	}
				258
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	259	bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
				260	const MachineOperand *&BaseOp,
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	261	int64_t &Offset,
				262	const TargetRegisterInfo *TRI) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	263	unsigned Opc = LdSt.getOpcode();
Matt Arsenault	3add643	2015-10-20 04:35:43 +0000	[diff] [blame]	264
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	265	if (isDS(LdSt)) {
				266	const MachineOperand *OffsetImm =
				267	getNamedOperand(LdSt, AMDGPU::OpName::offset);
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	268	if (OffsetImm) {
				269	// Normal, single offset LDS instruction.
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	270	BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Matt Arsenault	cdd191d	2019-01-28 20:14:49 +0000	[diff] [blame]	271	// TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
				272	// report that here?
				273	if (!BaseOp)
				274	return false;
				275
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	276	Offset = OffsetImm->getImm();
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	277	assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
				278	"operands of type register.");
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	279	return true;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	280	}
				281
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	282	// The 2 offset instructions use offset0 and offset1 instead. We can treat
				283	// these as a load with a single offset if the 2 offsets are consecutive. We
				284	// will use this for some partially aligned loads.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	285	const MachineOperand *Offset0Imm =
				286	getNamedOperand(LdSt, AMDGPU::OpName::offset0);
				287	const MachineOperand *Offset1Imm =
				288	getNamedOperand(LdSt, AMDGPU::OpName::offset1);
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	289
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	290	uint8_t Offset0 = Offset0Imm->getImm();
				291	uint8_t Offset1 = Offset1Imm->getImm();
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	292
Matt Arsenault	84db5d9	2015-07-14 17:57:36 +0000	[diff] [blame]	293	if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	294	// Each of these offsets is in element sized units, so we need to convert
				295	// to bytes of the individual reads.
				296
				297	unsigned EltSize;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	298	if (LdSt.mayLoad())
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	299	EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	300	else {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	301	assert(LdSt.mayStore());
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	302	int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	303	EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	304	}
				305
Matt Arsenault	2e99112	2014-09-10 23:26:16 +0000	[diff] [blame]	306	if (isStride64(Opc))
				307	EltSize *= 64;
				308
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	309	BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	310	Offset = EltSize * Offset0;
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	311	assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
				312	"operands of type register.");
Matt Arsenault	7eb0a10	2014-07-30 01:01:10 +0000	[diff] [blame]	313	return true;
				314	}
				315
				316	return false;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	317	}
				318
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	319	if (isMUBUF(LdSt) \|\| isMTBUF(LdSt)) {
Matt Arsenault	3666629	2016-11-15 20:14:27 +0000	[diff] [blame]	320	const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
				321	if (SOffset && SOffset->isReg())
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	322	return false;
				323
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	324	const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	325	if (!AddrReg)
				326	return false;
				327
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	328	const MachineOperand *OffsetImm =
				329	getNamedOperand(LdSt, AMDGPU::OpName::offset);
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	330	BaseOp = AddrReg;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	331	Offset = OffsetImm->getImm();
Matt Arsenault	3666629	2016-11-15 20:14:27 +0000	[diff] [blame]	332
				333	if (SOffset) // soffset can be an inline immediate.
				334	Offset += SOffset->getImm();
				335
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	336	assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
				337	"operands of type register.");
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	338	return true;
				339	}
				340
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	341	if (isSMRD(LdSt)) {
				342	const MachineOperand *OffsetImm =
				343	getNamedOperand(LdSt, AMDGPU::OpName::offset);
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	344	if (!OffsetImm)
				345	return false;
				346
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	347	const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	348	BaseOp = SBaseReg;
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	349	Offset = OffsetImm->getImm();
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	350	assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
				351	"operands of type register.");
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	352	return true;
				353	}
				354
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	355	if (isFLAT(LdSt)) {
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	356	const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
Matt Arsenault	37a58e0	2017-07-21 18:06:36 +0000	[diff] [blame]	357	if (VAddr) {
				358	// Can't analyze 2 offsets.
				359	if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
				360	return false;
				361
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	362	BaseOp = VAddr;
Matt Arsenault	37a58e0	2017-07-21 18:06:36 +0000	[diff] [blame]	363	} else {
				364	// scratch instructions have either vaddr or saddr.
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	365	BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
Matt Arsenault	37a58e0	2017-07-21 18:06:36 +0000	[diff] [blame]	366	}
				367
				368	Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	369	assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
				370	"operands of type register.");
Matt Arsenault	43578ec	2016-06-02 20:05:20 +0000	[diff] [blame]	371	return true;
				372	}
				373
Matt Arsenault	1acc72f	2014-07-29 21:34:55 +0000	[diff] [blame]	374	return false;
				375	}
				376
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	377	static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
				378	const MachineOperand &BaseOp1,
				379	const MachineInstr &MI2,
				380	const MachineOperand &BaseOp2) {
				381	// Support only base operands with base registers.
				382	// Note: this could be extended to support FI operands.
				383	if (!BaseOp1.isReg() \|\| !BaseOp2.isReg())
				384	return false;
				385
				386	if (BaseOp1.isIdenticalTo(BaseOp2))
Stanislav Mekhanoshin	7fe9a5d	2017-09-13 22:20:47 +0000	[diff] [blame]	387	return true;
				388
				389	if (!MI1.hasOneMemOperand() \|\| !MI2.hasOneMemOperand())
				390	return false;
				391
				392	auto MO1 = *MI1.memoperands_begin();
				393	auto MO2 = *MI2.memoperands_begin();
				394	if (MO1->getAddrSpace() != MO2->getAddrSpace())
				395	return false;
				396
				397	auto Base1 = MO1->getValue();
				398	auto Base2 = MO2->getValue();
				399	if (!Base1 \|\| !Base2)
				400	return false;
				401	const MachineFunction &MF = *MI1.getParent()->getParent();
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	402	const DataLayout &DL = MF.getFunction().getParent()->getDataLayout();
Stanislav Mekhanoshin	7fe9a5d	2017-09-13 22:20:47 +0000	[diff] [blame]	403	Base1 = GetUnderlyingObject(Base1, DL);
				404	Base2 = GetUnderlyingObject(Base1, DL);
				405
				406	if (isa<UndefValue>(Base1) \|\| isa<UndefValue>(Base2))
				407	return false;
				408
				409	return Base1 == Base2;
				410	}
				411
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	412	bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
				413	const MachineOperand &BaseOp2,
Jun Bum Lim	4c5bd58	2016-04-15 14:58:38 +0000	[diff] [blame]	414	unsigned NumLoads) const {
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	415	const MachineInstr &FirstLdSt = *BaseOp1.getParent();
				416	const MachineInstr &SecondLdSt = *BaseOp2.getParent();
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	417
				418	if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
Stanislav Mekhanoshin	7fe9a5d	2017-09-13 22:20:47 +0000	[diff] [blame]	419	return false;
				420
NAKAMURA Takumi	fe1202c	2016-06-20 00:37:41 +0000	[diff] [blame]	421	const MachineOperand *FirstDst = nullptr;
				422	const MachineOperand *SecondDst = nullptr;
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	423
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	424	if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) \|\|
Matt Arsenault	74f6483	2017-02-01 20:22:51 +0000	[diff] [blame]	425	(isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) \|\|
				426	(isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
Stanislav Mekhanoshin	7fe9a5d	2017-09-13 22:20:47 +0000	[diff] [blame]	427	const unsigned MaxGlobalLoadCluster = 6;
				428	if (NumLoads > MaxGlobalLoadCluster)
				429	return false;
				430
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	431	FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
Stanislav Mekhanoshin	949fac9	2017-09-06 15:31:30 +0000	[diff] [blame]	432	if (!FirstDst)
				433	FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	434	SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
Stanislav Mekhanoshin	949fac9	2017-09-06 15:31:30 +0000	[diff] [blame]	435	if (!SecondDst)
				436	SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
Matt Arsenault	437fd71	2016-11-29 19:30:41 +0000	[diff] [blame]	437	} else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
				438	FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
				439	SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
				440	} else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
				441	FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
				442	SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	443	}
				444
				445	if (!FirstDst \|\| !SecondDst)
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	446	return false;
				447
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	448	// Try to limit clustering based on the total number of bytes loaded
				449	// rather than the number of instructions. This is done to help reduce
				450	// register pressure. The method used is somewhat inexact, though,
				451	// because it assumes that all loads in the cluster will load the
				452	// same number of bytes as FirstLdSt.
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	453
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	454	// The unit of this value is bytes.
				455	// FIXME: This needs finer tuning.
				456	unsigned LoadClusterThreshold = 16;
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	457
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	458	const MachineRegisterInfo &MRI =
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	459	FirstLdSt.getParent()->getParent()->getRegInfo();
Neil Henning	0a30f33	2019-04-01 15:19:52 +0000	[diff] [blame]	460
				461	const unsigned Reg = FirstDst->getReg();
				462
				463	const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
				464	? MRI.getRegClass(Reg)
				465	: RI.getPhysRegClass(Reg);
Tom Stellard	a76bcc2	2016-03-28 16:10:13 +0000	[diff] [blame]	466
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	467	return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
Matt Arsenault	0e75a06	2014-09-17 17:48:30 +0000	[diff] [blame]	468	}
				469
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	470	// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
				471	// the first 16 loads will be interleaved with the stores, and the next 16 will
				472	// be clustered as expected. It should really split into 2 16 store batches.
				473	//
				474	// Loads are clustered until this returns false, rather than trying to schedule
				475	// groups of stores. This also means we have to deal with saying different
				476	// address space loads should be clustered, and ones which might cause bank
				477	// conflicts.
				478	//
				479	// This might be deprecated so it might not be worth that much effort to fix.
				480	bool SIInstrInfo::shouldScheduleLoadsNear(SDNode Load0, SDNode Load1,
				481	int64_t Offset0, int64_t Offset1,
				482	unsigned NumLoads) const {
				483	assert(Offset1 > Offset0 &&
				484	"Second offset should be larger than first offset!");
				485	// If we have less than 16 loads in a row, and the offsets are within 64
				486	// bytes, then schedule together.
				487
				488	// A cacheline is 64 bytes (for global memory).
				489	return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
				490	}
				491
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	492	static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
				493	MachineBasicBlock::iterator MI,
				494	const DebugLoc &DL, unsigned DestReg,
				495	unsigned SrcReg, bool KillSrc) {
				496	MachineFunction *MF = MBB.getParent();
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	497	DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	498	"illegal SGPR to VGPR copy",
				499	DL, DS_Error);
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	500	LLVMContext &C = MF->getFunction().getContext();
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	501	C.diagnose(IllegalCopy);
				502
				503	BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
				504	.addReg(SrcReg, getKillRegState(KillSrc));
				505	}
				506
Benjamin Kramer	bdc4956	2016-06-12 15:39:02 +0000	[diff] [blame]	507	void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
				508	MachineBasicBlock::iterator MI,
				509	const DebugLoc &DL, unsigned DestReg,
				510	unsigned SrcReg, bool KillSrc) const {
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	511	const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	512
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	513	if (RC == &AMDGPU::VGPR_32RegClass) {
				514	assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) \|\|
				515	AMDGPU::SReg_32RegClass.contains(SrcReg));
				516	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
				517	.addReg(SrcReg, getKillRegState(KillSrc));
				518	return;
				519	}
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	520
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	521	if (RC == &AMDGPU::SReg_32_XM0RegClass \|\|
				522	RC == &AMDGPU::SReg_32RegClass) {
Nicolai Haehnle	e58e0e3	2016-09-12 16:25:20 +0000	[diff] [blame]	523	if (SrcReg == AMDGPU::SCC) {
				524	BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
				525	.addImm(-1)
				526	.addImm(0);
				527	return;
				528	}
				529
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	530	if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
				531	reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
				532	return;
				533	}
				534
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	535	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
				536	.addReg(SrcReg, getKillRegState(KillSrc));
				537	return;
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	538	}
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	539
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	540	if (RC == &AMDGPU::SReg_64RegClass) {
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	541	if (DestReg == AMDGPU::VCC) {
Matt Arsenault	9998168	2015-02-14 02:55:56 +0000	[diff] [blame]	542	if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
				543	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
				544	.addReg(SrcReg, getKillRegState(KillSrc));
				545	} else {
				546	// FIXME: Hack until VReg_1 removed.
				547	assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
Matt Arsenault	5d8eb25	2016-09-30 01:50:20 +0000	[diff] [blame]	548	BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
Matt Arsenault	9998168	2015-02-14 02:55:56 +0000	[diff] [blame]	549	.addImm(0)
				550	.addReg(SrcReg, getKillRegState(KillSrc));
				551	}
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	552
Matt Arsenault	834b1aa	2015-02-14 02:55:54 +0000	[diff] [blame]	553	return;
				554	}
				555
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	556	if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
				557	reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
				558	return;
				559	}
				560
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	561	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
				562	.addReg(SrcReg, getKillRegState(KillSrc));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	563	return;
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	564	}
				565
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	566	if (DestReg == AMDGPU::SCC) {
				567	assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
				568	BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
				569	.addReg(SrcReg, getKillRegState(KillSrc))
				570	.addImm(0);
				571	return;
				572	}
				573
				574	unsigned EltSize = 4;
				575	unsigned Opcode = AMDGPU::V_MOV_B32_e32;
				576	if (RI.isSGPRClass(RC)) {
Tim Renouf	361b5b2	2019-03-21 12:01:21 +0000	[diff] [blame]	577	// TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
				578	if (!(RI.getRegSizeInBits(*RC) % 64)) {
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	579	Opcode = AMDGPU::S_MOV_B64;
				580	EltSize = 8;
				581	} else {
				582	Opcode = AMDGPU::S_MOV_B32;
				583	EltSize = 4;
				584	}
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	585
				586	if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
				587	reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
				588	return;
				589	}
Matt Arsenault	314cbf7	2016-11-07 16:39:22 +0000	[diff] [blame]	590	}
				591
				592	ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
Matt Arsenault	73d2f89	2016-07-15 22:32:02 +0000	[diff] [blame]	593	bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	594
				595	for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
				596	unsigned SubIdx;
				597	if (Forward)
				598	SubIdx = SubIndices[Idx];
				599	else
				600	SubIdx = SubIndices[SubIndices.size() - Idx - 1];
				601
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	602	MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
				603	get(Opcode), RI.getSubReg(DestReg, SubIdx));
				604
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	605	Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	606
Nicolai Haehnle	dd58705	2015-12-19 01:16:06 +0000	[diff] [blame]	607	if (Idx == 0)
Christian Konig	d0e3da1	2013-03-01 09:46:27 +0000	[diff] [blame]	608	Builder.addReg(DestReg, RegState::Define \| RegState::Implicit);
Matt Arsenault	73d2f89	2016-07-15 22:32:02 +0000	[diff] [blame]	609
Matt Arsenault	05c2647	2017-06-12 17:19:20 +0000	[diff] [blame]	610	bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
				611	Builder.addReg(SrcReg, getKillRegState(UseKill) \| RegState::Implicit);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	612	}
				613	}
				614
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	615	int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	616	int NewOpc;
				617
				618	// Try to map original to commuted opcode
Marek Olsak	191507e	2015-02-03 17:38:12 +0000	[diff] [blame]	619	NewOpc = AMDGPU::getCommuteRev(Opcode);
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	620	if (NewOpc != -1)
				621	// Check if the commuted (REV) opcode exists on the target.
				622	return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	623
				624	// Try to map commuted to original opcode
Marek Olsak	191507e	2015-02-03 17:38:12 +0000	[diff] [blame]	625	NewOpc = AMDGPU::getCommuteOrig(Opcode);
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	626	if (NewOpc != -1)
				627	// Check if the original (non-REV) opcode exists on the target.
				628	return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	629
				630	return Opcode;
				631	}
				632
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	633	void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
				634	MachineBasicBlock::iterator MI,
				635	const DebugLoc &DL, unsigned DestReg,
				636	int64_t Value) const {
				637	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				638	const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
				639	if (RegClass == &AMDGPU::SReg_32RegClass \|\|
				640	RegClass == &AMDGPU::SGPR_32RegClass \|\|
				641	RegClass == &AMDGPU::SReg_32_XM0RegClass \|\|
				642	RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
				643	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
				644	.addImm(Value);
				645	return;
				646	}
				647
				648	if (RegClass == &AMDGPU::SReg_64RegClass \|\|
				649	RegClass == &AMDGPU::SGPR_64RegClass \|\|
				650	RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
				651	BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
				652	.addImm(Value);
				653	return;
				654	}
				655
				656	if (RegClass == &AMDGPU::VGPR_32RegClass) {
				657	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
				658	.addImm(Value);
				659	return;
				660	}
				661	if (RegClass == &AMDGPU::VReg_64RegClass) {
				662	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
				663	.addImm(Value);
				664	return;
				665	}
				666
				667	unsigned EltSize = 4;
				668	unsigned Opcode = AMDGPU::V_MOV_B32_e32;
				669	if (RI.isSGPRClass(RegClass)) {
				670	if (RI.getRegSizeInBits(*RegClass) > 32) {
				671	Opcode = AMDGPU::S_MOV_B64;
				672	EltSize = 8;
				673	} else {
				674	Opcode = AMDGPU::S_MOV_B32;
				675	EltSize = 4;
				676	}
				677	}
				678
				679	ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
				680	for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
				681	int64_t IdxValue = Idx == 0 ? Value : 0;
				682
				683	MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
				684	get(Opcode), RI.getSubReg(DestReg, Idx));
				685	Builder.addImm(IdxValue);
				686	}
				687	}
				688
				689	const TargetRegisterClass *
				690	SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
				691	return &AMDGPU::VGPR_32RegClass;
				692	}
				693
				694	void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
				695	MachineBasicBlock::iterator I,
				696	const DebugLoc &DL, unsigned DstReg,
				697	ArrayRef<MachineOperand> Cond,
				698	unsigned TrueReg,
				699	unsigned FalseReg) const {
				700	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
NAKAMURA Takumi	994a43d	2017-05-16 04:01:23 +0000	[diff] [blame]	701	assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
				702	"Not a VGPR32 reg");
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	703
				704	if (Cond.size() == 1) {
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	705	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				706	BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
				707	.add(Cond[0]);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	708	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	709	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	710	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	711	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	712	.addReg(TrueReg)
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	713	.addReg(SReg);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	714	} else if (Cond.size() == 2) {
				715	assert(Cond[0].isImm() && "Cond[0] is not an immediate");
				716	switch (Cond[0].getImm()) {
				717	case SIInstrInfo::SCC_TRUE: {
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	718	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	719	BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
				720	.addImm(-1)
				721	.addImm(0);
				722	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	723	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	724	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	725	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	726	.addReg(TrueReg)
				727	.addReg(SReg);
				728	break;
				729	}
				730	case SIInstrInfo::SCC_FALSE: {
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	731	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	732	BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
				733	.addImm(0)
				734	.addImm(-1);
				735	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	736	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	737	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	738	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	739	.addReg(TrueReg)
				740	.addReg(SReg);
				741	break;
				742	}
				743	case SIInstrInfo::VCCNZ: {
				744	MachineOperand RegOp = Cond[1];
				745	RegOp.setImplicit(false);
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	746	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				747	BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
				748	.add(RegOp);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	749	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	750	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	751	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	752	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	753	.addReg(TrueReg)
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	754	.addReg(SReg);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	755	break;
				756	}
				757	case SIInstrInfo::VCCZ: {
				758	MachineOperand RegOp = Cond[1];
				759	RegOp.setImplicit(false);
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	760	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				761	BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
				762	.add(RegOp);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	763	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	764	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	765	.addReg(TrueReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	766	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	767	.addReg(FalseReg)
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	768	.addReg(SReg);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	769	break;
				770	}
				771	case SIInstrInfo::EXECNZ: {
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	772	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	773	unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				774	BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
				775	.addImm(0);
				776	BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
				777	.addImm(-1)
				778	.addImm(0);
				779	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	780	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	781	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	782	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	783	.addReg(TrueReg)
				784	.addReg(SReg);
				785	break;
				786	}
				787	case SIInstrInfo::EXECZ: {
Nicolai Haehnle	ce4ddd0	2017-09-29 15:37:31 +0000	[diff] [blame]	788	unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	789	unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				790	BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
				791	.addImm(0);
				792	BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
				793	.addImm(0)
				794	.addImm(-1);
				795	BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	796	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	797	.addReg(FalseReg)
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	798	.addImm(0)
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	799	.addReg(TrueReg)
				800	.addReg(SReg);
				801	llvm_unreachable("Unhandled branch predicate EXECZ");
				802	break;
				803	}
				804	default:
				805	llvm_unreachable("invalid branch predicate");
				806	}
				807	} else {
				808	llvm_unreachable("Can only handle Cond size 1 or 2");
				809	}
				810	}
				811
				812	unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
				813	MachineBasicBlock::iterator I,
				814	const DebugLoc &DL,
				815	unsigned SrcReg, int Value) const {
				816	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				817	unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				818	BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
				819	.addImm(Value)
				820	.addReg(SrcReg);
				821
				822	return Reg;
				823	}
				824
				825	unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
				826	MachineBasicBlock::iterator I,
				827	const DebugLoc &DL,
				828	unsigned SrcReg, int Value) const {
				829	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				830	unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				831	BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
				832	.addImm(Value)
				833	.addReg(SrcReg);
				834
				835	return Reg;
				836	}
				837
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	838	unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
				839
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	840	if (RI.getRegSizeInBits(*DstRC) == 32) {
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	841	return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	842	} else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	843	return AMDGPU::S_MOV_B64;
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	844	} else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	845	return AMDGPU::V_MOV_B64_PSEUDO;
Tom Stellard	ef3b864	2015-01-07 19:56:17 +0000	[diff] [blame]	846	}
				847	return AMDGPU::COPY;
				848	}
				849
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	850	static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
				851	switch (Size) {
				852	case 4:
				853	return AMDGPU::SI_SPILL_S32_SAVE;
				854	case 8:
				855	return AMDGPU::SI_SPILL_S64_SAVE;
Tim Renouf	361b5b2	2019-03-21 12:01:21 +0000	[diff] [blame]	856	case 12:
				857	return AMDGPU::SI_SPILL_S96_SAVE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	858	case 16:
				859	return AMDGPU::SI_SPILL_S128_SAVE;
Tim Renouf	033f99a	2019-03-22 10:11:21 +0000	[diff] [blame]	860	case 20:
				861	return AMDGPU::SI_SPILL_S160_SAVE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	862	case 32:
				863	return AMDGPU::SI_SPILL_S256_SAVE;
				864	case 64:
				865	return AMDGPU::SI_SPILL_S512_SAVE;
				866	default:
				867	llvm_unreachable("unknown register size");
				868	}
				869	}
				870
				871	static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
				872	switch (Size) {
				873	case 4:
				874	return AMDGPU::SI_SPILL_V32_SAVE;
				875	case 8:
				876	return AMDGPU::SI_SPILL_V64_SAVE;
Tom Stellard	703b2ec	2016-04-12 23:57:30 +0000	[diff] [blame]	877	case 12:
				878	return AMDGPU::SI_SPILL_V96_SAVE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	879	case 16:
				880	return AMDGPU::SI_SPILL_V128_SAVE;
Tim Renouf	033f99a	2019-03-22 10:11:21 +0000	[diff] [blame]	881	case 20:
				882	return AMDGPU::SI_SPILL_V160_SAVE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	883	case 32:
				884	return AMDGPU::SI_SPILL_V256_SAVE;
				885	case 64:
				886	return AMDGPU::SI_SPILL_V512_SAVE;
				887	default:
				888	llvm_unreachable("unknown register size");
				889	}
				890	}
				891
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	892	void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
				893	MachineBasicBlock::iterator MI,
				894	unsigned SrcReg, bool isKill,
				895	int FrameIndex,
				896	const TargetRegisterClass *RC,
				897	const TargetRegisterInfo *TRI) const {
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	898	MachineFunction *MF = MBB.getParent();
Tom Stellard	42fb60e	2015-01-14 15:42:31 +0000	[diff] [blame]	899	SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Matthias Braun	941a705	2016-07-28 18:40:00 +0000	[diff] [blame]	900	MachineFrameInfo &FrameInfo = MF->getFrameInfo();
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	901	const DebugLoc &DL = MBB.findDebugLoc(MI);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	902
Matthias Braun	941a705	2016-07-28 18:40:00 +0000	[diff] [blame]	903	unsigned Size = FrameInfo.getObjectSize(FrameIndex);
				904	unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	905	MachinePointerInfo PtrInfo
				906	= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
				907	MachineMemOperand *MMO
				908	= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
				909	Size, Align);
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	910	unsigned SpillSize = TRI->getSpillSize(*RC);
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	911
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	912	if (RI.isSGPRClass(RC)) {
Matt Arsenault	5b22dfa	2015-11-05 05:27:10 +0000	[diff] [blame]	913	MFI->setHasSpilledSGPRs();
				914
Matt Arsenault	2510a31	2016-09-03 06:57:55 +0000	[diff] [blame]	915	// We are only allowed to create one new instruction when spilling
				916	// registers, so we need to use pseudo instruction for spilling SGPRs.
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	917	const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
Matt Arsenault	2510a31	2016-09-03 06:57:55 +0000	[diff] [blame]	918
				919	// The SGPR spill/restore instructions only work on number sgprs, so we need
				920	// to make sure we are using the correct register class.
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	921	if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
Matt Arsenault	b6e1cc2	2016-05-21 00:53:42 +0000	[diff] [blame]	922	MachineRegisterInfo &MRI = MF->getRegInfo();
				923	MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
				924	}
				925
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	926	MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	927	.addReg(SrcReg, getKillRegState(isKill)) // data
				928	.addFrameIndex(FrameIndex) // addr
Matt Arsenault	08906a3	2016-10-28 19:43:31 +0000	[diff] [blame]	929	.addMemOperand(MMO)
				930	.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
Matt Arsenault	ea8a4ed	2017-05-17 19:37:57 +0000	[diff] [blame]	931	.addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
Matt Arsenault	08906a3	2016-10-28 19:43:31 +0000	[diff] [blame]	932	// Add the scratch resource registers as implicit uses because we may end up
				933	// needing them, and need to ensure that the reserved registers are
				934	// correctly handled.
Tom Stellard	42fb60e	2015-01-14 15:42:31 +0000	[diff] [blame]	935
Matt Arsenault	adc59d7	2018-04-23 15:51:26 +0000	[diff] [blame]	936	FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	937	if (ST.hasScalarStores()) {
				938	// m0 is used for offset to scalar stores if used to spill.
Nicolai Haehnle	43cc6c4	2017-06-27 08:04:13 +0000	[diff] [blame]	939	Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine \| RegState::Dead);
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	940	}
				941
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	942	return;
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	943	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	944
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	945	assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
				946
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	947	unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	948	MFI->setHasSpilledVGPRs();
				949	BuildMI(MBB, MI, DL, get(Opcode))
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	950	.addReg(SrcReg, getKillRegState(isKill)) // data
				951	.addFrameIndex(FrameIndex) // addr
Matt Arsenault	2510a31	2016-09-03 06:57:55 +0000	[diff] [blame]	952	.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
Matt Arsenault	ea8a4ed	2017-05-17 19:37:57 +0000	[diff] [blame]	953	.addReg(MFI->getFrameOffsetReg()) // scratch_offset
Matt Arsenault	2510a31	2016-09-03 06:57:55 +0000	[diff] [blame]	954	.addImm(0) // offset
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	955	.addMemOperand(MMO);
				956	}
				957
				958	static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
				959	switch (Size) {
				960	case 4:
				961	return AMDGPU::SI_SPILL_S32_RESTORE;
				962	case 8:
				963	return AMDGPU::SI_SPILL_S64_RESTORE;
Tim Renouf	361b5b2	2019-03-21 12:01:21 +0000	[diff] [blame]	964	case 12:
				965	return AMDGPU::SI_SPILL_S96_RESTORE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	966	case 16:
				967	return AMDGPU::SI_SPILL_S128_RESTORE;
Tim Renouf	033f99a	2019-03-22 10:11:21 +0000	[diff] [blame]	968	case 20:
				969	return AMDGPU::SI_SPILL_S160_RESTORE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	970	case 32:
				971	return AMDGPU::SI_SPILL_S256_RESTORE;
				972	case 64:
				973	return AMDGPU::SI_SPILL_S512_RESTORE;
				974	default:
				975	llvm_unreachable("unknown register size");
				976	}
				977	}
				978
				979	static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
				980	switch (Size) {
				981	case 4:
				982	return AMDGPU::SI_SPILL_V32_RESTORE;
				983	case 8:
				984	return AMDGPU::SI_SPILL_V64_RESTORE;
Tom Stellard	703b2ec	2016-04-12 23:57:30 +0000	[diff] [blame]	985	case 12:
				986	return AMDGPU::SI_SPILL_V96_RESTORE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	987	case 16:
				988	return AMDGPU::SI_SPILL_V128_RESTORE;
Tim Renouf	033f99a	2019-03-22 10:11:21 +0000	[diff] [blame]	989	case 20:
				990	return AMDGPU::SI_SPILL_V160_RESTORE;
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	991	case 32:
				992	return AMDGPU::SI_SPILL_V256_RESTORE;
				993	case 64:
				994	return AMDGPU::SI_SPILL_V512_RESTORE;
				995	default:
				996	llvm_unreachable("unknown register size");
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	997	}
				998	}
				999
				1000	void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
				1001	MachineBasicBlock::iterator MI,
				1002	unsigned DestReg, int FrameIndex,
				1003	const TargetRegisterClass *RC,
				1004	const TargetRegisterInfo *TRI) const {
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	1005	MachineFunction *MF = MBB.getParent();
Matt Arsenault	88ce3dc	2018-11-26 21:28:40 +0000	[diff] [blame]	1006	SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Matthias Braun	941a705	2016-07-28 18:40:00 +0000	[diff] [blame]	1007	MachineFrameInfo &FrameInfo = MF->getFrameInfo();
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	1008	const DebugLoc &DL = MBB.findDebugLoc(MI);
Matthias Braun	941a705	2016-07-28 18:40:00 +0000	[diff] [blame]	1009	unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
				1010	unsigned Size = FrameInfo.getObjectSize(FrameIndex);
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1011	unsigned SpillSize = TRI->getSpillSize(*RC);
Tom Stellard	4e07b1d	2014-06-10 21:20:41 +0000	[diff] [blame]	1012
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1013	MachinePointerInfo PtrInfo
				1014	= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
				1015
				1016	MachineMemOperand *MMO = MF->getMachineMemOperand(
				1017	PtrInfo, MachineMemOperand::MOLoad, Size, Align);
				1018
				1019	if (RI.isSGPRClass(RC)) {
Matt Arsenault	88ce3dc	2018-11-26 21:28:40 +0000	[diff] [blame]	1020	MFI->setHasSpilledSGPRs();
				1021
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1022	// FIXME: Maybe this should not include a memoperand because it will be
				1023	// lowered to non-memory instructions.
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1024	const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
				1025	if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
Matt Arsenault	b6e1cc2	2016-05-21 00:53:42 +0000	[diff] [blame]	1026	MachineRegisterInfo &MRI = MF->getRegInfo();
				1027	MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
				1028	}
				1029
Matt Arsenault	adc59d7	2018-04-23 15:51:26 +0000	[diff] [blame]	1030	FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	1031	MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	1032	.addFrameIndex(FrameIndex) // addr
Matt Arsenault	08906a3	2016-10-28 19:43:31 +0000	[diff] [blame]	1033	.addMemOperand(MMO)
				1034	.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
Matt Arsenault	ea8a4ed	2017-05-17 19:37:57 +0000	[diff] [blame]	1035	.addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1036
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	1037	if (ST.hasScalarStores()) {
				1038	// m0 is used for offset to scalar stores if used to spill.
Nicolai Haehnle	43cc6c4	2017-06-27 08:04:13 +0000	[diff] [blame]	1039	Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine \| RegState::Dead);
Marek Olsak	79c0587	2016-11-25 17:37:09 +0000	[diff] [blame]	1040	}
				1041
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1042	return;
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1043	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	1044
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1045	assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
				1046
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1047	unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1048	BuildMI(MBB, MI, DL, get(Opcode), DestReg)
Matt Arsenault	ea8a4ed	2017-05-17 19:37:57 +0000	[diff] [blame]	1049	.addFrameIndex(FrameIndex) // vaddr
				1050	.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
				1051	.addReg(MFI->getFrameOffsetReg()) // scratch_offset
				1052	.addImm(0) // offset
Matt Arsenault	08f14de	2015-11-06 18:07:53 +0000	[diff] [blame]	1053	.addMemOperand(MMO);
Tom Stellard	c149dc0	2013-11-27 21:23:35 +0000	[diff] [blame]	1054	}
				1055
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1056	/// \param @Offset Offset in bytes of the FrameIndex being spilled
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1057	unsigned SIInstrInfo::calculateLDSSpillAddress(
				1058	MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg,
				1059	unsigned FrameOffset, unsigned Size) const {
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1060	MachineFunction *MF = MBB.getParent();
				1061	SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	1062	const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	1063	const DebugLoc &DL = MBB.findDebugLoc(MI);
Konstantin Zhuravlyov	1d65026	2016-09-06 20:22:28 +0000	[diff] [blame]	1064	unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1065	unsigned WavefrontSize = ST.getWavefrontSize();
				1066
				1067	unsigned TIDReg = MFI->getTIDReg();
				1068	if (!MFI->hasCalculatedTID()) {
				1069	MachineBasicBlock &Entry = MBB.getParent()->front();
				1070	MachineBasicBlock::iterator Insert = Entry.front();
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	1071	const DebugLoc &DL = Insert->getDebugLoc();
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1072
Tom Stellard	19f4301	2016-07-28 14:30:43 +0000	[diff] [blame]	1073	TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
				1074	*MF);
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1075	if (TIDReg == AMDGPU::NoRegister)
				1076	return TIDReg;
				1077
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	1078	if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) &&
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1079	WorkGroupSize > WavefrontSize) {
Matt Arsenault	ac234b6	2015-11-30 21:15:57 +0000	[diff] [blame]	1080	unsigned TIDIGXReg
Matt Arsenault	8623e8d	2017-08-03 23:00:29 +0000	[diff] [blame]	1081	= MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
Matt Arsenault	ac234b6	2015-11-30 21:15:57 +0000	[diff] [blame]	1082	unsigned TIDIGYReg
Matt Arsenault	8623e8d	2017-08-03 23:00:29 +0000	[diff] [blame]	1083	= MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
Matt Arsenault	ac234b6	2015-11-30 21:15:57 +0000	[diff] [blame]	1084	unsigned TIDIGZReg
Matt Arsenault	8623e8d	2017-08-03 23:00:29 +0000	[diff] [blame]	1085	= MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1086	unsigned InputPtrReg =
Matt Arsenault	8623e8d	2017-08-03 23:00:29 +0000	[diff] [blame]	1087	MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
Benjamin Kramer	7149aab	2015-03-01 18:09:56 +0000	[diff] [blame]	1088	for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1089	if (!Entry.isLiveIn(Reg))
				1090	Entry.addLiveIn(Reg);
				1091	}
				1092
Matthias Braun	7dc03f0	2016-04-06 02:47:09 +0000	[diff] [blame]	1093	RS->enterBasicBlock(Entry);
Matt Arsenault	0c90e95	2015-11-06 18:17:45 +0000	[diff] [blame]	1094	// FIXME: Can we scavenge an SReg_64 and access the subregs?
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1095	unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
				1096	unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
				1097	BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
				1098	.addReg(InputPtrReg)
				1099	.addImm(SI::KernelInputOffsets::NGROUPS_Z);
				1100	BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
				1101	.addReg(InputPtrReg)
				1102	.addImm(SI::KernelInputOffsets::NGROUPS_Y);
				1103
				1104	// NGROUPS.X * NGROUPS.Y
				1105	BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
				1106	.addReg(STmp1)
				1107	.addReg(STmp0);
				1108	// (NGROUPS.X * NGROUPS.Y) * TIDIG.X
				1109	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
				1110	.addReg(STmp1)
				1111	.addReg(TIDIGXReg);
				1112	// NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
				1113	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
				1114	.addReg(STmp0)
				1115	.addReg(TIDIGYReg)
				1116	.addReg(TIDReg);
				1117	// (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	1118	getAddNoCarry(Entry, Insert, DL, TIDReg)
				1119	.addReg(TIDReg)
Tim Renouf	cfdfba9	2019-03-18 19:35:44 +0000	[diff] [blame]	1120	.addReg(TIDIGZReg)
				1121	.addImm(0); // clamp bit
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1122	} else {
				1123	// Get the wave id
				1124	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
				1125	TIDReg)
				1126	.addImm(-1)
				1127	.addImm(0);
				1128
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	1129	BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1130	TIDReg)
				1131	.addImm(-1)
				1132	.addReg(TIDReg);
				1133	}
				1134
				1135	BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
				1136	TIDReg)
				1137	.addImm(2)
				1138	.addReg(TIDReg);
				1139	MFI->setTIDReg(TIDReg);
				1140	}
				1141
				1142	// Add FrameIndex to LDS offset
Matt Arsenault	52ef401	2016-07-26 16:45:58 +0000	[diff] [blame]	1143	unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	1144	getAddNoCarry(MBB, MI, DL, TmpReg)
				1145	.addImm(LDSOffset)
Tim Renouf	cfdfba9	2019-03-18 19:35:44 +0000	[diff] [blame]	1146	.addReg(TIDReg)
				1147	.addImm(0); // clamp bit
Tom Stellard	9646890	2014-09-24 01:33:17 +0000	[diff] [blame]	1148
				1149	return TmpReg;
				1150	}
				1151
Tom Stellard	d37630e	2016-04-07 14:47:07 +0000	[diff] [blame]	1152	void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
				1153	MachineBasicBlock::iterator MI,
Nicolai Haehnle	87323da	2015-12-17 16:46:42 +0000	[diff] [blame]	1154	int Count) const {
Tom Stellard	341e293	2016-05-02 18:02:24 +0000	[diff] [blame]	1155	DebugLoc DL = MBB.findDebugLoc(MI);
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	1156	while (Count > 0) {
				1157	int Arg;
				1158	if (Count >= 8)
				1159	Arg = 7;
				1160	else
				1161	Arg = Count - 1;
				1162	Count -= 8;
Tom Stellard	341e293	2016-05-02 18:02:24 +0000	[diff] [blame]	1163	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	1164	.addImm(Arg);
				1165	}
				1166	}
				1167
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	1168	void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
				1169	MachineBasicBlock::iterator MI) const {
				1170	insertWaitStates(MBB, MI, 1);
				1171	}
				1172
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1173	void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
				1174	auto MF = MBB.getParent();
				1175	SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
				1176
				1177	assert(Info->isEntryFunction());
				1178
				1179	if (MBB.succ_empty()) {
				1180	bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
David Stuttard	20ea21c	2019-03-12 09:52:58 +0000	[diff] [blame]	1181	if (HasNoTerminator) {
				1182	if (Info->returnsVoid()) {
				1183	BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0);
				1184	} else {
				1185	BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG));
				1186	}
				1187	}
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1188	}
				1189	}
				1190
Stanislav Mekhanoshin	f92ed69	2019-01-21 19:11:26 +0000	[diff] [blame]	1191	unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	1192	switch (MI.getOpcode()) {
				1193	default: return 1; // FIXME: Do wait states equal cycles?
				1194
				1195	case AMDGPU::S_NOP:
				1196	return MI.getOperand(0).getImm() + 1;
				1197	}
				1198	}
				1199
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1200	bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
				1201	MachineBasicBlock &MBB = *MI.getParent();
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	1202	DebugLoc DL = MBB.findDebugLoc(MI);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1203	switch (MI.getOpcode()) {
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	1204	default: return TargetInstrInfo::expandPostRAPseudo(MI);
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1205	case AMDGPU::S_MOV_B64_term:
Matt Arsenault	e674075	2016-09-29 01:44:16 +0000	[diff] [blame]	1206	// This is only a terminator to get the correct spill code placement during
				1207	// register allocation.
				1208	MI.setDesc(get(AMDGPU::S_MOV_B64));
				1209	break;
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1210
				1211	case AMDGPU::S_XOR_B64_term:
Matt Arsenault	e674075	2016-09-29 01:44:16 +0000	[diff] [blame]	1212	// This is only a terminator to get the correct spill code placement during
				1213	// register allocation.
				1214	MI.setDesc(get(AMDGPU::S_XOR_B64));
				1215	break;
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1216
				1217	case AMDGPU::S_ANDN2_B64_term:
Matt Arsenault	e674075	2016-09-29 01:44:16 +0000	[diff] [blame]	1218	// This is only a terminator to get the correct spill code placement during
				1219	// register allocation.
				1220	MI.setDesc(get(AMDGPU::S_ANDN2_B64));
				1221	break;
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1222
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1223	case AMDGPU::V_MOV_B64_PSEUDO: {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1224	unsigned Dst = MI.getOperand(0).getReg();
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1225	unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
				1226	unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
				1227
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1228	const MachineOperand &SrcOp = MI.getOperand(1);
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1229	// FIXME: Will this work for 64-bit floating point immediates?
				1230	assert(!SrcOp.isFPImm());
				1231	if (SrcOp.isImm()) {
				1232	APInt Imm(64, SrcOp.getImm());
				1233	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault	80bc355	2016-06-13 15:53:52 +0000	[diff] [blame]	1234	.addImm(Imm.getLoBits(32).getZExtValue())
				1235	.addReg(Dst, RegState::Implicit \| RegState::Define);
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1236	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault	80bc355	2016-06-13 15:53:52 +0000	[diff] [blame]	1237	.addImm(Imm.getHiBits(32).getZExtValue())
				1238	.addReg(Dst, RegState::Implicit \| RegState::Define);
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1239	} else {
				1240	assert(SrcOp.isReg());
				1241	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
Matt Arsenault	80bc355	2016-06-13 15:53:52 +0000	[diff] [blame]	1242	.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
				1243	.addReg(Dst, RegState::Implicit \| RegState::Define);
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1244	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
Matt Arsenault	80bc355	2016-06-13 15:53:52 +0000	[diff] [blame]	1245	.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
				1246	.addReg(Dst, RegState::Implicit \| RegState::Define);
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1247	}
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1248	MI.eraseFromParent();
Tom Stellard	4842c05	2015-01-07 20:27:25 +0000	[diff] [blame]	1249	break;
				1250	}
Connor Abbott	66b9bd6	2017-08-04 18:36:54 +0000	[diff] [blame]	1251	case AMDGPU::V_SET_INACTIVE_B32: {
				1252	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
				1253	.addReg(AMDGPU::EXEC);
				1254	BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
				1255	.add(MI.getOperand(2));
				1256	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
				1257	.addReg(AMDGPU::EXEC);
				1258	MI.eraseFromParent();
				1259	break;
				1260	}
				1261	case AMDGPU::V_SET_INACTIVE_B64: {
				1262	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
				1263	.addReg(AMDGPU::EXEC);
				1264	MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
				1265	MI.getOperand(0).getReg())
				1266	.add(MI.getOperand(2));
				1267	expandPostRAPseudo(*Copy);
				1268	BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
				1269	.addReg(AMDGPU::EXEC);
				1270	MI.eraseFromParent();
				1271	break;
				1272	}
Nicolai Haehnle	a785209	2016-10-24 14:56:02 +0000	[diff] [blame]	1273	case AMDGPU::V_MOVRELD_B32_V1:
				1274	case AMDGPU::V_MOVRELD_B32_V2:
				1275	case AMDGPU::V_MOVRELD_B32_V4:
				1276	case AMDGPU::V_MOVRELD_B32_V8:
				1277	case AMDGPU::V_MOVRELD_B32_V16: {
				1278	const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
				1279	unsigned VecReg = MI.getOperand(0).getReg();
				1280	bool IsUndef = MI.getOperand(1).isUndef();
				1281	unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
				1282	assert(VecReg == MI.getOperand(1).getReg());
				1283
				1284	MachineInstr *MovRel =
				1285	BuildMI(MBB, MI, DL, MovRelDesc)
				1286	.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	1287	.add(MI.getOperand(2))
Nicolai Haehnle	a785209	2016-10-24 14:56:02 +0000	[diff] [blame]	1288	.addReg(VecReg, RegState::ImplicitDefine)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	1289	.addReg(VecReg,
				1290	RegState::Implicit \| (IsUndef ? RegState::Undef : 0));
Nicolai Haehnle	a785209	2016-10-24 14:56:02 +0000	[diff] [blame]	1291
				1292	const int ImpDefIdx =
				1293	MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
				1294	const int ImpUseIdx = ImpDefIdx + 1;
				1295	MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
				1296
				1297	MI.eraseFromParent();
				1298	break;
				1299	}
Tom Stellard	bf3e6e5	2016-06-14 20:29:59 +0000	[diff] [blame]	1300	case AMDGPU::SI_PC_ADD_REL_OFFSET: {
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	1301	MachineFunction &MF = *MBB.getParent();
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1302	unsigned Reg = MI.getOperand(0).getReg();
Matt Arsenault	11587d9	2016-08-10 19:11:45 +0000	[diff] [blame]	1303	unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
				1304	unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	1305
				1306	// Create a bundle so these instructions won't be re-ordered by the
				1307	// post-RA scheduler.
				1308	MIBundleBuilder Bundler(MBB, MI);
				1309	Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
				1310
				1311	// Add 32-bit offset from this instruction to the start of the
				1312	// constant data.
				1313	Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1314	.addReg(RegLo)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	1315	.add(MI.getOperand(1)));
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	1316
Konstantin Zhuravlyov	c96b5d7	2016-10-14 04:37:34 +0000	[diff] [blame]	1317	MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
				1318	.addReg(RegHi);
				1319	if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
				1320	MIB.addImm(0);
				1321	else
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	1322	MIB.add(MI.getOperand(2));
Konstantin Zhuravlyov	c96b5d7	2016-10-14 04:37:34 +0000	[diff] [blame]	1323
				1324	Bundler.append(MIB);
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	1325	finalizeBundle(MBB, Bundler.begin());
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	1326
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1327	MI.eraseFromParent();
Tom Stellard	c93fc11	2015-12-10 02:13:01 +0000	[diff] [blame]	1328	break;
				1329	}
Neil Henning	0a30f33	2019-04-01 15:19:52 +0000	[diff] [blame]	1330	case AMDGPU::ENTER_WWM: {
				1331	// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
				1332	// WWM is entered.
				1333	MI.setDesc(get(AMDGPU::S_OR_SAVEEXEC_B64));
				1334	break;
				1335	}
Connor Abbott	92638ab	2017-08-04 18:36:52 +0000	[diff] [blame]	1336	case AMDGPU::EXIT_WWM: {
Neil Henning	0a30f33	2019-04-01 15:19:52 +0000	[diff] [blame]	1337	// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
				1338	// WWM is exited.
Connor Abbott	92638ab	2017-08-04 18:36:52 +0000	[diff] [blame]	1339	MI.setDesc(get(AMDGPU::S_MOV_B64));
				1340	break;
				1341	}
Stanislav Mekhanoshin	739174c	2018-05-31 20:13:51 +0000	[diff] [blame]	1342	case TargetOpcode::BUNDLE: {
				1343	if (!MI.mayLoad())
				1344	return false;
				1345
				1346	// If it is a load it must be a memory clause
				1347	for (MachineBasicBlock::instr_iterator I = MI.getIterator();
				1348	I->isBundledWithSucc(); ++I) {
				1349	I->unbundleFromSucc();
				1350	for (MachineOperand &MO : I->operands())
				1351	if (MO.isReg())
				1352	MO.setIsInternalRead(false);
				1353	}
				1354
				1355	MI.eraseFromParent();
				1356	break;
				1357	}
Tom Stellard	eba6107	2014-05-02 15:41:42 +0000	[diff] [blame]	1358	}
				1359	return true;
				1360	}
				1361
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1362	bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
				1363	MachineOperand &Src0,
				1364	unsigned Src0OpName,
				1365	MachineOperand &Src1,
				1366	unsigned Src1OpName) const {
				1367	MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
				1368	if (!Src0Mods)
				1369	return false;
				1370
				1371	MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
				1372	assert(Src1Mods &&
				1373	"All commutable instructions have both src0 and src1 modifiers");
				1374
				1375	int Src0ModsVal = Src0Mods->getImm();
				1376	int Src1ModsVal = Src1Mods->getImm();
				1377
				1378	Src1Mods->setImm(Src0ModsVal);
				1379	Src0Mods->setImm(Src1ModsVal);
				1380	return true;
				1381	}
				1382
				1383	static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
				1384	MachineOperand &RegOp,
Matt Arsenault	25dba30	2016-09-13 19:03:12 +0000	[diff] [blame]	1385	MachineOperand &NonRegOp) {
				1386	unsigned Reg = RegOp.getReg();
				1387	unsigned SubReg = RegOp.getSubReg();
				1388	bool IsKill = RegOp.isKill();
				1389	bool IsDead = RegOp.isDead();
				1390	bool IsUndef = RegOp.isUndef();
				1391	bool IsDebug = RegOp.isDebug();
				1392
				1393	if (NonRegOp.isImm())
				1394	RegOp.ChangeToImmediate(NonRegOp.getImm());
				1395	else if (NonRegOp.isFI())
				1396	RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
				1397	else
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1398	return nullptr;
				1399
Matt Arsenault	25dba30	2016-09-13 19:03:12 +0000	[diff] [blame]	1400	NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
				1401	NonRegOp.setSubReg(SubReg);
				1402
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1403	return &MI;
				1404	}
				1405
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1406	MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1407	unsigned Src0Idx,
				1408	unsigned Src1Idx) const {
				1409	assert(!NewMI && "this should never be used");
				1410
				1411	unsigned Opc = MI.getOpcode();
				1412	int CommutedOpcode = commuteOpcode(Opc);
Marek Olsak	cfbdba2	2015-06-26 20:29:10 +0000	[diff] [blame]	1413	if (CommutedOpcode == -1)
				1414	return nullptr;
				1415
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1416	assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
				1417	static_cast<int>(Src0Idx) &&
				1418	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
				1419	static_cast<int>(Src1Idx) &&
				1420	"inconsistency with findCommutedOpIndices");
				1421
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1422	MachineOperand &Src0 = MI.getOperand(Src0Idx);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1423	MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenault	aa5ccfb	2014-10-17 18:00:37 +0000	[diff] [blame]	1424
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1425	MachineInstr *CommutedMI = nullptr;
				1426	if (Src0.isReg() && Src1.isReg()) {
				1427	if (isOperandLegal(MI, Src1Idx, &Src0)) {
				1428	// Be sure to copy the source modifiers to the right place.
				1429	CommutedMI
				1430	= TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
Matt Arsenault	d282ada	2014-10-17 18:00:48 +0000	[diff] [blame]	1431	}
				1432
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1433	} else if (Src0.isReg() && !Src1.isReg()) {
				1434	// src0 should always be able to support any operand type, so no need to
				1435	// check operand legality.
				1436	CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
				1437	} else if (!Src0.isReg() && Src1.isReg()) {
				1438	if (isOperandLegal(MI, Src1Idx, &Src0))
				1439	CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1440	} else {
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1441	// FIXME: Found two non registers to commute. This does happen.
				1442	return nullptr;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	1443	}
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	1444
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1445	if (CommutedMI) {
				1446	swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
				1447	Src1, AMDGPU::OpName::src1_modifiers);
				1448
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1449	CommutedMI->setDesc(get(CommutedOpcode));
Matt Arsenault	bbb47da	2016-09-08 17:19:29 +0000	[diff] [blame]	1450	}
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	1451
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1452	return CommutedMI;
Christian Konig	76edd4f	2013-02-26 17:52:29 +0000	[diff] [blame]	1453	}
				1454
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1455	// This needs to be implemented because the source modifiers may be inserted
				1456	// between the true commutable operands, and the base
				1457	// TargetInstrInfo::commuteInstruction uses it.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	1458	bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1459	unsigned &SrcOpIdx1) const {
Alexander Timofeev	db7ee76	2018-09-11 11:56:50 +0000	[diff] [blame]	1460	return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
				1461	}
				1462
				1463	bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
				1464	unsigned &SrcOpIdx1) const {
				1465	if (!Desc.isCommutable())
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1466	return false;
				1467
Alexander Timofeev	db7ee76	2018-09-11 11:56:50 +0000	[diff] [blame]	1468	unsigned Opc = Desc.getOpcode();
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1469	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				1470	if (Src0Idx == -1)
				1471	return false;
				1472
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1473	int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
				1474	if (Src1Idx == -1)
				1475	return false;
				1476
Andrew Kaylor	16c4da0	2015-09-28 20:33:22 +0000	[diff] [blame]	1477	return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
Matt Arsenault	92befe7	2014-09-26 17:54:54 +0000	[diff] [blame]	1478	}
				1479
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1480	bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
				1481	int64_t BrOffset) const {
				1482	// BranchRelaxation should never have to check s_setpc_b64 because its dest
				1483	// block is unanalyzable.
				1484	assert(BranchOp != AMDGPU::S_SETPC_B64);
				1485
				1486	// Convert to dwords.
				1487	BrOffset /= 4;
				1488
				1489	// The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
				1490	// from the next instruction.
				1491	BrOffset -= 1;
				1492
				1493	return isIntN(BranchOffsetBits, BrOffset);
				1494	}
				1495
				1496	MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
				1497	const MachineInstr &MI) const {
				1498	if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
				1499	// This would be a difficult analysis to perform, but can always be legal so
				1500	// there's no need to analyze it.
				1501	return nullptr;
				1502	}
				1503
				1504	return MI.getOperand(0).getMBB();
				1505	}
				1506
				1507	unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
				1508	MachineBasicBlock &DestBB,
				1509	const DebugLoc &DL,
				1510	int64_t BrOffset,
				1511	RegScavenger *RS) const {
				1512	assert(RS && "RegScavenger required for long branching");
				1513	assert(MBB.empty() &&
				1514	"new block should be inserted for expanding unconditional branch");
				1515	assert(MBB.pred_size() == 1);
				1516
				1517	MachineFunction *MF = MBB.getParent();
				1518	MachineRegisterInfo &MRI = MF->getRegInfo();
				1519
				1520	// FIXME: Virtual register workaround for RegScavenger not working with empty
				1521	// blocks.
				1522	unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				1523
				1524	auto I = MBB.end();
				1525
				1526	// We need to compute the offset relative to the instruction immediately after
				1527	// s_getpc_b64. Insert pc arithmetic code before last terminator.
				1528	MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
				1529
				1530	// TODO: Handle > 32-bit block address.
				1531	if (BrOffset >= 0) {
				1532	BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
				1533	.addReg(PCReg, RegState::Define, AMDGPU::sub0)
				1534	.addReg(PCReg, 0, AMDGPU::sub0)
				1535	.addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
				1536	BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
				1537	.addReg(PCReg, RegState::Define, AMDGPU::sub1)
				1538	.addReg(PCReg, 0, AMDGPU::sub1)
				1539	.addImm(0);
				1540	} else {
				1541	// Backwards branch.
				1542	BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
				1543	.addReg(PCReg, RegState::Define, AMDGPU::sub0)
				1544	.addReg(PCReg, 0, AMDGPU::sub0)
				1545	.addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
				1546	BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
				1547	.addReg(PCReg, RegState::Define, AMDGPU::sub1)
				1548	.addReg(PCReg, 0, AMDGPU::sub1)
				1549	.addImm(0);
				1550	}
				1551
				1552	// Insert the indirect branch after the other terminator.
				1553	BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
				1554	.addReg(PCReg);
				1555
				1556	// FIXME: If spilling is necessary, this will fail because this scavenger has
				1557	// no emergency stack slots. It is non-trivial to spill in this situation,
				1558	// because the restore code needs to be specially placed after the
				1559	// jump. BranchRelaxation then needs to be made aware of the newly inserted
				1560	// block.
				1561	//
				1562	// If a spill is needed for the pc register pair, we need to insert a spill
				1563	// restore block right before the destination block, and insert a short branch
				1564	// into the old destination block's fallthrough predecessor.
				1565	// e.g.:
				1566	//
				1567	// s_cbranch_scc0 skip_long_branch:
				1568	//
				1569	// long_branch_bb:
				1570	// spill s[8:9]
				1571	// s_getpc_b64 s[8:9]
				1572	// s_add_u32 s8, s8, restore_bb
				1573	// s_addc_u32 s9, s9, 0
				1574	// s_setpc_b64 s[8:9]
				1575	//
				1576	// skip_long_branch:
				1577	// foo;
				1578	//
				1579	// .....
				1580	//
				1581	// dest_bb_fallthrough_predecessor:
				1582	// bar;
				1583	// s_branch dest_bb
				1584	//
				1585	// restore_bb:
				1586	// restore s[8:9]
				1587	// fallthrough dest_bb
				1588	///
				1589	// dest_bb:
				1590	// buzz;
				1591
				1592	RS->enterBasicBlockEnd(MBB);
Matt Arsenault	b0b741e	2018-10-30 01:33:14 +0000	[diff] [blame]	1593	unsigned Scav = RS->scavengeRegisterBackwards(
				1594	AMDGPU::SReg_64RegClass,
				1595	MachineBasicBlock::iterator(GetPC), false, 0);
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1596	MRI.replaceRegWith(PCReg, Scav);
				1597	MRI.clearVirtRegs();
				1598	RS->setRegUsed(Scav);
				1599
				1600	return 4 + 8 + 4 + 4;
				1601	}
				1602
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1603	unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
				1604	switch (Cond) {
				1605	case SIInstrInfo::SCC_TRUE:
				1606	return AMDGPU::S_CBRANCH_SCC1;
				1607	case SIInstrInfo::SCC_FALSE:
				1608	return AMDGPU::S_CBRANCH_SCC0;
Matt Arsenault	4945905	2016-05-21 00:29:40 +0000	[diff] [blame]	1609	case SIInstrInfo::VCCNZ:
				1610	return AMDGPU::S_CBRANCH_VCCNZ;
				1611	case SIInstrInfo::VCCZ:
				1612	return AMDGPU::S_CBRANCH_VCCZ;
				1613	case SIInstrInfo::EXECNZ:
				1614	return AMDGPU::S_CBRANCH_EXECNZ;
				1615	case SIInstrInfo::EXECZ:
				1616	return AMDGPU::S_CBRANCH_EXECZ;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1617	default:
				1618	llvm_unreachable("invalid branch predicate");
				1619	}
				1620	}
				1621
				1622	SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
				1623	switch (Opcode) {
				1624	case AMDGPU::S_CBRANCH_SCC0:
				1625	return SCC_FALSE;
				1626	case AMDGPU::S_CBRANCH_SCC1:
				1627	return SCC_TRUE;
Matt Arsenault	4945905	2016-05-21 00:29:40 +0000	[diff] [blame]	1628	case AMDGPU::S_CBRANCH_VCCNZ:
				1629	return VCCNZ;
				1630	case AMDGPU::S_CBRANCH_VCCZ:
				1631	return VCCZ;
				1632	case AMDGPU::S_CBRANCH_EXECNZ:
				1633	return EXECNZ;
				1634	case AMDGPU::S_CBRANCH_EXECZ:
				1635	return EXECZ;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1636	default:
				1637	return INVALID_BR;
				1638	}
				1639	}
				1640
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1641	bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
				1642	MachineBasicBlock::iterator I,
				1643	MachineBasicBlock *&TBB,
				1644	MachineBasicBlock *&FBB,
				1645	SmallVectorImpl<MachineOperand> &Cond,
				1646	bool AllowModify) const {
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1647	if (I->getOpcode() == AMDGPU::S_BRANCH) {
				1648	// Unconditional Branch
				1649	TBB = I->getOperand(0).getMBB();
				1650	return false;
				1651	}
				1652
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1653	MachineBasicBlock *CondBB = nullptr;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1654
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1655	if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
				1656	CondBB = I->getOperand(1).getMBB();
				1657	Cond.push_back(I->getOperand(0));
				1658	} else {
				1659	BranchPredicate Pred = getBranchPredicate(I->getOpcode());
				1660	if (Pred == INVALID_BR)
				1661	return true;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1662
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1663	CondBB = I->getOperand(0).getMBB();
				1664	Cond.push_back(MachineOperand::CreateImm(Pred));
				1665	Cond.push_back(I->getOperand(1)); // Save the branch register.
				1666	}
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1667	++I;
				1668
				1669	if (I == MBB.end()) {
				1670	// Conditional branch followed by fall-through.
				1671	TBB = CondBB;
				1672	return false;
				1673	}
				1674
				1675	if (I->getOpcode() == AMDGPU::S_BRANCH) {
				1676	TBB = CondBB;
				1677	FBB = I->getOperand(0).getMBB();
				1678	return false;
				1679	}
				1680
				1681	return true;
				1682	}
				1683
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1684	bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
				1685	MachineBasicBlock *&FBB,
				1686	SmallVectorImpl<MachineOperand> &Cond,
				1687	bool AllowModify) const {
				1688	MachineBasicBlock::iterator I = MBB.getFirstTerminator();
Matt Arsenault	eabb8dd	2018-11-16 05:03:02 +0000	[diff] [blame]	1689	auto E = MBB.end();
				1690	if (I == E)
				1691	return false;
				1692
				1693	// Skip over the instructions that are artificially terminators for special
				1694	// exec management.
				1695	while (I != E && !I->isBranch() && !I->isReturn() &&
				1696	I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
				1697	switch (I->getOpcode()) {
				1698	case AMDGPU::SI_MASK_BRANCH:
				1699	case AMDGPU::S_MOV_B64_term:
				1700	case AMDGPU::S_XOR_B64_term:
				1701	case AMDGPU::S_ANDN2_B64_term:
				1702	break;
				1703	case AMDGPU::SI_IF:
				1704	case AMDGPU::SI_ELSE:
				1705	case AMDGPU::SI_KILL_I1_TERMINATOR:
				1706	case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
				1707	// FIXME: It's messy that these need to be considered here at all.
				1708	return true;
				1709	default:
				1710	llvm_unreachable("unexpected non-branch terminator inst");
				1711	}
				1712
				1713	++I;
				1714	}
				1715
				1716	if (I == E)
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1717	return false;
				1718
				1719	if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
				1720	return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
				1721
				1722	++I;
				1723
				1724	// TODO: Should be able to treat as fallthrough?
				1725	if (I == MBB.end())
				1726	return true;
				1727
				1728	if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
				1729	return true;
				1730
				1731	MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
				1732
				1733	// Specifically handle the case where the conditional branch is to the same
				1734	// destination as the mask branch. e.g.
				1735	//
				1736	// si_mask_branch BB8
				1737	// s_cbranch_execz BB8
				1738	// s_cbranch BB9
				1739	//
				1740	// This is required to understand divergent loops which may need the branches
				1741	// to be relaxed.
				1742	if (TBB != MaskBrDest \|\| Cond.empty())
				1743	return true;
				1744
				1745	auto Pred = Cond[0].getImm();
				1746	return (Pred != EXECZ && Pred != EXECNZ);
				1747	}
				1748
Matt Arsenault	1b9fc8e	2016-09-14 20:43:16 +0000	[diff] [blame]	1749	unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1750	int *BytesRemoved) const {
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1751	MachineBasicBlock::iterator I = MBB.getFirstTerminator();
				1752
				1753	unsigned Count = 0;
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1754	unsigned RemovedSize = 0;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1755	while (I != MBB.end()) {
				1756	MachineBasicBlock::iterator Next = std::next(I);
Matt Arsenault	6bc43d8	2016-10-06 16:20:41 +0000	[diff] [blame]	1757	if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
				1758	I = Next;
				1759	continue;
				1760	}
				1761
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1762	RemovedSize += getInstSizeInBytes(*I);
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1763	I->eraseFromParent();
				1764	++Count;
				1765	I = Next;
				1766	}
				1767
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1768	if (BytesRemoved)
				1769	*BytesRemoved = RemovedSize;
				1770
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1771	return Count;
				1772	}
				1773
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1774	// Copy the flags onto the implicit condition register operand.
				1775	static void preserveCondRegFlags(MachineOperand &CondReg,
				1776	const MachineOperand &OrigCond) {
				1777	CondReg.setIsUndef(OrigCond.isUndef());
				1778	CondReg.setIsKill(OrigCond.isKill());
				1779	}
				1780
Matt Arsenault	e8e0f5c	2016-09-14 17:24:15 +0000	[diff] [blame]	1781	unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1782	MachineBasicBlock *TBB,
				1783	MachineBasicBlock *FBB,
				1784	ArrayRef<MachineOperand> Cond,
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1785	const DebugLoc &DL,
				1786	int *BytesAdded) const {
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1787	if (!FBB && Cond.empty()) {
				1788	BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
				1789	.addMBB(TBB);
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1790	if (BytesAdded)
				1791	*BytesAdded = 4;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1792	return 1;
				1793	}
				1794
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1795	if(Cond.size() == 1 && Cond[0].isReg()) {
				1796	BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
				1797	.add(Cond[0])
				1798	.addMBB(TBB);
				1799	return 1;
				1800	}
				1801
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1802	assert(TBB && Cond[0].isImm());
				1803
				1804	unsigned Opcode
				1805	= getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
				1806
				1807	if (!FBB) {
Matt Arsenault	52f14ec	2016-11-07 19:09:27 +0000	[diff] [blame]	1808	Cond[1].isUndef();
				1809	MachineInstr *CondBr =
				1810	BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1811	.addMBB(TBB);
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1812
Matt Arsenault	52f14ec	2016-11-07 19:09:27 +0000	[diff] [blame]	1813	// Copy the flags onto the implicit condition register operand.
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1814	preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
Matt Arsenault	52f14ec	2016-11-07 19:09:27 +0000	[diff] [blame]	1815
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1816	if (BytesAdded)
				1817	*BytesAdded = 4;
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1818	return 1;
				1819	}
				1820
				1821	assert(TBB && FBB);
				1822
Matt Arsenault	52f14ec	2016-11-07 19:09:27 +0000	[diff] [blame]	1823	MachineInstr *CondBr =
				1824	BuildMI(&MBB, DL, get(Opcode))
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1825	.addMBB(TBB);
				1826	BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
				1827	.addMBB(FBB);
				1828
Matt Arsenault	52f14ec	2016-11-07 19:09:27 +0000	[diff] [blame]	1829	MachineOperand &CondReg = CondBr->getOperand(1);
				1830	CondReg.setIsUndef(Cond[1].isUndef());
				1831	CondReg.setIsKill(Cond[1].isKill());
				1832
Matt Arsenault	a2b036e	2016-09-14 17:23:48 +0000	[diff] [blame]	1833	if (BytesAdded)
				1834	*BytesAdded = 8;
				1835
Matt Arsenault	6d09380	2016-05-21 00:29:27 +0000	[diff] [blame]	1836	return 2;
				1837	}
				1838
Matt Arsenault	1b9fc8e	2016-09-14 20:43:16 +0000	[diff] [blame]	1839	bool SIInstrInfo::reverseBranchCondition(
Matt Arsenault	72fcd5f	2016-05-21 00:29:34 +0000	[diff] [blame]	1840	SmallVectorImpl<MachineOperand> &Cond) const {
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	1841	if (Cond.size() != 2) {
				1842	return true;
				1843	}
				1844
				1845	if (Cond[0].isImm()) {
				1846	Cond[0].setImm(-Cond[0].getImm());
				1847	return false;
				1848	}
				1849
				1850	return true;
Matt Arsenault	72fcd5f	2016-05-21 00:29:34 +0000	[diff] [blame]	1851	}
				1852
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1853	bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
				1854	ArrayRef<MachineOperand> Cond,
				1855	unsigned TrueReg, unsigned FalseReg,
				1856	int &CondCycles,
				1857	int &TrueCycles, int &FalseCycles) const {
				1858	switch (Cond[0].getImm()) {
				1859	case VCCNZ:
				1860	case VCCZ: {
				1861	const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				1862	const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
				1863	assert(MRI.getRegClass(FalseReg) == RC);
				1864
				1865	int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
				1866	CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
				1867
				1868	// Limit to equal cost for branch vs. N v_cndmask_b32s.
				1869	return !RI.isSGPRClass(RC) && NumInsts <= 6;
				1870	}
				1871	case SCC_TRUE:
				1872	case SCC_FALSE: {
				1873	// FIXME: We could insert for VGPRs if we could replace the original compare
				1874	// with a vector one.
				1875	const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				1876	const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
				1877	assert(MRI.getRegClass(FalseReg) == RC);
				1878
				1879	int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
				1880
				1881	// Multiples of 8 can do s_cselect_b64
				1882	if (NumInsts % 2 == 0)
				1883	NumInsts /= 2;
				1884
				1885	CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
				1886	return RI.isSGPRClass(RC);
				1887	}
				1888	default:
				1889	return false;
				1890	}
				1891	}
				1892
				1893	void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
				1894	MachineBasicBlock::iterator I, const DebugLoc &DL,
				1895	unsigned DstReg, ArrayRef<MachineOperand> Cond,
				1896	unsigned TrueReg, unsigned FalseReg) const {
				1897	BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
				1898	if (Pred == VCCZ \|\| Pred == SCC_FALSE) {
				1899	Pred = static_cast<BranchPredicate>(-Pred);
				1900	std::swap(TrueReg, FalseReg);
				1901	}
				1902
				1903	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				1904	const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1905	unsigned DstSize = RI.getRegSizeInBits(*DstRC);
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1906
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1907	if (DstSize == 32) {
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1908	unsigned SelOp = Pred == SCC_TRUE ?
				1909	AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
				1910
				1911	// Instruction's operands are backwards from what is expected.
				1912	MachineInstr *Select =
				1913	BuildMI(MBB, I, DL, get(SelOp), DstReg)
				1914	.addReg(FalseReg)
				1915	.addReg(TrueReg);
				1916
				1917	preserveCondRegFlags(Select->getOperand(3), Cond[1]);
				1918	return;
				1919	}
				1920
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1921	if (DstSize == 64 && Pred == SCC_TRUE) {
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1922	MachineInstr *Select =
				1923	BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
				1924	.addReg(FalseReg)
				1925	.addReg(TrueReg);
				1926
				1927	preserveCondRegFlags(Select->getOperand(3), Cond[1]);
				1928	return;
				1929	}
				1930
				1931	static const int16_t Sub0_15[] = {
				1932	AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
				1933	AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
				1934	AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
				1935	AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
				1936	};
				1937
				1938	static const int16_t Sub0_15_64[] = {
				1939	AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
				1940	AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
				1941	AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
				1942	AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
				1943	};
				1944
				1945	unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
				1946	const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
				1947	const int16_t *SubIndices = Sub0_15;
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	1948	int NElts = DstSize / 32;
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1949
Tim Renouf	361b5b2	2019-03-21 12:01:21 +0000	[diff] [blame]	1950	// 64-bit select is only available for SALU.
				1951	// TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit.
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1952	if (Pred == SCC_TRUE) {
Tim Renouf	361b5b2	2019-03-21 12:01:21 +0000	[diff] [blame]	1953	if (NElts % 2) {
				1954	SelOp = AMDGPU::S_CSELECT_B32;
				1955	EltRC = &AMDGPU::SGPR_32RegClass;
				1956	} else {
				1957	SelOp = AMDGPU::S_CSELECT_B64;
				1958	EltRC = &AMDGPU::SGPR_64RegClass;
				1959	SubIndices = Sub0_15_64;
				1960	NElts /= 2;
				1961	}
Matt Arsenault	9f5e0ef	2017-01-25 04:25:02 +0000	[diff] [blame]	1962	}
				1963
				1964	MachineInstrBuilder MIB = BuildMI(
				1965	MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
				1966
				1967	I = MIB->getIterator();
				1968
				1969	SmallVector<unsigned, 8> Regs;
				1970	for (int Idx = 0; Idx != NElts; ++Idx) {
				1971	unsigned DstElt = MRI.createVirtualRegister(EltRC);
				1972	Regs.push_back(DstElt);
				1973
				1974	unsigned SubIdx = SubIndices[Idx];
				1975
				1976	MachineInstr *Select =
				1977	BuildMI(MBB, I, DL, get(SelOp), DstElt)
				1978	.addReg(FalseReg, 0, SubIdx)
				1979	.addReg(TrueReg, 0, SubIdx);
				1980	preserveCondRegFlags(Select->getOperand(3), Cond[1]);
				1981
				1982	MIB.addReg(DstElt)
				1983	.addImm(SubIdx);
				1984	}
				1985	}
				1986
Sam Kolton	27e0f8b	2017-03-31 11:42:43 +0000	[diff] [blame]	1987	bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
				1988	switch (MI.getOpcode()) {
				1989	case AMDGPU::V_MOV_B32_e32:
				1990	case AMDGPU::V_MOV_B32_e64:
				1991	case AMDGPU::V_MOV_B64_PSEUDO: {
				1992	// If there are additional implicit register operands, this may be used for
				1993	// register indexing so the source register operand isn't simply copied.
				1994	unsigned NumOps = MI.getDesc().getNumOperands() +
				1995	MI.getDesc().getNumImplicitUses();
				1996
				1997	return MI.getNumOperands() == NumOps;
				1998	}
				1999	case AMDGPU::S_MOV_B32:
				2000	case AMDGPU::S_MOV_B64:
				2001	case AMDGPU::COPY:
				2002	return true;
				2003	default:
				2004	return false;
				2005	}
				2006	}
				2007
Jan Sjodin	312ccf7	2017-09-14 20:53:51 +0000	[diff] [blame]	2008	unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
Marcello Maggioni	5ca4128	2018-08-20 19:23:45 +0000	[diff] [blame]	2009	unsigned Kind) const {
Jan Sjodin	312ccf7	2017-09-14 20:53:51 +0000	[diff] [blame]	2010	switch(Kind) {
				2011	case PseudoSourceValue::Stack:
				2012	case PseudoSourceValue::FixedStack:
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	2013	return AMDGPUAS::PRIVATE_ADDRESS;
Jan Sjodin	312ccf7	2017-09-14 20:53:51 +0000	[diff] [blame]	2014	case PseudoSourceValue::ConstantPool:
				2015	case PseudoSourceValue::GOT:
				2016	case PseudoSourceValue::JumpTable:
				2017	case PseudoSourceValue::GlobalValueCallEntry:
				2018	case PseudoSourceValue::ExternalSymbolCallEntry:
				2019	case PseudoSourceValue::TargetCustom:
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	2020	return AMDGPUAS::CONSTANT_ADDRESS;
Jan Sjodin	312ccf7	2017-09-14 20:53:51 +0000	[diff] [blame]	2021	}
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	2022	return AMDGPUAS::FLAT_ADDRESS;
Jan Sjodin	312ccf7	2017-09-14 20:53:51 +0000	[diff] [blame]	2023	}
				2024
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2025	static void removeModOperands(MachineInstr &MI) {
				2026	unsigned Opc = MI.getOpcode();
				2027	int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				2028	AMDGPU::OpName::src0_modifiers);
				2029	int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				2030	AMDGPU::OpName::src1_modifiers);
				2031	int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
				2032	AMDGPU::OpName::src2_modifiers);
				2033
				2034	MI.RemoveOperand(Src2ModIdx);
				2035	MI.RemoveOperand(Src1ModIdx);
				2036	MI.RemoveOperand(Src0ModIdx);
				2037	}
				2038
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2039	bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2040	unsigned Reg, MachineRegisterInfo *MRI) const {
				2041	if (!MRI->hasOneNonDBGUse(Reg))
				2042	return false;
				2043
Nicolai Haehnle	39980da	2017-11-28 08:41:50 +0000	[diff] [blame]	2044	switch (DefMI.getOpcode()) {
				2045	default:
				2046	return false;
				2047	case AMDGPU::S_MOV_B64:
				2048	// TODO: We could fold 64-bit immediates, but this get compilicated
				2049	// when there are sub-registers.
				2050	return false;
				2051
				2052	case AMDGPU::V_MOV_B32_e32:
				2053	case AMDGPU::S_MOV_B32:
				2054	break;
				2055	}
				2056
				2057	const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
				2058	assert(ImmOp);
				2059	// FIXME: We could handle FrameIndex values here.
				2060	if (!ImmOp->isImm())
				2061	return false;
				2062
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2063	unsigned Opc = UseMI.getOpcode();
Tom Stellard	2add8a1	2016-09-06 20:00:26 +0000	[diff] [blame]	2064	if (Opc == AMDGPU::COPY) {
				2065	bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
Tom Stellard	2add8a1	2016-09-06 20:00:26 +0000	[diff] [blame]	2066	unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
Tom Stellard	2add8a1	2016-09-06 20:00:26 +0000	[diff] [blame]	2067	UseMI.setDesc(get(NewOpc));
				2068	UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
				2069	UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
				2070	return true;
				2071	}
				2072
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2073	if (Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64 \|\|
				2074	Opc == AMDGPU::V_MAD_F16 \|\| Opc == AMDGPU::V_MAC_F16_e64) {
Matt Arsenault	2ed2193	2017-02-27 20:21:31 +0000	[diff] [blame]	2075	// Don't fold if we are using source or output modifiers. The new VOP2
				2076	// instructions don't have them.
				2077	if (hasAnyModifiersSet(UseMI))
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2078	return false;
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2079
Matt Arsenault	3d1c1de	2016-04-14 21:58:24 +0000	[diff] [blame]	2080	// If this is a free constant, there's no reason to do this.
				2081	// TODO: We could fold this here instead of letting SIFoldOperands do it
				2082	// later.
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2083	MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
				2084
				2085	// Any src operand can be used for the legality check.
Nicolai Haehnle	39980da	2017-11-28 08:41:50 +0000	[diff] [blame]	2086	if (isInlineConstant(UseMI, Src0, ImmOp))
Matt Arsenault	3d1c1de	2016-04-14 21:58:24 +0000	[diff] [blame]	2087	return false;
				2088
Matt Arsenault	2ed2193	2017-02-27 20:21:31 +0000	[diff] [blame]	2089	bool IsF32 = Opc == AMDGPU::V_MAD_F32 \|\| Opc == AMDGPU::V_MAC_F32_e64;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2090	MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
				2091	MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2092
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2093	// Multiplied part is the constant: Use v_madmk_{f16, f32}.
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2094	// We should only expect these to be on src0 due to canonicalizations.
				2095	if (Src0->isReg() && Src0->getReg() == Reg) {
Matt Arsenault	a266bd8	2016-03-02 04:05:14 +0000	[diff] [blame]	2096	if (!Src1->isReg() \|\| RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2097	return false;
				2098
Matt Arsenault	a266bd8	2016-03-02 04:05:14 +0000	[diff] [blame]	2099	if (!Src2->isReg() \|\| RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2100	return false;
				2101
Nikolay Haustov	6560781	2016-03-11 09:27:25 +0000	[diff] [blame]	2102	// We need to swap operands 0 and 1 since madmk constant is at operand 1.
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2103
Nicolai Haehnle	39980da	2017-11-28 08:41:50 +0000	[diff] [blame]	2104	const int64_t Imm = ImmOp->getImm();
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2105
				2106	// FIXME: This would be a lot easier if we could return a new instruction
				2107	// instead of having to modify in place.
				2108
				2109	// Remove these first since they are at the end.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2110	UseMI.RemoveOperand(
				2111	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
				2112	UseMI.RemoveOperand(
				2113	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2114
				2115	unsigned Src1Reg = Src1->getReg();
				2116	unsigned Src1SubReg = Src1->getSubReg();
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2117	Src0->setReg(Src1Reg);
				2118	Src0->setSubReg(Src1SubReg);
Matt Arsenault	5e10016	2015-04-24 01:57:58 +0000	[diff] [blame]	2119	Src0->setIsKill(Src1->isKill());
				2120
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2121	if (Opc == AMDGPU::V_MAC_F32_e64 \|\|
				2122	Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2123	UseMI.untieRegOperand(
				2124	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2125
Nikolay Haustov	6560781	2016-03-11 09:27:25 +0000	[diff] [blame]	2126	Src1->ChangeToImmediate(Imm);
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2127
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2128	removeModOperands(UseMI);
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2129	UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2130
				2131	bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
				2132	if (DeleteDef)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2133	DefMI.eraseFromParent();
Matt Arsenault	f078330	2015-02-21 21:29:10 +0000	[diff] [blame]	2134
				2135	return true;
				2136	}
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2137
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2138	// Added part is the constant: Use v_madak_{f16, f32}.
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2139	if (Src2->isReg() && Src2->getReg() == Reg) {
				2140	// Not allowed to use constant bus for another operand.
				2141	// We can however allow an inline immediate as src0.
Alexander Timofeev	20cbe6f	2018-09-10 16:42:49 +0000	[diff] [blame]	2142	bool Src0Inlined = false;
				2143	if (Src0->isReg()) {
				2144	// Try to inline constant if possible.
				2145	// If the Def moves immediate and the use is single
				2146	// We are saving VGPR here.
				2147	MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
				2148	if (Def && Def->isMoveImmediate() &&
				2149	isInlineConstant(Def->getOperand(1)) &&
				2150	MRI->hasOneUse(Src0->getReg())) {
				2151	Src0->ChangeToImmediate(Def->getOperand(1).getImm());
				2152	Src0Inlined = true;
				2153	} else if ((RI.isPhysicalRegister(Src0->getReg()) &&
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	2154	(ST.getConstantBusLimit(Opc) <= 1 &&
				2155	RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) \|\|
Alexander Timofeev	20cbe6f	2018-09-10 16:42:49 +0000	[diff] [blame]	2156	(RI.isVirtualRegister(Src0->getReg()) &&
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	2157	(ST.getConstantBusLimit(Opc) <= 1 &&
				2158	RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
Alexander Timofeev	20cbe6f	2018-09-10 16:42:49 +0000	[diff] [blame]	2159	return false;
				2160	// VGPR is okay as Src0 - fallthrough
				2161	}
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2162
Alexander Timofeev	20cbe6f	2018-09-10 16:42:49 +0000	[diff] [blame]	2163	if (Src1->isReg() && !Src0Inlined ) {
				2164	// We have one slot for inlinable constant so far - try to fill it
				2165	MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
				2166	if (Def && Def->isMoveImmediate() &&
				2167	isInlineConstant(Def->getOperand(1)) &&
				2168	MRI->hasOneUse(Src1->getReg()) &&
				2169	commuteInstruction(UseMI)) {
				2170	Src0->ChangeToImmediate(Def->getOperand(1).getImm());
				2171	} else if ((RI.isPhysicalRegister(Src1->getReg()) &&
				2172	RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) \|\|
				2173	(RI.isVirtualRegister(Src1->getReg()) &&
				2174	RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
				2175	return false;
				2176	// VGPR is okay as Src1 - fallthrough
				2177	}
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2178
Nicolai Haehnle	39980da	2017-11-28 08:41:50 +0000	[diff] [blame]	2179	const int64_t Imm = ImmOp->getImm();
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2180
				2181	// FIXME: This would be a lot easier if we could return a new instruction
				2182	// instead of having to modify in place.
				2183
				2184	// Remove these first since they are at the end.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2185	UseMI.RemoveOperand(
				2186	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
				2187	UseMI.RemoveOperand(
				2188	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2189
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2190	if (Opc == AMDGPU::V_MAC_F32_e64 \|\|
				2191	Opc == AMDGPU::V_MAC_F16_e64)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2192	UseMI.untieRegOperand(
				2193	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2194
				2195	// ChangingToImmediate adds Src2 back to the instruction.
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2196	Src2->ChangeToImmediate(Imm);
				2197
				2198	// These come before src2.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2199	removeModOperands(UseMI);
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2200	UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2201
				2202	bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
				2203	if (DeleteDef)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2204	DefMI.eraseFromParent();
Matt Arsenault	0325d3d	2015-02-21 21:29:07 +0000	[diff] [blame]	2205
				2206	return true;
				2207	}
				2208	}
				2209
				2210	return false;
				2211	}
				2212
/// Return true if the byte range [OffsetA, OffsetA + WidthA) ends at or
/// before the start of [OffsetB, OffsetB + WidthB), or vice versa.
///
/// Only the lower-starting range is checked against the start of the other.
/// When both offsets are equal, range A is treated as the lower one.
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                int WidthB, int OffsetB) {
  if (OffsetA <= OffsetB)
    return OffsetA + WidthA <= OffsetB;

  return OffsetB + WidthB <= OffsetA;
}
				2220
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	2221	bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
				2222	const MachineInstr &MIb) const {
				2223	const MachineOperand BaseOp0, BaseOp1;
Chad Rosier	c27a18f	2016-03-09 16:00:35 +0000	[diff] [blame]	2224	int64_t Offset0, Offset1;
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2225
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	2226	if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
				2227	getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
				2228	if (!BaseOp0->isIdenticalTo(*BaseOp1))
				2229	return false;
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	2230
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2231	if (!MIa.hasOneMemOperand() \|\| !MIb.hasOneMemOperand()) {
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	2232	// FIXME: Handle ds_read2 / ds_write2.
				2233	return false;
				2234	}
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2235	unsigned Width0 = (*MIa.memoperands_begin())->getSize();
				2236	unsigned Width1 = (*MIb.memoperands_begin())->getSize();
Francis Visoiu Mistrih	d7eebd6	2018-11-28 12:00:20 +0000	[diff] [blame]	2237	if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2238	return true;
				2239	}
				2240	}
				2241
				2242	return false;
				2243	}
				2244
Bjorn Pettersson	238c9d630	2019-04-19 09:08:38 +0000	[diff] [blame]	2245	bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
				2246	const MachineInstr &MIb,
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2247	AliasAnalysis *AA) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2248	assert((MIa.mayLoad() \|\| MIa.mayStore()) &&
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2249	"MIa must load from or modify a memory location");
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2250	assert((MIb.mayLoad() \|\| MIb.mayStore()) &&
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2251	"MIb must load from or modify a memory location");
				2252
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2253	if (MIa.hasUnmodeledSideEffects() \|\| MIb.hasUnmodeledSideEffects())
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2254	return false;
				2255
				2256	// XXX - Can we relax this between address spaces?
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2257	if (MIa.hasOrderedMemoryRef() \|\| MIb.hasOrderedMemoryRef())
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2258	return false;
				2259
				2260	// TODO: Should we check the address space from the MachineMemOperand? That
				2261	// would allow us to distinguish objects we know don't alias based on the
Benjamin Kramer	df005cb	2015-08-08 18:27:36 +0000	[diff] [blame]	2262	// underlying address space, even if it was lowered to a different one,
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2263	// e.g. private accesses lowered to use MUBUF instructions on a scratch
				2264	// buffer.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2265	if (isDS(MIa)) {
				2266	if (isDS(MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2267	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				2268
Matt Arsenault	9608a289	2017-07-29 01:26:21 +0000	[diff] [blame]	2269	return !isFLAT(MIb) \|\| isSegmentSpecificFLAT(MIb);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2270	}
				2271
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2272	if (isMUBUF(MIa) \|\| isMTBUF(MIa)) {
				2273	if (isMUBUF(MIb) \|\| isMTBUF(MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2274	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				2275
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2276	return !isFLAT(MIb) && !isSMRD(MIb);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2277	}
				2278
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2279	if (isSMRD(MIa)) {
				2280	if (isSMRD(MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2281	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				2282
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2283	return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2284	}
				2285
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2286	if (isFLAT(MIa)) {
				2287	if (isFLAT(MIb))
Matt Arsenault	c09cc3c	2014-11-19 00:01:31 +0000	[diff] [blame]	2288	return checkInstOffsetsDoNotOverlap(MIa, MIb);
				2289
				2290	return false;
				2291	}
				2292
				2293	return false;
				2294	}
				2295
Stanislav Mekhanoshin	710da42	2017-09-11 17:13:57 +0000	[diff] [blame]	2296	static int64_t getFoldableImm(const MachineOperand* MO) {
				2297	if (!MO->isReg())
				2298	return false;
				2299	const MachineFunction *MF = MO->getParent()->getParent()->getParent();
				2300	const MachineRegisterInfo &MRI = MF->getRegInfo();
				2301	auto Def = MRI.getUniqueVRegDef(MO->getReg());
Matt Arsenault	c317287	2017-09-14 20:54:29 +0000	[diff] [blame]	2302	if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
				2303	Def->getOperand(1).isImm())
Stanislav Mekhanoshin	710da42	2017-09-11 17:13:57 +0000	[diff] [blame]	2304	return Def->getOperand(1).getImm();
				2305	return AMDGPU::NoRegister;
				2306	}
				2307
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2308	MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2309	MachineInstr &MI,
				2310	LiveVariables *LV) const {
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2311	unsigned Opc = MI.getOpcode();
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2312	bool IsF16 = false;
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2313	bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 \|\| Opc == AMDGPU::V_FMAC_F32_e64;
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2314
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2315	switch (Opc) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2316	default:
				2317	return nullptr;
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2318	case AMDGPU::V_MAC_F16_e64:
				2319	IsF16 = true;
Simon Pilgrim	0f5b350	2017-07-07 10:18:57 +0000	[diff] [blame]	2320	LLVM_FALLTHROUGH;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2321	case AMDGPU::V_MAC_F32_e64:
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2322	case AMDGPU::V_FMAC_F32_e64:
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2323	break;
Konstantin Zhuravlyov	f86e4b7	2016-11-13 07:01:11 +0000	[diff] [blame]	2324	case AMDGPU::V_MAC_F16_e32:
				2325	IsF16 = true;
Simon Pilgrim	0f5b350	2017-07-07 10:18:57 +0000	[diff] [blame]	2326	LLVM_FALLTHROUGH;
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2327	case AMDGPU::V_MAC_F32_e32:
				2328	case AMDGPU::V_FMAC_F32_e32: {
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2329	int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
				2330	AMDGPU::OpName::src0);
				2331	const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
Matt Arsenault	fdcdd88	2017-09-21 00:45:59 +0000	[diff] [blame]	2332	if (!Src0->isReg() && !Src0->isImm())
				2333	return nullptr;
				2334
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2335	if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2336	return nullptr;
Matt Arsenault	fdcdd88	2017-09-21 00:45:59 +0000	[diff] [blame]	2337
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2338	break;
				2339	}
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2340	}
				2341
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2342	const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
				2343	const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2344	const MachineOperand *Src0Mods =
				2345	getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2346	const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2347	const MachineOperand *Src1Mods =
				2348	getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2349	const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2350	const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
				2351	const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2352
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2353	if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
Matt Arsenault	c317287	2017-09-14 20:54:29 +0000	[diff] [blame]	2354	// If we have an SGPR input, we will violate the constant bus restriction.
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	2355	(ST.getConstantBusLimit(Opc) > 1 \|\|
				2356	!Src0->isReg() \|\|
				2357	!RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
Stanislav Mekhanoshin	710da42	2017-09-11 17:13:57 +0000	[diff] [blame]	2358	if (auto Imm = getFoldableImm(Src2)) {
				2359	return BuildMI(*MBB, MI, MI.getDebugLoc(),
				2360	get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
				2361	.add(*Dst)
				2362	.add(*Src0)
				2363	.add(*Src1)
				2364	.addImm(Imm);
				2365	}
				2366	if (auto Imm = getFoldableImm(Src1)) {
				2367	return BuildMI(*MBB, MI, MI.getDebugLoc(),
				2368	get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
				2369	.add(*Dst)
				2370	.add(*Src0)
				2371	.addImm(Imm)
				2372	.add(*Src2);
				2373	}
				2374	if (auto Imm = getFoldableImm(Src0)) {
				2375	if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
				2376	AMDGPU::OpName::src0), Src1))
				2377	return BuildMI(*MBB, MI, MI.getDebugLoc(),
				2378	get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
				2379	.add(*Dst)
				2380	.add(*Src1)
				2381	.addImm(Imm)
				2382	.add(*Src2);
				2383	}
				2384	}
				2385
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	2386	assert((!IsFMA \|\| !IsF16) && "fmac only expected with f32");
				2387	unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
				2388	(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
				2389	return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	2390	.add(*Dst)
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2391	.addImm(Src0Mods ? Src0Mods->getImm() : 0)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	2392	.add(*Src0)
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2393	.addImm(Src1Mods ? Src1Mods->getImm() : 0)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	2394	.add(*Src1)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2395	.addImm(0) // Src mods
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	2396	.add(*Src2)
Matt Arsenault	3cb9ff8	2017-03-11 05:40:40 +0000	[diff] [blame]	2397	.addImm(Clamp ? Clamp->getImm() : 0)
				2398	.addImm(Omod ? Omod->getImm() : 0);
Tom Stellard	db5a11f	2015-07-13 15:47:57 +0000	[diff] [blame]	2399	}
				2400
Matt Arsenault	d486d3f	2016-10-12 18:49:05 +0000	[diff] [blame]	2401	// It's not generally safe to move VALU instructions across these since it will
				2402	// start using the register as a base index rather than directly.
				2403	// XXX - Why isn't hasSideEffects sufficient for these?
				2404	static bool changesVGPRIndexingMode(const MachineInstr &MI) {
				2405	switch (MI.getOpcode()) {
				2406	case AMDGPU::S_SET_GPR_IDX_ON:
				2407	case AMDGPU::S_SET_GPR_IDX_MODE:
				2408	case AMDGPU::S_SET_GPR_IDX_OFF:
				2409	return true;
				2410	default:
				2411	return false;
				2412	}
				2413	}
				2414
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2415	bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
Nicolai Haehnle	213e87f	2016-03-21 20:28:33 +0000	[diff] [blame]	2416	const MachineBasicBlock *MBB,
				2417	const MachineFunction &MF) const {
Matt Arsenault	95c7897	2016-07-09 01:13:51 +0000	[diff] [blame]	2418	// XXX - Do we want the SP check in the base implementation?
				2419
Nicolai Haehnle	213e87f	2016-03-21 20:28:33 +0000	[diff] [blame]	2420	// Target-independent instructions do not have an implicit-use of EXEC, even
				2421	// when they operate on VGPRs. Treating EXEC modifications as scheduling
				2422	// boundaries prevents incorrect movements of such instructions.
Matt Arsenault	95c7897	2016-07-09 01:13:51 +0000	[diff] [blame]	2423	return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) \|\|
Matt Arsenault	d486d3f	2016-10-12 18:49:05 +0000	[diff] [blame]	2424	MI.modifiesRegister(AMDGPU::EXEC, &RI) \|\|
Tom Stellard	8485fa0	2016-12-07 02:42:15 +0000	[diff] [blame]	2425	MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 \|\|
				2426	MI.getOpcode() == AMDGPU::S_SETREG_B32 \|\|
Matt Arsenault	d486d3f	2016-10-12 18:49:05 +0000	[diff] [blame]	2427	changesVGPRIndexingMode(MI);
Nicolai Haehnle	213e87f	2016-03-21 20:28:33 +0000	[diff] [blame]	2428	}
				2429
Marek Olsak	c5cec5e	2019-01-16 15:43:53 +0000	[diff] [blame]	2430	bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
				2431	return Opcode == AMDGPU::DS_ORDERED_COUNT \|\|
				2432	Opcode == AMDGPU::DS_GWS_INIT \|\|
				2433	Opcode == AMDGPU::DS_GWS_SEMA_V \|\|
				2434	Opcode == AMDGPU::DS_GWS_SEMA_BR \|\|
				2435	Opcode == AMDGPU::DS_GWS_SEMA_P \|\|
				2436	Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL \|\|
				2437	Opcode == AMDGPU::DS_GWS_BARRIER;
				2438	}
				2439
Nicolai Haehnle	7f0d05d	2018-07-30 09:23:59 +0000	[diff] [blame]	2440	bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
				2441	unsigned Opcode = MI.getOpcode();
				2442
				2443	if (MI.mayStore() && isSMRD(MI))
				2444	return true; // scalar store or atomic
				2445
				2446	// These instructions cause shader I/O that may cause hardware lockups
				2447	// when executed with an empty EXEC mask.
				2448	//
				2449	// Note: exp with VM = DONE = 0 is automatically skipped by hardware when
				2450	// EXEC = 0, but checking for that case here seems not worth it
				2451	// given the typical code patterns.
				2452	if (Opcode == AMDGPU::S_SENDMSG \|\| Opcode == AMDGPU::S_SENDMSGHALT \|\|
Marek Olsak	c5cec5e	2019-01-16 15:43:53 +0000	[diff] [blame]	2453	Opcode == AMDGPU::EXP \|\| Opcode == AMDGPU::EXP_DONE \|\|
				2454	Opcode == AMDGPU::DS_ORDERED_COUNT)
Nicolai Haehnle	7f0d05d	2018-07-30 09:23:59 +0000	[diff] [blame]	2455	return true;
				2456
				2457	if (MI.isInlineAsm())
				2458	return true; // conservative assumption
				2459
				2460	// These are like SALU instructions in terms of effects, so it's questionable
				2461	// whether we should return true for those.
				2462	//
				2463	// However, executing them with EXEC = 0 causes them to operate on undefined
				2464	// data, which we avoid by returning true here.
				2465	if (Opcode == AMDGPU::V_READFIRSTLANE_B32 \|\| Opcode == AMDGPU::V_READLANE_B32)
				2466	return true;
				2467
				2468	return false;
				2469	}
				2470
Matt Arsenault	a353fd5	2019-03-28 14:01:39 +0000	[diff] [blame]	2471	bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI,
				2472	const MachineInstr &MI) const {
				2473	if (MI.isMetaInstruction())
				2474	return false;
				2475
				2476	// This won't read exec if this is an SGPR->SGPR copy.
				2477	if (MI.isCopyLike()) {
				2478	if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
				2479	return true;
				2480
				2481	// Make sure this isn't copying exec as a normal operand
				2482	return MI.readsRegister(AMDGPU::EXEC, &RI);
				2483	}
				2484
				2485	// Be conservative with any unhandled generic opcodes.
				2486	if (!isTargetSpecificOpcode(MI.getOpcode()))
				2487	return true;
				2488
				2489	return !isSALU(MI) \|\| MI.readsRegister(AMDGPU::EXEC, &RI);
				2490	}
				2491
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	2492	bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
Matt Arsenault	26faed3	2016-12-05 22:26:17 +0000	[diff] [blame]	2493	switch (Imm.getBitWidth()) {
				2494	case 32:
				2495	return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
				2496	ST.hasInv2PiInlineImm());
				2497	case 64:
				2498	return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
				2499	ST.hasInv2PiInlineImm());
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2500	case 16:
Matt Arsenault	9dba9bd	2017-02-02 02:27:04 +0000	[diff] [blame]	2501	return ST.has16BitInsts() &&
				2502	AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2503	ST.hasInv2PiInlineImm());
Matt Arsenault	26faed3	2016-12-05 22:26:17 +0000	[diff] [blame]	2504	default:
				2505	llvm_unreachable("invalid bitwidth");
Matt Arsenault	303011a	2014-12-17 21:04:08 +0000	[diff] [blame]	2506	}
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	2507	}
				2508
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	2509	bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2510	uint8_t OperandType) const {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2511	if (!MO.isImm() \|\|
				2512	OperandType < AMDGPU::OPERAND_SRC_FIRST \|\|
				2513	OperandType > AMDGPU::OPERAND_SRC_LAST)
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2514	return false;
				2515
				2516	// MachineOperand provides no way to tell the true operand size, since it only
				2517	// records a 64-bit value. We need to know the size to determine if a 32-bit
				2518	// floating point immediate bit pattern is legal for an integer immediate. It
				2519	// would be for any 32-bit integer operand, but would not be for a 64-bit one.
				2520
				2521	int64_t Imm = MO.getImm();
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	2522	switch (OperandType) {
				2523	case AMDGPU::OPERAND_REG_IMM_INT32:
				2524	case AMDGPU::OPERAND_REG_IMM_FP32:
				2525	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
				2526	case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2527	int32_t Trunc = static_cast<int32_t>(Imm);
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	2528	return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	2529	}
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	2530	case AMDGPU::OPERAND_REG_IMM_INT64:
				2531	case AMDGPU::OPERAND_REG_IMM_FP64:
				2532	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	2533	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2534	return AMDGPU::isInlinableLiteral64(MO.getImm(),
				2535	ST.hasInv2PiInlineImm());
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	2536	case AMDGPU::OPERAND_REG_IMM_INT16:
				2537	case AMDGPU::OPERAND_REG_IMM_FP16:
				2538	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
				2539	case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2540	if (isInt<16>(Imm) \|\| isUInt<16>(Imm)) {
Matt Arsenault	9dba9bd	2017-02-02 02:27:04 +0000	[diff] [blame]	2541	// A few special case instructions have 16-bit operands on subtargets
				2542	// where 16-bit instructions are not legal.
				2543	// TODO: Do the 32-bit immediates work? We shouldn't really need to handle
				2544	// constants in these cases
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2545	int16_t Trunc = static_cast<int16_t>(Imm);
Matt Arsenault	9dba9bd	2017-02-02 02:27:04 +0000	[diff] [blame]	2546	return ST.has16BitInsts() &&
				2547	AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2548	}
Matt Arsenault	d7bdcc4	2014-03-31 19:54:27 +0000	[diff] [blame]	2549
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2550	return false;
				2551	}
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	2552	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
				2553	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
Stanislav Mekhanoshin	160f857	2018-04-19 21:16:50 +0000	[diff] [blame]	2554	if (isUInt<16>(Imm)) {
				2555	int16_t Trunc = static_cast<int16_t>(Imm);
				2556	return ST.has16BitInsts() &&
				2557	AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
				2558	}
				2559	if (!(Imm & 0xffff)) {
				2560	return ST.has16BitInsts() &&
				2561	AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
				2562	}
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	2563	uint32_t Trunc = static_cast<uint32_t>(Imm);
				2564	return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
				2565	}
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2566	default:
				2567	llvm_unreachable("invalid bitwidth");
				2568	}
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	2569	}
				2570
Matt Arsenault	c1ebd82	2016-08-13 01:43:54 +0000	[diff] [blame]	2571	bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2572	const MCOperandInfo &OpInfo) const {
Matt Arsenault	c1ebd82	2016-08-13 01:43:54 +0000	[diff] [blame]	2573	switch (MO.getType()) {
				2574	case MachineOperand::MO_Register:
				2575	return false;
				2576	case MachineOperand::MO_Immediate:
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2577	return !isInlineConstant(MO, OpInfo);
Matt Arsenault	c1ebd82	2016-08-13 01:43:54 +0000	[diff] [blame]	2578	case MachineOperand::MO_FrameIndex:
				2579	case MachineOperand::MO_MachineBasicBlock:
				2580	case MachineOperand::MO_ExternalSymbol:
				2581	case MachineOperand::MO_GlobalAddress:
				2582	case MachineOperand::MO_MCSymbol:
				2583	return true;
				2584	default:
				2585	llvm_unreachable("unexpected operand type");
				2586	}
				2587	}
				2588
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	2589	static bool compareMachineOp(const MachineOperand &Op0,
				2590	const MachineOperand &Op1) {
				2591	if (Op0.getType() != Op1.getType())
				2592	return false;
				2593
				2594	switch (Op0.getType()) {
				2595	case MachineOperand::MO_Register:
				2596	return Op0.getReg() == Op1.getReg();
				2597	case MachineOperand::MO_Immediate:
				2598	return Op0.getImm() == Op1.getImm();
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	2599	default:
				2600	llvm_unreachable("Didn't expect to be comparing these operand types");
				2601	}
				2602	}
				2603
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2604	bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
				2605	const MachineOperand &MO) const {
				2606	const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	2607
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	2608	assert(MO.isImm() \|\| MO.isTargetIndex() \|\| MO.isFI());
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	2609
				2610	if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
				2611	return true;
				2612
				2613	if (OpInfo.RegClass < 0)
				2614	return false;
				2615
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2616	if (MO.isImm() && isInlineConstant(MO, OpInfo))
				2617	return RI.opCanUseInlineConstant(OpInfo.OperandType);
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2618
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2619	return RI.opCanUseLiteralConstant(OpInfo.OperandType);
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	2620	}
				2621
Tom Stellard	86d12eb	2014-08-01 00:32:28 +0000	[diff] [blame]	2622	bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
Marek Olsak	a93603d	2015-01-15 18:42:51 +0000	[diff] [blame]	2623	int Op32 = AMDGPU::getVOPe32(Opcode);
				2624	if (Op32 == -1)
				2625	return false;
				2626
				2627	return pseudoToMCOpcode(Op32) != -1;
Tom Stellard	86d12eb	2014-08-01 00:32:28 +0000	[diff] [blame]	2628	}
				2629
Tom Stellard	b4a313a	2014-08-01 00:32:39 +0000	[diff] [blame]	2630	bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
				2631	// The src0_modifier operand is present on all instructions
				2632	// that have modifiers.
				2633
				2634	return AMDGPU::getNamedOperandIdx(Opcode,
				2635	AMDGPU::OpName::src0_modifiers) != -1;
				2636	}
				2637
Matt Arsenault	ace5b76	2014-10-17 18:00:43 +0000	[diff] [blame]	2638	bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
				2639	unsigned OpName) const {
				2640	const MachineOperand *Mods = getNamedOperand(MI, OpName);
				2641	return Mods && Mods->getImm();
				2642	}
				2643
Matt Arsenault	2ed2193	2017-02-27 20:21:31 +0000	[diff] [blame]	2644	bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
				2645	return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) \|\|
				2646	hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) \|\|
				2647	hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) \|\|
				2648	hasModifiersSet(MI, AMDGPU::OpName::clamp) \|\|
				2649	hasModifiersSet(MI, AMDGPU::OpName::omod);
				2650	}
				2651
Matt Arsenault	35b1902	2018-08-28 18:22:34 +0000	[diff] [blame]	2652	bool SIInstrInfo::canShrink(const MachineInstr &MI,
				2653	const MachineRegisterInfo &MRI) const {
				2654	const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
				2655	// Can't shrink instruction with three operands.
				2656	// FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
				2657	// a special case for it. It can only be shrunk if the third operand
Tim Renouf	2e94f6e	2019-03-18 19:25:39 +0000	[diff] [blame]	2658	// is vcc, and src0_modifiers and src1_modifiers are not set.
				2659	// We should handle this the same way we handle vopc, by addding
Matt Arsenault	35b1902	2018-08-28 18:22:34 +0000	[diff] [blame]	2660	// a register allocation hint pre-regalloc and then do the shrinking
				2661	// post-regalloc.
				2662	if (Src2) {
				2663	switch (MI.getOpcode()) {
				2664	default: return false;
				2665
				2666	case AMDGPU::V_ADDC_U32_e64:
				2667	case AMDGPU::V_SUBB_U32_e64:
				2668	case AMDGPU::V_SUBBREV_U32_e64: {
				2669	const MachineOperand *Src1
				2670	= getNamedOperand(MI, AMDGPU::OpName::src1);
				2671	if (!Src1->isReg() \|\| !RI.isVGPR(MRI, Src1->getReg()))
				2672	return false;
				2673	// Additional verification is needed for sdst/src2.
				2674	return true;
				2675	}
				2676	case AMDGPU::V_MAC_F32_e64:
				2677	case AMDGPU::V_MAC_F16_e64:
				2678	case AMDGPU::V_FMAC_F32_e64:
				2679	if (!Src2->isReg() \|\| !RI.isVGPR(MRI, Src2->getReg()) \|\|
				2680	hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
				2681	return false;
				2682	break;
				2683
				2684	case AMDGPU::V_CNDMASK_B32_e64:
				2685	break;
				2686	}
				2687	}
				2688
				2689	const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
				2690	if (Src1 && (!Src1->isReg() \|\| !RI.isVGPR(MRI, Src1->getReg()) \|\|
				2691	hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
				2692	return false;
				2693
				2694	// We don't need to check src0, all input types are legal, so just make sure
				2695	// src0 isn't using any modifiers.
				2696	if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
				2697	return false;
				2698
Ron Lieberman	16de4fd	2018-12-03 13:04:54 +0000	[diff] [blame]	2699	// Can it be shrunk to a valid 32 bit opcode?
				2700	if (!hasVALU32BitEncoding(MI.getOpcode()))
				2701	return false;
				2702
Matt Arsenault	35b1902	2018-08-28 18:22:34 +0000	[diff] [blame]	2703	// Check output modifiers
				2704	return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
				2705	!hasModifiersSet(MI, AMDGPU::OpName::clamp);
Matt Arsenault	de6c421	2018-08-28 18:34:24 +0000	[diff] [blame]	2706	}
Matt Arsenault	35b1902	2018-08-28 18:22:34 +0000	[diff] [blame]	2707
Matt Arsenault	de6c421	2018-08-28 18:34:24 +0000	[diff] [blame]	2708	// Set VCC operand with all flags from \p Orig, except for setting it as
				2709	// implicit.
				2710	static void copyFlagsToImplicitVCC(MachineInstr &MI,
				2711	const MachineOperand &Orig) {
				2712
				2713	for (MachineOperand &Use : MI.implicit_operands()) {
				2714	if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
				2715	Use.setIsUndef(Orig.isUndef());
				2716	Use.setIsKill(Orig.isKill());
				2717	return;
				2718	}
				2719	}
				2720	}
				2721
				2722	MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
				2723	unsigned Op32) const {
				2724	MachineBasicBlock *MBB = MI.getParent();;
				2725	MachineInstrBuilder Inst32 =
				2726	BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
				2727
				2728	// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
				2729	// For VOPC instructions, this is replaced by an implicit def of vcc.
				2730	int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
				2731	if (Op32DstIdx != -1) {
				2732	// dst
				2733	Inst32.add(MI.getOperand(0));
				2734	} else {
				2735	assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
				2736	"Unexpected case");
				2737	}
				2738
				2739	Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
				2740
				2741	const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
				2742	if (Src1)
				2743	Inst32.add(*Src1);
				2744
				2745	const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
				2746
				2747	if (Src2) {
				2748	int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
				2749	if (Op32Src2Idx != -1) {
				2750	Inst32.add(*Src2);
				2751	} else {
				2752	// In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
				2753	// replaced with an implicit read of vcc. This was already added
				2754	// during the initial BuildMI, so find it to preserve the flags.
				2755	copyFlagsToImplicitVCC(Inst32, Src2);
				2756	}
				2757	}
				2758
				2759	return Inst32;
Matt Arsenault	35b1902	2018-08-28 18:22:34 +0000	[diff] [blame]	2760	}
				2761
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2762	bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	2763	const MachineOperand &MO,
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2764	const MCOperandInfo &OpInfo) const {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2765	// Literal constants use the constant bus.
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2766	//if (isLiteralConstantLike(MO, OpInfo))
				2767	// return true;
				2768	if (MO.isImm())
				2769	return !isInlineConstant(MO, OpInfo);
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2770
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2771	if (!MO.isReg())
				2772	return true; // Misc other operands like FrameIndex
				2773
				2774	if (!MO.isUse())
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2775	return false;
				2776
				2777	if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
				2778	return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
				2779
				2780	// FLAT_SCR is just an SGPR pair.
				2781	if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
				2782	return true;
				2783
				2784	// EXEC register uses the constant bus.
				2785	if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
				2786	return true;
				2787
				2788	// SGPRs use the constant bus
Matt Arsenault	8226fc4	2016-03-02 23:00:21 +0000	[diff] [blame]	2789	return (MO.getReg() == AMDGPU::VCC \|\| MO.getReg() == AMDGPU::M0 \|\|
				2790	(!MO.isImplicit() &&
				2791	(AMDGPU::SGPR_32RegClass.contains(MO.getReg()) \|\|
				2792	AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	2793	}
				2794
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	2795	static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
				2796	for (const MachineOperand &MO : MI.implicit_operands()) {
				2797	// We only care about reads.
				2798	if (MO.isDef())
				2799	continue;
				2800
				2801	switch (MO.getReg()) {
				2802	case AMDGPU::VCC:
				2803	case AMDGPU::M0:
				2804	case AMDGPU::FLAT_SCR:
				2805	return MO.getReg();
				2806
				2807	default:
				2808	break;
				2809	}
				2810	}
				2811
				2812	return AMDGPU::NoRegister;
				2813	}
				2814
Matt Arsenault	529cf25	2016-06-23 01:26:16 +0000	[diff] [blame]	2815	static bool shouldReadExec(const MachineInstr &MI) {
				2816	if (SIInstrInfo::isVALU(MI)) {
				2817	switch (MI.getOpcode()) {
				2818	case AMDGPU::V_READLANE_B32:
Stanislav Mekhanoshin	8f3da70	2019-04-26 16:37:51 +0000	[diff] [blame]	2819	case AMDGPU::V_READLANE_B32_gfx6_gfx7:
Stanislav Mekhanoshin	61beff0	2019-04-26 17:56:03 +0000	[diff] [blame]	2820	case AMDGPU::V_READLANE_B32_gfx10:
Matt Arsenault	529cf25	2016-06-23 01:26:16 +0000	[diff] [blame]	2821	case AMDGPU::V_READLANE_B32_vi:
				2822	case AMDGPU::V_WRITELANE_B32:
Stanislav Mekhanoshin	8f3da70	2019-04-26 16:37:51 +0000	[diff] [blame]	2823	case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
Stanislav Mekhanoshin	61beff0	2019-04-26 17:56:03 +0000	[diff] [blame]	2824	case AMDGPU::V_WRITELANE_B32_gfx10:
Matt Arsenault	529cf25	2016-06-23 01:26:16 +0000	[diff] [blame]	2825	case AMDGPU::V_WRITELANE_B32_vi:
				2826	return false;
				2827	}
				2828
				2829	return true;
				2830	}
				2831
				2832	if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) \|\|
				2833	SIInstrInfo::isSALU(MI) \|\|
				2834	SIInstrInfo::isSMRD(MI))
				2835	return false;
				2836
				2837	return true;
				2838	}
				2839
Matt Arsenault	cb540bc	2016-07-19 00:35:03 +0000	[diff] [blame]	2840	static bool isSubRegOf(const SIRegisterInfo &TRI,
				2841	const MachineOperand &SuperVec,
				2842	const MachineOperand &SubReg) {
				2843	if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
				2844	return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
				2845
				2846	return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
				2847	SubReg.getReg() == SuperVec.getReg();
				2848	}
				2849
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2850	bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	2851	StringRef &ErrInfo) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2852	uint16_t Opcode = MI.getOpcode();
Tom Stellard	dde28a8	2017-05-26 16:40:03 +0000	[diff] [blame]	2853	if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
				2854	return true;
				2855
Matt Arsenault	89ad17c	2017-06-12 16:37:55 +0000	[diff] [blame]	2856	const MachineFunction *MF = MI.getParent()->getParent();
				2857	const MachineRegisterInfo &MRI = MF->getRegInfo();
				2858
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	2859	int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
				2860	int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
				2861	int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
				2862
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2863	// Make sure the number of operands is correct.
				2864	const MCInstrDesc &Desc = get(Opcode);
				2865	if (!Desc.isVariadic() &&
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2866	Desc.getNumOperands() != MI.getNumExplicitOperands()) {
				2867	ErrInfo = "Instruction has wrong number of operands.";
				2868	return false;
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2869	}
				2870
Matt Arsenault	3d46319	2016-11-01 22:55:07 +0000	[diff] [blame]	2871	if (MI.isInlineAsm()) {
				2872	// Verify register classes for inlineasm constraints.
				2873	for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
				2874	I != E; ++I) {
				2875	const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
				2876	if (!RC)
				2877	continue;
				2878
				2879	const MachineOperand &Op = MI.getOperand(I);
				2880	if (!Op.isReg())
				2881	continue;
				2882
				2883	unsigned Reg = Op.getReg();
				2884	if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
				2885	ErrInfo = "inlineasm operand has incorrect register class.";
				2886	return false;
				2887	}
				2888	}
				2889
				2890	return true;
				2891	}
				2892
Changpeng Fang	c996393	2015-12-18 20:04:28 +0000	[diff] [blame]	2893	// Make sure the register classes are correct.
Tom Stellard	b4a313a	2014-08-01 00:32:39 +0000	[diff] [blame]	2894	for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2895	if (MI.getOperand(i).isFPImm()) {
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	2896	ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
				2897	"all fp values to integers.";
				2898	return false;
				2899	}
				2900
Marek Olsak	8eeebcc	2015-02-18 22:12:41 +0000	[diff] [blame]	2901	int RegClass = Desc.OpInfo[i].RegClass;
				2902
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2903	switch (Desc.OpInfo[i].OperandType) {
Tom Stellard	1106b1c	2015-01-20 17:49:41 +0000	[diff] [blame]	2904	case MCOI::OPERAND_REGISTER:
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2905	if (MI.getOperand(i).isImm()) {
Tom Stellard	1106b1c	2015-01-20 17:49:41 +0000	[diff] [blame]	2906	ErrInfo = "Illegal immediate value for operand.";
				2907	return false;
				2908	}
				2909	break;
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2910	case AMDGPU::OPERAND_REG_IMM_INT32:
				2911	case AMDGPU::OPERAND_REG_IMM_FP32:
Tom Stellard	1106b1c	2015-01-20 17:49:41 +0000	[diff] [blame]	2912	break;
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2913	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
				2914	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
				2915	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
				2916	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
				2917	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
				2918	case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
				2919	const MachineOperand &MO = MI.getOperand(i);
				2920	if (!MO.isReg() && (!MO.isImm() \|\| !isInlineConstant(MI, i))) {
Marek Olsak	8eeebcc	2015-02-18 22:12:41 +0000	[diff] [blame]	2921	ErrInfo = "Illegal immediate value for operand.";
				2922	return false;
Tom Stellard	a305f93	2014-07-02 20:53:44 +0000	[diff] [blame]	2923	}
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2924	break;
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	2925	}
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2926	case MCOI::OPERAND_IMMEDIATE:
Matt Arsenault	ffc8275	2016-07-05 17:09:01 +0000	[diff] [blame]	2927	case AMDGPU::OPERAND_KIMM32:
Tom Stellard	b02094e	2014-07-21 15:45:01 +0000	[diff] [blame]	2928	// Check if this operand is an immediate.
				2929	// FrameIndex operands will be replaced by immediates, so they are
				2930	// allowed.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2931	if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2932	ErrInfo = "Expected immediate, but got non-immediate";
				2933	return false;
				2934	}
Justin Bogner	b03fd12	2016-08-17 05:10:15 +0000	[diff] [blame]	2935	LLVM_FALLTHROUGH;
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2936	default:
				2937	continue;
				2938	}
				2939
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2940	if (!MI.getOperand(i).isReg())
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2941	continue;
				2942
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2943	if (RegClass != -1) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	2944	unsigned Reg = MI.getOperand(i).getReg();
Matt Arsenault	1322b6f	2016-07-09 01:13:56 +0000	[diff] [blame]	2945	if (Reg == AMDGPU::NoRegister \|\|
				2946	TargetRegisterInfo::isVirtualRegister(Reg))
Tom Stellard	ca700e4	2014-03-17 17:03:49 +0000	[diff] [blame]	2947	continue;
				2948
				2949	const TargetRegisterClass *RC = RI.getRegClass(RegClass);
				2950	if (!RC->contains(Reg)) {
				2951	ErrInfo = "Operand has incorrect register class.";
				2952	return false;
				2953	}
				2954	}
				2955	}
				2956
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2957	// Verify SDWA
				2958	if (isSDWA(MI)) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2959	if (!ST.hasSDWA()) {
				2960	ErrInfo = "SDWA is not supported on this target";
				2961	return false;
				2962	}
				2963
				2964	int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2965
				2966	const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
				2967
				2968	for (int OpIdx: OpIndicies) {
				2969	if (OpIdx == -1)
				2970	continue;
				2971	const MachineOperand &MO = MI.getOperand(OpIdx);
				2972
Sam Kolton	3c4933f	2017-06-22 06:26:41 +0000	[diff] [blame]	2973	if (!ST.hasSDWAScalar()) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2974	// Only VGPRS on VI
				2975	if (!MO.isReg() \|\| !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
				2976	ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
				2977	return false;
				2978	}
				2979	} else {
				2980	// No immediates on GFX9
				2981	if (!MO.isReg()) {
				2982	ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
				2983	return false;
				2984	}
				2985	}
				2986	}
				2987
Sam Kolton	3c4933f	2017-06-22 06:26:41 +0000	[diff] [blame]	2988	if (!ST.hasSDWAOmod()) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	2989	// No omod allowed on VI
				2990	const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
				2991	if (OMod != nullptr &&
				2992	(!OMod->isImm() \|\| OMod->getImm() != 0)) {
				2993	ErrInfo = "OMod not allowed in SDWA instructions on VI";
				2994	return false;
				2995	}
				2996	}
				2997
				2998	uint16_t BasicOpcode = AMDGPU::getBasicFromSDWAOp(Opcode);
				2999	if (isVOPC(BasicOpcode)) {
Sam Kolton	3c4933f	2017-06-22 06:26:41 +0000	[diff] [blame]	3000	if (!ST.hasSDWASdst() && DstIdx != -1) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	3001	// Only vcc allowed as dst on VI for VOPC
				3002	const MachineOperand &Dst = MI.getOperand(DstIdx);
				3003	if (!Dst.isReg() \|\| Dst.getReg() != AMDGPU::VCC) {
				3004	ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
				3005	return false;
				3006	}
Sam Kolton	a179d25	2017-06-27 15:02:23 +0000	[diff] [blame]	3007	} else if (!ST.hasSDWAOutModsVOPC()) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	3008	// No clamp allowed on GFX9 for VOPC
				3009	const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
Sam Kolton	a179d25	2017-06-27 15:02:23 +0000	[diff] [blame]	3010	if (Clamp && (!Clamp->isImm() \|\| Clamp->getImm() != 0)) {
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	3011	ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
				3012	return false;
				3013	}
Sam Kolton	a179d25	2017-06-27 15:02:23 +0000	[diff] [blame]	3014
				3015	// No omod allowed on GFX9 for VOPC
				3016	const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
				3017	if (OMod && (!OMod->isImm() \|\| OMod->getImm() != 0)) {
				3018	ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
				3019	return false;
				3020	}
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	3021	}
				3022	}
Sam Kolton	5f7f32c	2017-12-04 16:22:32 +0000	[diff] [blame]	3023
				3024	const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
				3025	if (DstUnused && DstUnused->isImm() &&
				3026	DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
				3027	const MachineOperand &Dst = MI.getOperand(DstIdx);
				3028	if (!Dst.isReg() \|\| !Dst.isTied()) {
				3029	ErrInfo = "Dst register should have tied register";
				3030	return false;
				3031	}
				3032
				3033	const MachineOperand &TiedMO =
				3034	MI.getOperand(MI.findTiedOperandIdx(DstIdx));
				3035	if (!TiedMO.isReg() \|\| !TiedMO.isImplicit() \|\| !TiedMO.isUse()) {
				3036	ErrInfo =
				3037	"Dst register should be tied to implicit use of preserved register";
				3038	return false;
				3039	} else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) &&
				3040	Dst.getReg() != TiedMO.getReg()) {
				3041	ErrInfo = "Dst register should use same physical register as preserved";
				3042	return false;
				3043	}
				3044	}
Sam Kolton	549c89d	2017-06-21 08:53:38 +0000	[diff] [blame]	3045	}
				3046
David Stuttard	f77079f	2019-01-14 11:55:24 +0000	[diff] [blame]	3047	// Verify MIMG
				3048	if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
				3049	// Ensure that the return type used is large enough for all the options
				3050	// being used TFE/LWE require an extra result register.
				3051	const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
				3052	if (DMask) {
				3053	uint64_t DMaskImm = DMask->getImm();
				3054	uint32_t RegCount =
				3055	isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
				3056	const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
				3057	const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
				3058	const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
				3059
				3060	// Adjust for packed 16 bit values
				3061	if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
				3062	RegCount >>= 1;
				3063
				3064	// Adjust if using LWE or TFE
				3065	if ((LWE && LWE->getImm()) \|\| (TFE && TFE->getImm()))
				3066	RegCount += 1;
				3067
				3068	const uint32_t DstIdx =
				3069	AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
				3070	const MachineOperand &Dst = MI.getOperand(DstIdx);
				3071	if (Dst.isReg()) {
				3072	const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
				3073	uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
				3074	if (RegCount > DstSize) {
				3075	ErrInfo = "MIMG instruction returns too many registers for dst "
				3076	"register class";
				3077	return false;
				3078	}
				3079	}
				3080	}
				3081	}
				3082
Tim Renouf	2a99fa2	2018-02-28 19:10:32 +0000	[diff] [blame]	3083	// Verify VOP*. Ignore multiple sgpr operands on writelane.
				3084	if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
				3085	&& (isVOP1(MI) \|\| isVOP2(MI) \|\| isVOP3(MI) \|\| isVOPC(MI) \|\| isSDWA(MI))) {
Matt Arsenault	e368cb3	2014-12-11 23:37:32 +0000	[diff] [blame]	3086	// Only look at the true operands. Only a real operand can use the constant
				3087	// bus, and we don't want to check pseudo-operands like the source modifier
				3088	// flags.
				3089	const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
				3090
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3091	unsigned ConstantBusCount = 0;
Stanislav Mekhanoshin	a4bfb3c	2018-04-24 18:17:55 +0000	[diff] [blame]	3092	unsigned LiteralCount = 0;
Matt Arsenault	ffc8275	2016-07-05 17:09:01 +0000	[diff] [blame]	3093
				3094	if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
				3095	++ConstantBusCount;
				3096
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3097	SmallVector<unsigned, 2> SGPRsUsed;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3098	unsigned SGPRUsed = findImplicitSGPRRead(MI);
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3099	if (SGPRUsed != AMDGPU::NoRegister) {
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	3100	++ConstantBusCount;
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3101	SGPRsUsed.push_back(SGPRUsed);
				3102	}
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	3103
Matt Arsenault	e368cb3	2014-12-11 23:37:32 +0000	[diff] [blame]	3104	for (int OpIdx : OpIndices) {
				3105	if (OpIdx == -1)
				3106	break;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3107	const MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	3108	if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3109	if (MO.isReg()) {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3110	SGPRUsed = MO.getReg();
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3111	if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
				3112	return !RI.regsOverlap(SGPRUsed, SGPR);
				3113	})) {
				3114	++ConstantBusCount;
				3115	SGPRsUsed.push_back(SGPRUsed);
				3116	}
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3117	} else {
				3118	++ConstantBusCount;
Stanislav Mekhanoshin	a4bfb3c	2018-04-24 18:17:55 +0000	[diff] [blame]	3119	++LiteralCount;
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3120	}
				3121	}
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3122	}
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3123	const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
				3124	// v_writelane_b32 is an exception from constant bus restriction:
				3125	// vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
				3126	if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
				3127	Opcode != AMDGPU::V_WRITELANE_B32) {
				3128	ErrInfo = "VOP* instruction violates constant bus restriction";
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3129	return false;
				3130	}
Stanislav Mekhanoshin	a4bfb3c	2018-04-24 18:17:55 +0000	[diff] [blame]	3131
				3132	if (isVOP3(MI) && LiteralCount) {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3133	if (LiteralCount && !ST.hasVOP3Literal()) {
				3134	ErrInfo = "VOP3 instruction uses literal";
				3135	return false;
				3136	}
				3137	if (LiteralCount > 1) {
				3138	ErrInfo = "VOP3 instruction uses more than one literal";
				3139	return false;
				3140	}
Stanislav Mekhanoshin	a4bfb3c	2018-04-24 18:17:55 +0000	[diff] [blame]	3141	}
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3142	}
				3143
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	3144	// Verify misc. restrictions on specific instructions.
				3145	if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 \|\|
				3146	Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3147	const MachineOperand &Src0 = MI.getOperand(Src0Idx);
				3148	const MachineOperand &Src1 = MI.getOperand(Src1Idx);
				3149	const MachineOperand &Src2 = MI.getOperand(Src2Idx);
Matt Arsenault	becb140	2014-06-23 18:28:31 +0000	[diff] [blame]	3150	if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
				3151	if (!compareMachineOp(Src0, Src1) &&
				3152	!compareMachineOp(Src0, Src2)) {
				3153	ErrInfo = "v_div_scale_{f32\|f64} require src0 = src1 or src2";
				3154	return false;
				3155	}
				3156	}
				3157	}
				3158
Matt Arsenault	7ccf6cd	2016-09-16 21:41:16 +0000	[diff] [blame]	3159	if (isSOPK(MI)) {
				3160	int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
				3161	if (sopkIsZext(MI)) {
				3162	if (!isUInt<16>(Imm)) {
				3163	ErrInfo = "invalid immediate for SOPK instruction";
				3164	return false;
				3165	}
				3166	} else {
				3167	if (!isInt<16>(Imm)) {
				3168	ErrInfo = "invalid immediate for SOPK instruction";
				3169	return false;
				3170	}
				3171	}
				3172	}
				3173
Matt Arsenault	cb540bc	2016-07-19 00:35:03 +0000	[diff] [blame]	3174	if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 \|\|
				3175	Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 \|\|
				3176	Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 \|\|
				3177	Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
				3178	const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 \|\|
				3179	Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
				3180
				3181	const unsigned StaticNumOps = Desc.getNumOperands() +
				3182	Desc.getNumImplicitUses();
				3183	const unsigned NumImplicitOps = IsDst ? 2 : 1;
				3184
Nicolai Haehnle	368972c	2016-11-02 17:03:11 +0000	[diff] [blame]	3185	// Allow additional implicit operands. This allows a fixup done by the post
				3186	// RA scheduler where the main implicit operand is killed and implicit-defs
				3187	// are added for sub-registers that remain live after this instruction.
				3188	if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
Matt Arsenault	cb540bc	2016-07-19 00:35:03 +0000	[diff] [blame]	3189	ErrInfo = "missing implicit register operands";
				3190	return false;
				3191	}
				3192
				3193	const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
				3194	if (IsDst) {
				3195	if (!Dst->isUse()) {
				3196	ErrInfo = "v_movreld_b32 vdst should be a use operand";
				3197	return false;
				3198	}
				3199
				3200	unsigned UseOpIdx;
				3201	if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) \|\|
				3202	UseOpIdx != StaticNumOps + 1) {
				3203	ErrInfo = "movrel implicit operands should be tied";
				3204	return false;
				3205	}
				3206	}
				3207
				3208	const MachineOperand &Src0 = MI.getOperand(Src0Idx);
				3209	const MachineOperand &ImpUse
				3210	= MI.getOperand(StaticNumOps + NumImplicitOps - 1);
				3211	if (!ImpUse.isReg() \|\| !ImpUse.isUse() \|\|
				3212	!isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
				3213	ErrInfo = "src0 should be subreg of implicit vector use";
				3214	return false;
				3215	}
				3216	}
				3217
Matt Arsenault	d092a06	2015-10-02 18:58:37 +0000	[diff] [blame]	3218	// Make sure we aren't losing exec uses in the td files. This mostly requires
				3219	// being careful when using let Uses to try to add other use registers.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3220	if (shouldReadExec(MI)) {
				3221	if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
Matt Arsenault	d092a06	2015-10-02 18:58:37 +0000	[diff] [blame]	3222	ErrInfo = "VALU instruction does not implicitly read exec mask";
				3223	return false;
				3224	}
				3225	}
				3226
Matt Arsenault	7b64755	2016-10-28 21:55:15 +0000	[diff] [blame]	3227	if (isSMRD(MI)) {
				3228	if (MI.mayStore()) {
				3229	// The register offset form of scalar stores may only use m0 as the
				3230	// soffset register.
				3231	const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
				3232	if (Soff && Soff->getReg() != AMDGPU::M0) {
				3233	ErrInfo = "scalar stores must use m0 as offset register";
				3234	return false;
				3235	}
				3236	}
				3237	}
				3238
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	3239	if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
Matt Arsenault	89ad17c	2017-06-12 16:37:55 +0000	[diff] [blame]	3240	const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
				3241	if (Offset->getImm() != 0) {
				3242	ErrInfo = "subtarget does not support offsets in flat instructions";
				3243	return false;
				3244	}
				3245	}
				3246
Stanislav Mekhanoshin	692560d	2019-05-01 16:32:58 +0000	[diff] [blame]	3247	if (isMIMG(MI)) {
				3248	const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
				3249	if (DimOp) {
				3250	int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
				3251	AMDGPU::OpName::vaddr0);
				3252	int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
				3253	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
				3254	const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
				3255	AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
				3256	const AMDGPU::MIMGDimInfo *Dim =
				3257	AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm());
				3258
				3259	if (!Dim) {
				3260	ErrInfo = "dim is out of range";
				3261	return false;
				3262	}
				3263
				3264	bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
				3265	unsigned AddrWords = BaseOpcode->NumExtraArgs +
				3266	(BaseOpcode->Gradients ? Dim->NumGradients : 0) +
				3267	(BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
				3268	(BaseOpcode->LodOrClampOrMip ? 1 : 0);
				3269
				3270	unsigned VAddrWords;
				3271	if (IsNSA) {
				3272	VAddrWords = SRsrcIdx - VAddr0Idx;
				3273	} else {
				3274	const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
				3275	VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
				3276	if (AddrWords > 8)
				3277	AddrWords = 16;
				3278	else if (AddrWords > 4)
				3279	AddrWords = 8;
				3280	else if (AddrWords == 3 && VAddrWords == 4) {
				3281	// CodeGen uses the V4 variant of instructions for three addresses,
				3282	// because the selection DAG does not support non-power-of-two types.
				3283	AddrWords = 4;
				3284	}
				3285	}
				3286
				3287	if (VAddrWords != AddrWords) {
				3288	ErrInfo = "bad vaddr size";
				3289	return false;
				3290	}
				3291	}
				3292	}
				3293
Stanislav Mekhanoshin	4329361	2018-05-08 16:53:02 +0000	[diff] [blame]	3294	const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
				3295	if (DppCt) {
				3296	using namespace AMDGPU::DPP;
				3297
				3298	unsigned DC = DppCt->getImm();
				3299	if (DC == DppCtrl::DPP_UNUSED1 \|\| DC == DppCtrl::DPP_UNUSED2 \|\|
				3300	DC == DppCtrl::DPP_UNUSED3 \|\| DC > DppCtrl::DPP_LAST \|\|
				3301	(DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) \|\|
				3302	(DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) \|\|
				3303	(DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) \|\|
				3304	(DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
				3305	ErrInfo = "Invalid dpp_ctrl value";
				3306	return false;
				3307	}
				3308	}
				3309
Tom Stellard	93fabce	2013-10-10 17:11:55 +0000	[diff] [blame]	3310	return true;
				3311	}
				3312
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	3313	unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3314	switch (MI.getOpcode()) {
				3315	default: return AMDGPU::INSTRUCTION_LIST_END;
				3316	case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
				3317	case AMDGPU::COPY: return AMDGPU::COPY;
				3318	case AMDGPU::PHI: return AMDGPU::PHI;
Tom Stellard	204e61b	2014-04-07 19:45:45 +0000	[diff] [blame]	3319	case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
Connor Abbott	8c217d0	2017-08-04 18:36:49 +0000	[diff] [blame]	3320	case AMDGPU::WQM: return AMDGPU::WQM;
Connor Abbott	92638ab	2017-08-04 18:36:52 +0000	[diff] [blame]	3321	case AMDGPU::WWM: return AMDGPU::WWM;
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	3322	case AMDGPU::S_MOV_B32:
				3323	return MI.getOperand(1).isReg() ?
Tom Stellard	8c12fd9	2014-03-24 16:12:34 +0000	[diff] [blame]	3324	AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
Tom Stellard	80942a1	2014-09-05 14:07:59 +0000	[diff] [blame]	3325	case AMDGPU::S_ADD_I32:
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	3326	return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
				3327	case AMDGPU::S_ADDC_U32:
				3328	return AMDGPU::V_ADDC_U32_e32;
Tom Stellard	80942a1	2014-09-05 14:07:59 +0000	[diff] [blame]	3329	case AMDGPU::S_SUB_I32:
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	3330	return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
				3331	// FIXME: These are not consistently handled, and selected when the carry is
				3332	// used.
				3333	case AMDGPU::S_ADD_U32:
				3334	return AMDGPU::V_ADD_I32_e32;
				3335	case AMDGPU::S_SUB_U32:
				3336	return AMDGPU::V_SUB_I32_e32;
Matt Arsenault	43b8e4e	2013-11-18 20:09:29 +0000	[diff] [blame]	3337	case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
Matt Arsenault	869cd07	2014-09-03 23:24:35 +0000	[diff] [blame]	3338	case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
Michael Liao	efb4f9e	2019-03-18 20:40:09 +0000	[diff] [blame]	3339	case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
				3340	case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
Matt Arsenault	124384f	2016-09-09 23:32:53 +0000	[diff] [blame]	3341	case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
				3342	case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
				3343	case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	3344	case AMDGPU::S_XNOR_B32:
				3345	return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
Matt Arsenault	124384f	2016-09-09 23:32:53 +0000	[diff] [blame]	3346	case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
				3347	case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
				3348	case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
				3349	case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3350	case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
				3351	case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
				3352	case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
				3353	case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
				3354	case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
				3355	case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	3356	case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
				3357	case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	3358	case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
				3359	case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
Marek Olsak	63a7b08	2015-03-24 13:40:21 +0000	[diff] [blame]	3360	case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
Matt Arsenault	43160e7	2014-06-18 17:13:57 +0000	[diff] [blame]	3361	case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
Matt Arsenault	2c33562	2014-04-09 07:16:16 +0000	[diff] [blame]	3362	case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	3363	case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
Matt Arsenault	0cb92e1	2014-04-11 19:25:18 +0000	[diff] [blame]	3364	case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
				3365	case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
				3366	case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
				3367	case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
				3368	case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
				3369	case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	3370	case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
				3371	case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
				3372	case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
				3373	case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
				3374	case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
				3375	case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
Matt Arsenault	7b1dc2c	2016-09-17 02:02:19 +0000	[diff] [blame]	3376	case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
				3377	case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	3378	case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
Matt Arsenault	295b86e	2014-06-17 17:36:27 +0000	[diff] [blame]	3379	case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
Matt Arsenault	8579601	2014-06-17 17:36:24 +0000	[diff] [blame]	3380	case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
Marek Olsak	d2af89d	2015-03-04 17:33:45 +0000	[diff] [blame]	3381	case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	3382	case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
				3383	case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3384	}
Michael Liao	efb4f9e	2019-03-18 20:40:09 +0000	[diff] [blame]	3385	llvm_unreachable(
				3386	"Unexpected scalar opcode without corresponding vector one!");
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3387	}
				3388
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3389	const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
				3390	unsigned OpNo) const {
				3391	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
				3392	const MCInstrDesc &Desc = get(MI.getOpcode());
				3393	if (MI.isVariadic() \|\| OpNo >= Desc.getNumOperands() \|\|
Matt Arsenault	102a704	2014-12-11 23:37:34 +0000	[diff] [blame]	3394	Desc.OpInfo[OpNo].RegClass == -1) {
				3395	unsigned Reg = MI.getOperand(OpNo).getReg();
				3396
				3397	if (TargetRegisterInfo::isVirtualRegister(Reg))
				3398	return MRI.getRegClass(Reg);
Matt Arsenault	11a4d67	2015-02-13 19:05:03 +0000	[diff] [blame]	3399	return RI.getPhysRegClass(Reg);
Matt Arsenault	102a704	2014-12-11 23:37:34 +0000	[diff] [blame]	3400	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3401
				3402	unsigned RCID = Desc.OpInfo[OpNo].RegClass;
				3403	return RI.getRegClass(RCID);
				3404	}
				3405
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3406	void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3407	MachineBasicBlock::iterator I = MI;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3408	MachineBasicBlock *MBB = MI.getParent();
				3409	MachineOperand &MO = MI.getOperand(OpIdx);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3410	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3411	unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3412	const TargetRegisterClass *RC = RI.getRegClass(RCID);
				3413	unsigned Opcode = AMDGPU::V_MOV_B32_e32;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3414	if (MO.isReg())
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3415	Opcode = AMDGPU::COPY;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3416	else if (RI.isSGPRClass(RC))
Matt Arsenault	671a005	2013-11-14 10:08:50 +0000	[diff] [blame]	3417	Opcode = AMDGPU::S_MOV_B32;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3418
Matt Arsenault	3a4d86a	2013-11-18 20:09:55 +0000	[diff] [blame]	3419	const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3420	if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
Tom Stellard	0c93c9e	2014-09-05 14:08:01 +0000	[diff] [blame]	3421	VRC = &AMDGPU::VReg_64RegClass;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3422	else
Tom Stellard	45c0b3a	2015-01-07 20:59:25 +0000	[diff] [blame]	3423	VRC = &AMDGPU::VGPR_32RegClass;
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3424
Matt Arsenault	3a4d86a	2013-11-18 20:09:55 +0000	[diff] [blame]	3425	unsigned Reg = MRI.createVirtualRegister(VRC);
Matt Arsenault	3f3a275	2014-10-13 15:47:59 +0000	[diff] [blame]	3426	DebugLoc DL = MBB->findDebugLoc(I);
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	3427	BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	3428	MO.ChangeToRegister(Reg, false);
				3429	}
				3430
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3431	unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
				3432	MachineRegisterInfo &MRI,
				3433	MachineOperand &SuperReg,
				3434	const TargetRegisterClass *SuperRC,
				3435	unsigned SubIdx,
				3436	const TargetRegisterClass *SubRC)
				3437	const {
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	3438	MachineBasicBlock *MBB = MI->getParent();
				3439	DebugLoc DL = MI->getDebugLoc();
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3440	unsigned SubReg = MRI.createVirtualRegister(SubRC);
				3441
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	3442	if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
				3443	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
				3444	.addReg(SuperReg.getReg(), 0, SubIdx);
				3445	return SubReg;
				3446	}
				3447
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3448	// Just in case the super register is itself a sub-register, copy it to a new
Matt Arsenault	08d8494	2014-06-03 23:06:13 +0000	[diff] [blame]	3449	// value so we don't need to worry about merging its subreg index with the
				3450	// SubIdx passed to this function. The register coalescer should be able to
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3451	// eliminate this extra copy.
Matt Arsenault	c8e2ce4	2015-09-24 07:16:37 +0000	[diff] [blame]	3452	unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3453
Matt Arsenault	7480a0e	2014-11-17 21:11:37 +0000	[diff] [blame]	3454	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
				3455	.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
				3456
				3457	BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
				3458	.addReg(NewSuperReg, 0, SubIdx);
				3459
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	3460	return SubReg;
				3461	}
				3462
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	3463	MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
				3464	MachineBasicBlock::iterator MII,
				3465	MachineRegisterInfo &MRI,
				3466	MachineOperand &Op,
				3467	const TargetRegisterClass *SuperRC,
				3468	unsigned SubIdx,
				3469	const TargetRegisterClass *SubRC) const {
				3470	if (Op.isImm()) {
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	3471	if (SubIdx == AMDGPU::sub0)
Matt Arsenault	d745c28	2016-09-08 17:44:36 +0000	[diff] [blame]	3472	return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	3473	if (SubIdx == AMDGPU::sub1)
Matt Arsenault	d745c28	2016-09-08 17:44:36 +0000	[diff] [blame]	3474	return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
Matt Arsenault	248b7b6	2014-03-24 20:08:09 +0000	[diff] [blame]	3475
				3476	llvm_unreachable("Unhandled register index for immediate");
				3477	}
				3478
				3479	unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
				3480	SubIdx, SubRC);
				3481	return MachineOperand::CreateReg(SubReg, false);
				3482	}
				3483
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	3484	// Change the order of operands from (0, 1, 2) to (0, 2, 1)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3485	void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
				3486	assert(Inst.getNumExplicitOperands() == 3);
				3487	MachineOperand Op1 = Inst.getOperand(1);
				3488	Inst.RemoveOperand(1);
				3489	Inst.addOperand(Op1);
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	3490	}
				3491
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3492	bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
				3493	const MCOperandInfo &OpInfo,
				3494	const MachineOperand &MO) const {
				3495	if (!MO.isReg())
				3496	return false;
				3497
				3498	unsigned Reg = MO.getReg();
				3499	const TargetRegisterClass *RC =
				3500	TargetRegisterInfo::isVirtualRegister(Reg) ?
				3501	MRI.getRegClass(Reg) :
				3502	RI.getPhysRegClass(Reg);
				3503
Nicolai Haehnle	82fc962	2016-01-07 17:10:29 +0000	[diff] [blame]	3504	const SIRegisterInfo *TRI =
				3505	static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
				3506	RC = TRI->getSubRegClass(RC, MO.getSubReg());
				3507
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3508	// In order to be legal, the common sub-class must be equal to the
				3509	// class of the current operand. For example:
				3510	//
Sam Kolton	1eeb11b	2016-09-09 14:44:04 +0000	[diff] [blame]	3511	// v_mov_b32 s0 ; Operand defined as vsrc_b32
				3512	// ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3513	//
				3514	// s_sendmsg 0, s0 ; Operand defined as m0reg
				3515	// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
				3516
				3517	return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
				3518	}
				3519
				3520	bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
				3521	const MCOperandInfo &OpInfo,
				3522	const MachineOperand &MO) const {
				3523	if (MO.isReg())
				3524	return isLegalRegOperand(MRI, OpInfo, MO);
				3525
				3526	// Handle non-register types that are treated like immediates.
				3527	assert(MO.isImm() \|\| MO.isTargetIndex() \|\| MO.isFI());
				3528	return true;
				3529	}
				3530
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3531	bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3532	const MachineOperand *MO) const {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3533	const MachineFunction &MF = *MI.getParent()->getParent();
				3534	const MachineRegisterInfo &MRI = MF.getRegInfo();
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3535	const MCInstrDesc &InstDesc = MI.getDesc();
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3536	const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3537	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3538	const TargetRegisterClass *DefinedRC =
				3539	OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
				3540	if (!MO)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3541	MO = &MI.getOperand(OpIdx);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3542
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3543	int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
				3544	int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	3545	if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3546	if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
				3547	return false;
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	3548
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3549	SmallDenseSet<RegSubRegPair> SGPRsUsed;
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	3550	if (MO->isReg())
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3551	SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg()));
Matt Arsenault	fcb345f	2016-02-11 06:15:39 +0000	[diff] [blame]	3552
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3553	for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3554	if (i == OpIdx)
				3555	continue;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3556	const MachineOperand &Op = MI.getOperand(i);
Matt Arsenault	ffc8275	2016-07-05 17:09:01 +0000	[diff] [blame]	3557	if (Op.isReg()) {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3558	RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
				3559	if (!SGPRsUsed.count(SGPR) &&
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	3560	usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3561	if (--ConstantBusLimit <= 0)
				3562	return false;
				3563	SGPRsUsed.insert(SGPR);
Matt Arsenault	ffc8275	2016-07-05 17:09:01 +0000	[diff] [blame]	3564	}
				3565	} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3566	if (--ConstantBusLimit <= 0)
				3567	return false;
				3568	} else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
				3569	isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
				3570	if (!VOP3LiteralLimit--)
				3571	return false;
				3572	if (--ConstantBusLimit <= 0)
				3573	return false;
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3574	}
				3575	}
				3576	}
				3577
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3578	if (MO->isReg()) {
				3579	assert(DefinedRC);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3580	return isLegalRegOperand(MRI, OpInfo, *MO);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3581	}
				3582
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3583	// Handle non-register types that are treated like immediates.
Tom Stellard	fb77f00	2015-01-13 22:59:41 +0000	[diff] [blame]	3584	assert(MO->isImm() \|\| MO->isTargetIndex() \|\| MO->isFI());
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3585
Matt Arsenault	4364fef	2014-09-23 18:30:57 +0000	[diff] [blame]	3586	if (!DefinedRC) {
				3587	// This operand expects an immediate.
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3588	return true;
Matt Arsenault	4364fef	2014-09-23 18:30:57 +0000	[diff] [blame]	3589	}
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3590
Tom Stellard	73ae1cb	2014-09-23 21:26:25 +0000	[diff] [blame]	3591	return isImmOperandLegal(MI, OpIdx, *MO);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	3592	}
				3593
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3594	void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3595	MachineInstr &MI) const {
				3596	unsigned Opc = MI.getOpcode();
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3597	const MCInstrDesc &InstrDesc = get(Opc);
				3598
				3599	int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3600	MachineOperand &Src1 = MI.getOperand(Src1Idx);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3601
				3602	// If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
				3603	// we need to only have one constant bus use.
				3604	//
				3605	// Note we do not need to worry about literal constants here. They are
				3606	// disabled for the operand type for instructions because they will always
				3607	// violate the one constant bus use rule.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3608	bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3609	if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) {
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3610	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3611	MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3612
				3613	if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
				3614	legalizeOpWithMove(MI, Src0Idx);
				3615	}
				3616
Tim Renouf	2a99fa2	2018-02-28 19:10:32 +0000	[diff] [blame]	3617	// Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
				3618	// both the value to write (src0) and lane select (src1). Fix up non-SGPR
				3619	// src0/src1 with V_READFIRSTLANE.
				3620	if (Opc == AMDGPU::V_WRITELANE_B32) {
				3621	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				3622	MachineOperand &Src0 = MI.getOperand(Src0Idx);
				3623	const DebugLoc &DL = MI.getDebugLoc();
				3624	if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
				3625	unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				3626	BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
				3627	.add(Src0);
				3628	Src0.ChangeToRegister(Reg, false);
				3629	}
				3630	if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
				3631	unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				3632	const DebugLoc &DL = MI.getDebugLoc();
				3633	BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
				3634	.add(Src1);
				3635	Src1.ChangeToRegister(Reg, false);
				3636	}
				3637	return;
				3638	}
				3639
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3640	// VOP2 src0 instructions support all operand types, so we don't need to check
				3641	// their legality. If src1 is already legal, we don't need to do anything.
				3642	if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
				3643	return;
				3644
Nicolai Haehnle	5dea645	2017-04-24 17:17:36 +0000	[diff] [blame]	3645	// Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
				3646	// lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
				3647	// select is uniform.
				3648	if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
				3649	RI.isVGPR(MRI, Src1.getReg())) {
				3650	unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				3651	const DebugLoc &DL = MI.getDebugLoc();
				3652	BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
				3653	.add(Src1);
				3654	Src1.ChangeToRegister(Reg, false);
				3655	return;
				3656	}
				3657
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3658	// We do not use commuteInstruction here because it is too aggressive and will
				3659	// commute if it is possible. We only want to commute here if it improves
				3660	// legality. This can be called a fairly large number of times so don't waste
				3661	// compile time pointlessly swapping and checking legality again.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3662	if (HasImplicitSGPR \|\| !MI.isCommutable()) {
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3663	legalizeOpWithMove(MI, Src1Idx);
				3664	return;
				3665	}
				3666
				3667	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3668	MachineOperand &Src0 = MI.getOperand(Src0Idx);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3669
				3670	// If src0 can be used as src1, commuting will make the operands legal.
				3671	// Otherwise we have to give up and insert a move.
				3672	//
				3673	// TODO: Other immediate-like operand kinds could be commuted if there was a
				3674	// MachineOperand::ChangeTo* for them.
				3675	if ((!Src1.isImm() && !Src1.isReg()) \|\|
				3676	!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
				3677	legalizeOpWithMove(MI, Src1Idx);
				3678	return;
				3679	}
				3680
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3681	int CommutedOpc = commuteOpcode(MI);
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3682	if (CommutedOpc == -1) {
				3683	legalizeOpWithMove(MI, Src1Idx);
				3684	return;
				3685	}
				3686
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3687	MI.setDesc(get(CommutedOpc));
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	3688
				3689	unsigned Src0Reg = Src0.getReg();
				3690	unsigned Src0SubReg = Src0.getSubReg();
				3691	bool Src0Kill = Src0.isKill();
				3692
				3693	if (Src1.isImm())
				3694	Src0.ChangeToImmediate(Src1.getImm());
				3695	else if (Src1.isReg()) {
				3696	Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
				3697	Src0.setSubReg(Src1.getSubReg());
				3698	} else
				3699	llvm_unreachable("Should only have register or immediate operands");
				3700
				3701	Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
				3702	Src1.setSubReg(Src0SubReg);
				3703	}
				3704
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3705	// Legalize VOP3 operands. Because all operand types are supported for any
				3706	// operand, and since literal constants are not allowed and should never be
				3707	// seen, we only need to worry about inserting copies if we use multiple SGPR
				3708	// operands.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3709	void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
				3710	MachineInstr &MI) const {
				3711	unsigned Opc = MI.getOpcode();
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3712
				3713	int VOP3Idx[3] = {
				3714	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
				3715	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
				3716	AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
				3717	};
				3718
				3719	// Find the one SGPR operand we are allowed to use.
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3720	int ConstantBusLimit = ST.getConstantBusLimit(Opc);
				3721	int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
				3722	SmallDenseSet<unsigned> SGPRsUsed;
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3723	unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3724	if (SGPRReg != AMDGPU::NoRegister) {
				3725	SGPRsUsed.insert(SGPRReg);
				3726	--ConstantBusLimit;
				3727	}
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3728
				3729	for (unsigned i = 0; i < 3; ++i) {
				3730	int Idx = VOP3Idx[i];
				3731	if (Idx == -1)
				3732	break;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3733	MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3734
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3735	if (!MO.isReg()) {
				3736	if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx]))
				3737	continue;
				3738
				3739	if (LiteralLimit > 0 && ConstantBusLimit > 0) {
				3740	--LiteralLimit;
				3741	--ConstantBusLimit;
				3742	continue;
				3743	}
				3744
				3745	--LiteralLimit;
				3746	--ConstantBusLimit;
				3747	legalizeOpWithMove(MI, Idx);
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3748	continue;
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3749	}
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3750
				3751	if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
				3752	continue; // VGPRs are legal
				3753
Stanislav Mekhanoshin	f2baae0	2019-05-02 03:47:23 +0000	[diff] [blame]	3754	// We can use one SGPR in each VOP3 instruction prior to GFX10
				3755	// and two starting from GFX10.
				3756	if (SGPRsUsed.count(MO.getReg()))
				3757	continue;
				3758	if (ConstantBusLimit > 0) {
				3759	SGPRsUsed.insert(MO.getReg());
				3760	--ConstantBusLimit;
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	3761	continue;
				3762	}
				3763
				3764	// If we make it this far, then the operand is not legal and we must
				3765	// legalize it.
				3766	legalizeOpWithMove(MI, Idx);
				3767	}
				3768	}
				3769
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3770	unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
				3771	MachineRegisterInfo &MRI) const {
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3772	const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
				3773	const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
				3774	unsigned DstReg = MRI.createVirtualRegister(SRC);
Krzysztof Parzyszek	44e25f3	2017-04-24 18:55:33 +0000	[diff] [blame]	3775	unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3776
Nicolai Haehnle	7a87977	2018-04-20 07:14:25 +0000	[diff] [blame]	3777	if (SubRegs == 1) {
				3778	BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
				3779	get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
				3780	.addReg(SrcReg);
				3781	return DstReg;
				3782	}
				3783
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3784	SmallVector<unsigned, 8> SRegs;
				3785	for (unsigned i = 0; i < SubRegs; ++i) {
				3786	unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3787	BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3788	get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3789	.addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3790	SRegs.push_back(SGPR);
				3791	}
				3792
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3793	MachineInstrBuilder MIB =
				3794	BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
				3795	get(AMDGPU::REG_SEQUENCE), DstReg);
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	3796	for (unsigned i = 0; i < SubRegs; ++i) {
				3797	MIB.addReg(SRegs[i]);
				3798	MIB.addImm(RI.getSubRegFromChannel(i));
				3799	}
				3800	return DstReg;
				3801	}
				3802
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	3803	void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3804	MachineInstr &MI) const {
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	3805
				3806	// If the pointer is store in VGPRs, then we need to move them to
				3807	// SGPRs using v_readfirstlane. This is safe because we only select
				3808	// loads with uniform pointers to SMRD instruction so we know the
				3809	// pointer value is uniform.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	3810	MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	3811	if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
Nicolai Haehnle	a7b0005	2018-11-30 22:55:38 +0000	[diff] [blame]	3812	unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
				3813	SBase->setReg(SGPR);
				3814	}
				3815	MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
				3816	if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
				3817	unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
				3818	SOff->setReg(SGPR);
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	3819	}
				3820	}
				3821
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	3822	void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
				3823	MachineBasicBlock::iterator I,
				3824	const TargetRegisterClass *DstRC,
				3825	MachineOperand &Op,
				3826	MachineRegisterInfo &MRI,
				3827	const DebugLoc &DL) const {
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	3828	unsigned OpReg = Op.getReg();
				3829	unsigned OpSubReg = Op.getSubReg();
				3830
				3831	const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
				3832	RI.getRegClassForReg(MRI, OpReg), OpSubReg);
				3833
				3834	// Check if operand is already the correct register class.
				3835	if (DstRC == OpRC)
				3836	return;
				3837
				3838	unsigned DstReg = MRI.createVirtualRegister(DstRC);
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	3839	MachineInstr *Copy =
				3840	BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	3841
				3842	Op.setReg(DstReg);
				3843	Op.setSubReg(0);
				3844
				3845	MachineInstr *Def = MRI.getVRegDef(OpReg);
				3846	if (!Def)
				3847	return;
				3848
				3849	// Try to eliminate the copy if it is copying an immediate value.
				3850	if (Def->isMoveImmediate())
				3851	FoldImmediate(Copy, Def, OpReg, &MRI);
				3852	}
				3853
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	3854	// Emit the actual waterfall loop, executing the wrapped instruction for each
				3855	// unique value of \p Rsrc across all lanes. In the best case we execute 1
				3856	// iteration, in the worst case we execute 64 (once per lane).
				3857	static void
				3858	emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
				3859	MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
				3860	const DebugLoc &DL, MachineOperand &Rsrc) {
				3861	MachineBasicBlock::iterator I = LoopBB.begin();
				3862
				3863	unsigned VRsrc = Rsrc.getReg();
				3864	unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
				3865
				3866	unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				3867	unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				3868	unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				3869	unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				3870	unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				3871	unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				3872	unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				3873	unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				3874	unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
				3875
				3876	// Beginning of the loop, read the next Rsrc variant.
				3877	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
				3878	.addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
				3879	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
				3880	.addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
				3881	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
				3882	.addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
				3883	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
				3884	.addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
				3885
				3886	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
				3887	.addReg(SRsrcSub0)
				3888	.addImm(AMDGPU::sub0)
				3889	.addReg(SRsrcSub1)
				3890	.addImm(AMDGPU::sub1)
				3891	.addReg(SRsrcSub2)
				3892	.addImm(AMDGPU::sub2)
				3893	.addReg(SRsrcSub3)
				3894	.addImm(AMDGPU::sub3);
				3895
				3896	// Update Rsrc operand to use the SGPR Rsrc.
				3897	Rsrc.setReg(SRsrc);
				3898	Rsrc.setIsKill(true);
				3899
				3900	// Identify all lanes with identical Rsrc operands in their VGPRs.
				3901	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
				3902	.addReg(SRsrc, 0, AMDGPU::sub0_sub1)
				3903	.addReg(VRsrc, 0, AMDGPU::sub0_sub1);
				3904	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
				3905	.addReg(SRsrc, 0, AMDGPU::sub2_sub3)
				3906	.addReg(VRsrc, 0, AMDGPU::sub2_sub3);
				3907	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
				3908	.addReg(CondReg0)
				3909	.addReg(CondReg1);
				3910
				3911	MRI.setSimpleHint(SaveExec, AndCond);
				3912
				3913	// Update EXEC to matching lanes, saving original to SaveExec.
				3914	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
				3915	.addReg(AndCond, RegState::Kill);
				3916
				3917	// The original instruction is here; we insert the terminators after it.
				3918	I = LoopBB.end();
				3919
				3920	// Update EXEC, switch all done bits to 0 and all todo bits to 1.
				3921	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
				3922	.addReg(AMDGPU::EXEC)
				3923	.addReg(SaveExec);
				3924	BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
				3925	}
				3926
				3927	// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
				3928	// with SGPRs by iterating over all unique values across all lanes.
				3929	static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
				3930	MachineOperand &Rsrc, MachineDominatorTree *MDT) {
				3931	MachineBasicBlock &MBB = *MI.getParent();
				3932	MachineFunction &MF = *MBB.getParent();
				3933	MachineRegisterInfo &MRI = MF.getRegInfo();
				3934	MachineBasicBlock::iterator I(&MI);
				3935	const DebugLoc &DL = MI.getDebugLoc();
				3936
				3937	unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				3938
				3939	// Save the EXEC mask
				3940	BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
				3941	.addReg(AMDGPU::EXEC);
				3942
				3943	// Killed uses in the instruction we are waterfalling around will be
				3944	// incorrect due to the added control-flow.
				3945	for (auto &MO : MI.uses()) {
				3946	if (MO.isReg() && MO.isUse()) {
				3947	MRI.clearKillFlags(MO.getReg());
				3948	}
				3949	}
				3950
				3951	// To insert the loop we need to split the block. Move everything after this
				3952	// point to a new block, and insert a new empty block between the two.
				3953	MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
				3954	MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
				3955	MachineFunction::iterator MBBI(MBB);
				3956	++MBBI;
				3957
				3958	MF.insert(MBBI, LoopBB);
				3959	MF.insert(MBBI, RemainderBB);
				3960
				3961	LoopBB->addSuccessor(LoopBB);
				3962	LoopBB->addSuccessor(RemainderBB);
				3963
				3964	// Move MI to the LoopBB, and the remainder of the block to RemainderBB.
				3965	MachineBasicBlock::iterator J = I++;
				3966	RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
				3967	RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
				3968	LoopBB->splice(LoopBB->begin(), &MBB, J);
				3969
				3970	MBB.addSuccessor(LoopBB);
				3971
				3972	// Update dominators. We know that MBB immediately dominates LoopBB, that
				3973	// LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
				3974	// dominates all of the successors transferred to it from MBB that MBB used
				3975	// to dominate.
				3976	if (MDT) {
				3977	MDT->addNewBlock(LoopBB, &MBB);
				3978	MDT->addNewBlock(RemainderBB, LoopBB);
				3979	for (auto &Succ : RemainderBB->successors()) {
				3980	if (MDT->dominates(&MBB, Succ)) {
				3981	MDT->changeImmediateDominator(Succ, RemainderBB);
				3982	}
				3983	}
				3984	}
				3985
				3986	emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
				3987
				3988	// Restore the EXEC mask
				3989	MachineBasicBlock::iterator First = RemainderBB->begin();
				3990	BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
				3991	.addReg(SaveExec);
				3992	}
				3993
				3994	// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
				3995	static std::tuple<unsigned, unsigned>
				3996	extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
				3997	MachineBasicBlock &MBB = *MI.getParent();
				3998	MachineFunction &MF = *MBB.getParent();
				3999	MachineRegisterInfo &MRI = MF.getRegInfo();
				4000
				4001	// Extract the ptr from the resource descriptor.
				4002	unsigned RsrcPtr =
				4003	TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
				4004	AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
				4005
				4006	// Create an empty resource descriptor
				4007	unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				4008	unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				4009	unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
				4010	unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
				4011	uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
				4012
				4013	// Zero64 = 0
				4014	BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
				4015	.addImm(0);
				4016
				4017	// SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
				4018	BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
				4019	.addImm(RsrcDataFormat & 0xFFFFFFFF);
				4020
				4021	// SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
				4022	BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
				4023	.addImm(RsrcDataFormat >> 32);
				4024
				4025	// NewSRsrc = {Zero64, SRsrcFormat}
				4026	BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
				4027	.addReg(Zero64)
				4028	.addImm(AMDGPU::sub0_sub1)
				4029	.addReg(SRsrcFormatLo)
				4030	.addImm(AMDGPU::sub2)
				4031	.addReg(SRsrcFormatHi)
				4032	.addImm(AMDGPU::sub3);
				4033
				4034	return std::make_tuple(RsrcPtr, NewSRsrc);
				4035	}
				4036
				4037	void SIInstrInfo::legalizeOperands(MachineInstr &MI,
				4038	MachineDominatorTree *MDT) const {
Nicolai Haehnle	ce2b589	2016-11-18 11:55:52 +0000	[diff] [blame]	4039	MachineFunction &MF = *MI.getParent()->getParent();
				4040	MachineRegisterInfo &MRI = MF.getRegInfo();
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4041
				4042	// Legalize VOP2
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4043	if (isVOP2(MI) \|\| isVOPC(MI)) {
Matt Arsenault	856d192	2015-12-01 19:57:17 +0000	[diff] [blame]	4044	legalizeOperandsVOP2(MRI, MI);
Tom Stellard	0e975cf	2014-08-01 00:32:35 +0000	[diff] [blame]	4045	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4046	}
				4047
				4048	// Legalize VOP3
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4049	if (isVOP3(MI)) {
Matt Arsenault	6005fcb	2015-10-21 21:51:02 +0000	[diff] [blame]	4050	legalizeOperandsVOP3(MRI, MI);
Matt Arsenault	e068f9a	2015-09-24 07:51:28 +0000	[diff] [blame]	4051	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4052	}
				4053
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	4054	// Legalize SMRD
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4055	if (isSMRD(MI)) {
Tom Stellard	467b5b9	2016-02-20 00:37:25 +0000	[diff] [blame]	4056	legalizeOperandsSMRD(MRI, MI);
				4057	return;
				4058	}
				4059
Tom Stellard	4f3b04d	2014-04-17 21:00:07 +0000	[diff] [blame]	4060	// Legalize REG_SEQUENCE and PHI
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4061	// The register class of the operands much be the same type as the register
				4062	// class of the output.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4063	if (MI.getOpcode() == AMDGPU::PHI) {
Craig Topper	062a2ba	2014-04-25 05:30:21 +0000	[diff] [blame]	4064	const TargetRegisterClass RC = nullptr, SRC = nullptr, *VRC = nullptr;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4065	for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
				4066	if (!MI.getOperand(i).isReg() \|\|
				4067	!TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4068	continue;
				4069	const TargetRegisterClass *OpRC =
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4070	MRI.getRegClass(MI.getOperand(i).getReg());
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4071	if (RI.hasVGPRs(OpRC)) {
				4072	VRC = OpRC;
				4073	} else {
				4074	SRC = OpRC;
				4075	}
				4076	}
				4077
				4078	// If any of the operands are VGPR registers, then they all most be
				4079	// otherwise we will create illegal VGPR->SGPR copies when legalizing
				4080	// them.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4081	if (VRC \|\| !RI.isSGPRClass(getOpRegClass(MI, 0))) {
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4082	if (!VRC) {
				4083	assert(SRC);
				4084	VRC = RI.getEquivalentVGPRClass(SRC);
				4085	}
				4086	RC = VRC;
				4087	} else {
				4088	RC = SRC;
				4089	}
				4090
				4091	// Update all the operands so they have the same type.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4092	for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
				4093	MachineOperand &Op = MI.getOperand(I);
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4094	if (!Op.isReg() \|\| !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4095	continue;
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4096
				4097	// MI is a PHI instruction.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4098	MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4099	MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
				4100
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	4101	// Avoid creating no-op copies with the same src and dst reg class. These
				4102	// confuse some of the machine passes.
				4103	legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4104	}
				4105	}
				4106
				4107	// REG_SEQUENCE doesn't really require operand legalization, but if one has a
				4108	// VGPR dest type and SGPR sources, insert copies so all operands are
				4109	// VGPRs. This seems to help operand folding / the register coalescer.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4110	if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
				4111	MachineBasicBlock *MBB = MI.getParent();
				4112	const TargetRegisterClass *DstRC = getOpRegClass(MI, 0);
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4113	if (RI.hasVGPRs(DstRC)) {
				4114	// Update all the operands so they are VGPR register classes. These may
				4115	// not be the same register class because REG_SEQUENCE supports mixing
				4116	// subregister index types e.g. sub0_sub1 + sub2 + sub3
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4117	for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
				4118	MachineOperand &Op = MI.getOperand(I);
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4119	if (!Op.isReg() \|\| !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
				4120	continue;
				4121
				4122	const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
				4123	const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
				4124	if (VRC == OpRC)
				4125	continue;
				4126
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	4127	legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Matt Arsenault	2d6fdb8	2015-09-25 17:08:42 +0000	[diff] [blame]	4128	Op.setIsKill();
Tom Stellard	4f3b04d	2014-04-17 21:00:07 +0000	[diff] [blame]	4129	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4130	}
Matt Arsenault	e068f9a	2015-09-24 07:51:28 +0000	[diff] [blame]	4131
				4132	return;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4133	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4134
Tom Stellard	a568738	2014-05-15 14:41:55 +0000	[diff] [blame]	4135	// Legalize INSERT_SUBREG
				4136	// src0 must have the same register class as dst
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4137	if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
				4138	unsigned Dst = MI.getOperand(0).getReg();
				4139	unsigned Src0 = MI.getOperand(1).getReg();
Tom Stellard	a568738	2014-05-15 14:41:55 +0000	[diff] [blame]	4140	const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
				4141	const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
				4142	if (DstRC != Src0RC) {
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	4143	MachineBasicBlock *MBB = MI.getParent();
				4144	MachineOperand &Op = MI.getOperand(1);
				4145	legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
Tom Stellard	a568738	2014-05-15 14:41:55 +0000	[diff] [blame]	4146	}
				4147	return;
				4148	}
				4149
Nicolai Haehnle	7a87977	2018-04-20 07:14:25 +0000	[diff] [blame]	4150	// Legalize SI_INIT_M0
				4151	if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
				4152	MachineOperand &Src = MI.getOperand(0);
				4153	if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
				4154	Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
				4155	return;
				4156	}
				4157
Nicolai Haehnle	ce2b589	2016-11-18 11:55:52 +0000	[diff] [blame]	4158	// Legalize MIMG and MUBUF/MTBUF for shaders.
				4159	//
				4160	// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
				4161	// scratch memory access. In both cases, the legalization never involves
				4162	// conversion to the addr64 form.
				4163	if (isMIMG(MI) \|\|
Matthias Braun	f1caa28	2017-12-15 22:22:58 +0000	[diff] [blame]	4164	(AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
Nicolai Haehnle	ce2b589	2016-11-18 11:55:52 +0000	[diff] [blame]	4165	(isMUBUF(MI) \|\| isMTBUF(MI)))) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4166	MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	4167	if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
				4168	unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
				4169	SRsrc->setReg(SGPR);
				4170	}
				4171
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4172	MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
Tom Stellard	1397d49	2016-02-11 21:45:07 +0000	[diff] [blame]	4173	if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) {
				4174	unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI);
				4175	SSamp->setReg(SGPR);
				4176	}
				4177	return;
				4178	}
				4179
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4180	// Legalize MUBUF* instructions.
				4181	int RsrcIdx =
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4182	AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4183	if (RsrcIdx != -1) {
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	4184	// We have an MUBUF instruction
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4185	MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
				4186	unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
				4187	if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
				4188	RI.getRegClass(RsrcRC))) {
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	4189	// The operands are legal.
				4190	// FIXME: We may need to legalize operands besided srsrc.
				4191	return;
				4192	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4193
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4194	// Legalize a VGPR Rsrc.
				4195	//
				4196	// If the instruction is _ADDR64, we can avoid a waterfall by extracting
				4197	// the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
				4198	// a zero-value SRsrc.
				4199	//
				4200	// If the instruction is _OFFSET (both idxen and offen disabled), and we
				4201	// support ADDR64 instructions, we can convert to ADDR64 and do the same as
				4202	// above.
				4203	//
				4204	// Otherwise we are on non-ADDR64 hardware, and/or we have
				4205	// idxen/offen/bothen and we fall back to a waterfall loop.
				4206
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4207	MachineBasicBlock &MBB = *MI.getParent();
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	4208
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4209	MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4210	if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	4211	// This is already an ADDR64 instruction so we need to add the pointer
				4212	// extracted from the resource descriptor to the current value of VAddr.
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	4213	unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4214	unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4215	unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	4216
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4217	unsigned RsrcPtr, NewSRsrc;
				4218	std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(this, MI, Rsrc);
				4219
				4220	// NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4221	DebugLoc DL = MI.getDebugLoc();
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	4222	BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4223	.addReg(RsrcPtr, 0, AMDGPU::sub0)
				4224	.addReg(VAddr->getReg(), 0, AMDGPU::sub0);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4225
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4226	// NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
Matt Arsenault	51d2d0f	2015-09-01 02:02:21 +0000	[diff] [blame]	4227	BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4228	.addReg(RsrcPtr, 0, AMDGPU::sub1)
				4229	.addReg(VAddr->getReg(), 0, AMDGPU::sub1);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4230
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	4231	// NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4232	BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
				4233	.addReg(NewVAddrLo)
				4234	.addImm(AMDGPU::sub0)
				4235	.addReg(NewVAddrHi)
				4236	.addImm(AMDGPU::sub1);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4237
				4238	VAddr->setReg(NewVAddr);
				4239	Rsrc->setReg(NewSRsrc);
				4240	} else if (!VAddr && ST.hasAddr64()) {
Tom Stellard	155bbb7	2014-08-11 22:18:17 +0000	[diff] [blame]	4241	// This instructions is the _OFFSET variant, so we need to convert it to
				4242	// ADDR64.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4243	assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
				4244	< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4245	"FIXME: Need to emit flat atomics here");
				4246
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4247	unsigned RsrcPtr, NewSRsrc;
				4248	std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(this, MI, Rsrc);
				4249
				4250	unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4251	MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
				4252	MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
				4253	MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
				4254	unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4255
				4256	// Atomics rith return have have an additional tied operand and are
				4257	// missing some of the special bits.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4258	MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4259	MachineInstr *Addr64;
				4260
				4261	if (!VDataIn) {
				4262	// Regular buffer load / store.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4263	MachineInstrBuilder MIB =
				4264	BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4265	.add(*VData)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4266	.addReg(NewVAddr)
				4267	.addReg(NewSRsrc)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4268	.add(*SOffset)
				4269	.add(*Offset);
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4270
				4271	// Atomics do not have this operand.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4272	if (const MachineOperand *GLC =
				4273	getNamedOperand(MI, AMDGPU::OpName::glc)) {
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4274	MIB.addImm(GLC->getImm());
				4275	}
Stanislav Mekhanoshin	a632294	2019-04-30 22:08:23 +0000	[diff] [blame]	4276	if (const MachineOperand *DLC =
				4277	getNamedOperand(MI, AMDGPU::OpName::dlc)) {
				4278	MIB.addImm(DLC->getImm());
				4279	}
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4280
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4281	MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4282
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4283	if (const MachineOperand *TFE =
				4284	getNamedOperand(MI, AMDGPU::OpName::tfe)) {
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4285	MIB.addImm(TFE->getImm());
				4286	}
				4287
Chandler Carruth	c73c030	2018-08-16 21:30:05 +0000	[diff] [blame]	4288	MIB.cloneMemRefs(MI);
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4289	Addr64 = MIB;
				4290	} else {
				4291	// Atomics with return.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4292	Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4293	.add(*VData)
				4294	.add(*VDataIn)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4295	.addReg(NewVAddr)
				4296	.addReg(NewSRsrc)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4297	.add(*SOffset)
				4298	.add(*Offset)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4299	.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
Chandler Carruth	c73c030	2018-08-16 21:30:05 +0000	[diff] [blame]	4300	.cloneMemRefs(MI);
Matt Arsenault	a40450c	2015-11-05 02:46:56 +0000	[diff] [blame]	4301	}
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4302
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4303	MI.removeFromParent();
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4304
Matt Arsenault	ef67d76	2015-09-09 17:03:29 +0000	[diff] [blame]	4305	// NewVaddr = {NewVaddrHi, NewVaddrLo}
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4306	BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
				4307	NewVAddr)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4308	.addReg(RsrcPtr, 0, AMDGPU::sub0)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4309	.addImm(AMDGPU::sub0)
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4310	.addReg(RsrcPtr, 0, AMDGPU::sub1)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4311	.addImm(AMDGPU::sub1);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4312	} else {
				4313	// This is another variant; legalize Rsrc with waterfall loop from VGPRs
				4314	// to SGPRs.
				4315	loadSRsrcFromVGPR(this, MI, Rsrc, MDT);
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4316	}
				4317	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4318	}
				4319
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4320	void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
				4321	MachineDominatorTree *MDT) const {
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	4322	SetVectorType Worklist;
				4323	Worklist.insert(&TopInst);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4324
				4325	while (!Worklist.empty()) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4326	MachineInstr &Inst = *Worklist.pop_back_val();
				4327	MachineBasicBlock *MBB = Inst.getParent();
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	4328	MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				4329
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4330	unsigned Opcode = Inst.getOpcode();
				4331	unsigned NewOpcode = getVALUOp(Inst);
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	4332
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	4333	// Handle some special cases
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	4334	switch (Opcode) {
Tom Stellard	0c354f2	2014-04-30 15:31:29 +0000	[diff] [blame]	4335	default:
Tom Stellard	0c354f2	2014-04-30 15:31:29 +0000	[diff] [blame]	4336	break;
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4337	case AMDGPU::S_ADD_U64_PSEUDO:
				4338	case AMDGPU::S_SUB_U64_PSEUDO:
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4339	splitScalar64BitAddSub(Worklist, Inst, MDT);
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4340	Inst.eraseFromParent();
				4341	continue;
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4342	case AMDGPU::S_ADD_I32:
				4343	case AMDGPU::S_SUB_I32:
				4344	// FIXME: The u32 versions currently selected use the carry.
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4345	if (moveScalarAddSub(Worklist, Inst, MDT))
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4346	continue;
				4347
				4348	// Default handling
				4349	break;
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4350	case AMDGPU::S_AND_B64:
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4351	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4352	Inst.eraseFromParent();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4353	continue;
				4354
				4355	case AMDGPU::S_OR_B64:
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4356	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4357	Inst.eraseFromParent();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4358	continue;
				4359
				4360	case AMDGPU::S_XOR_B64:
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4361	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
				4362	Inst.eraseFromParent();
				4363	continue;
				4364
				4365	case AMDGPU::S_NAND_B64:
				4366	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
				4367	Inst.eraseFromParent();
				4368	continue;
				4369
				4370	case AMDGPU::S_NOR_B64:
				4371	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
				4372	Inst.eraseFromParent();
				4373	continue;
				4374
				4375	case AMDGPU::S_XNOR_B64:
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	4376	if (ST.hasDLInsts())
				4377	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
				4378	else
				4379	splitScalar64BitXnor(Worklist, Inst, MDT);
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4380	Inst.eraseFromParent();
				4381	continue;
				4382
				4383	case AMDGPU::S_ANDN2_B64:
				4384	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
				4385	Inst.eraseFromParent();
				4386	continue;
				4387
				4388	case AMDGPU::S_ORN2_B64:
				4389	splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4390	Inst.eraseFromParent();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4391	continue;
				4392
				4393	case AMDGPU::S_NOT_B64:
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4394	splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4395	Inst.eraseFromParent();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4396	continue;
				4397
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	4398	case AMDGPU::S_BCNT1_I32_B64:
				4399	splitScalar64BitBCNT(Worklist, Inst);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4400	Inst.eraseFromParent();
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	4401	continue;
				4402
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	4403	case AMDGPU::S_BFE_I64:
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	4404	splitScalar64BitBFE(Worklist, Inst);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4405	Inst.eraseFromParent();
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	4406	continue;
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	4407
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	4408	case AMDGPU::S_LSHL_B32:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4409	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	4410	NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
				4411	swapOperands(Inst);
				4412	}
				4413	break;
				4414	case AMDGPU::S_ASHR_I32:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4415	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	4416	NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
				4417	swapOperands(Inst);
				4418	}
				4419	break;
				4420	case AMDGPU::S_LSHR_B32:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4421	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	4422	NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
				4423	swapOperands(Inst);
				4424	}
				4425	break;
Marek Olsak	707a6d0	2015-02-03 21:53:01 +0000	[diff] [blame]	4426	case AMDGPU::S_LSHL_B64:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4427	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	707a6d0	2015-02-03 21:53:01 +0000	[diff] [blame]	4428	NewOpcode = AMDGPU::V_LSHLREV_B64;
				4429	swapOperands(Inst);
				4430	}
				4431	break;
				4432	case AMDGPU::S_ASHR_I64:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4433	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	707a6d0	2015-02-03 21:53:01 +0000	[diff] [blame]	4434	NewOpcode = AMDGPU::V_ASHRREV_I64;
				4435	swapOperands(Inst);
				4436	}
				4437	break;
				4438	case AMDGPU::S_LSHR_B64:
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	4439	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	707a6d0	2015-02-03 21:53:01 +0000	[diff] [blame]	4440	NewOpcode = AMDGPU::V_LSHRREV_B64;
				4441	swapOperands(Inst);
				4442	}
				4443	break;
Marek Olsak	be04780	2014-12-07 12:19:03 +0000	[diff] [blame]	4444
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4445	case AMDGPU::S_ABS_I32:
				4446	lowerScalarAbs(Worklist, Inst);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4447	Inst.eraseFromParent();
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4448	continue;
				4449
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4450	case AMDGPU::S_CBRANCH_SCC0:
				4451	case AMDGPU::S_CBRANCH_SCC1:
				4452	// Clear unused bits of vcc
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4453	BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
				4454	AMDGPU::VCC)
				4455	.addReg(AMDGPU::EXEC)
				4456	.addReg(AMDGPU::VCC);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4457	break;
				4458
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4459	case AMDGPU::S_BFE_U64:
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4460	case AMDGPU::S_BFM_B64:
				4461	llvm_unreachable("Moving this op to VALU not implemented");
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	4462
				4463	case AMDGPU::S_PACK_LL_B32_B16:
				4464	case AMDGPU::S_PACK_LH_B32_B16:
Eugene Zelenko	59e1282	2017-08-08 00:47:13 +0000	[diff] [blame]	4465	case AMDGPU::S_PACK_HH_B32_B16:
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	4466	movePackToVALU(Worklist, MRI, Inst);
				4467	Inst.eraseFromParent();
				4468	continue;
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4469
				4470	case AMDGPU::S_XNOR_B32:
				4471	lowerScalarXnor(Worklist, Inst);
				4472	Inst.eraseFromParent();
				4473	continue;
				4474
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4475	case AMDGPU::S_NAND_B32:
				4476	splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
				4477	Inst.eraseFromParent();
				4478	continue;
				4479
				4480	case AMDGPU::S_NOR_B32:
				4481	splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
				4482	Inst.eraseFromParent();
				4483	continue;
				4484
				4485	case AMDGPU::S_ANDN2_B32:
				4486	splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
				4487	Inst.eraseFromParent();
				4488	continue;
				4489
				4490	case AMDGPU::S_ORN2_B32:
				4491	splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4492	Inst.eraseFromParent();
				4493	continue;
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	4494	}
Tom Stellard	e038720	2014-03-21 15:51:54 +0000	[diff] [blame]	4495
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4496	if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
				4497	// We cannot move this instruction to the VALU, so we should try to
				4498	// legalize its operands instead.
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4499	legalizeOperands(Inst, MDT);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4500	continue;
Tom Stellard	1583409	2014-03-21 15:51:57 +0000	[diff] [blame]	4501	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4502
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4503	// Use the new VALU Opcode.
				4504	const MCInstrDesc &NewDesc = get(NewOpcode);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4505	Inst.setDesc(NewDesc);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4506
Matt Arsenault	f0b1e3a	2013-11-18 20:09:21 +0000	[diff] [blame]	4507	// Remove any references to SCC. Vector instructions can't read from it, and
				4508	// We're just about to add the implicit use / defs of VCC, and we don't want
				4509	// both.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4510	for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
				4511	MachineOperand &Op = Inst.getOperand(i);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4512	if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
Michael Liao	6883d7e	2019-03-15 12:42:21 +0000	[diff] [blame]	4513	// Only propagate through live-def of SCC.
				4514	if (Op.isDef() && !Op.isDead())
				4515	addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4516	Inst.RemoveOperand(i);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4517	}
Matt Arsenault	f0b1e3a	2013-11-18 20:09:21 +0000	[diff] [blame]	4518	}
				4519
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	4520	if (Opcode == AMDGPU::S_SEXT_I32_I8 \|\| Opcode == AMDGPU::S_SEXT_I32_I16) {
				4521	// We are converting these to a BFE, so we need to add the missing
				4522	// operands for the size and offset.
				4523	unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4524	Inst.addOperand(MachineOperand::CreateImm(0));
				4525	Inst.addOperand(MachineOperand::CreateImm(Size));
Matt Arsenault	27cc958	2014-04-18 01:53:18 +0000	[diff] [blame]	4526
Matt Arsenault	b5b5110	2014-06-10 19:18:21 +0000	[diff] [blame]	4527	} else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
				4528	// The VALU version adds the second operand to the result, so insert an
				4529	// extra 0 operand.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4530	Inst.addOperand(MachineOperand::CreateImm(0));
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4531	}
				4532
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4533	Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4534
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	4535	if (Opcode == AMDGPU::S_BFE_I32 \|\| Opcode == AMDGPU::S_BFE_U32) {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4536	const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	4537	// If we need to move this to VGPRs, we need to unpack the second operand
				4538	// back into the 2 separate ones for bit offset and width.
				4539	assert(OffsetWidthOp.isImm() &&
				4540	"Scalar BFE is only implemented for constant width and offset");
				4541	uint32_t Imm = OffsetWidthOp.getImm();
				4542
				4543	uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
				4544	uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4545	Inst.RemoveOperand(2); // Remove old immediate.
				4546	Inst.addOperand(MachineOperand::CreateImm(Offset));
				4547	Inst.addOperand(MachineOperand::CreateImm(BitWidth));
Matt Arsenault	78b8670	2014-04-18 05:19:26 +0000	[diff] [blame]	4548	}
				4549
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4550	bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4551	unsigned NewDstReg = AMDGPU::NoRegister;
				4552	if (HasDst) {
Matt Arsenault	21a4382	2017-04-06 21:09:53 +0000	[diff] [blame]	4553	unsigned DstReg = Inst.getOperand(0).getReg();
				4554	if (TargetRegisterInfo::isPhysicalRegister(DstReg))
				4555	continue;
				4556
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4557	// Update the destination register class.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4558	const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4559	if (!NewDstRC)
				4560	continue;
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4561
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	4562	if (Inst.isCopy() &&
				4563	TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
				4564	NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
				4565	// Instead of creating a copy where src and dst are the same register
				4566	// class, we just replace all uses of dst with src. These kinds of
				4567	// copies interfere with the heuristics MachineSink uses to decide
				4568	// whether or not to split a critical edge. Since the pass assumes
				4569	// that copies will end up as machine instructions and not be
				4570	// eliminated.
				4571	addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
				4572	MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
				4573	MRI.clearKillFlags(Inst.getOperand(1).getReg());
				4574	Inst.getOperand(0).setReg(DstReg);
Matt Arsenault	69932e4	2018-03-19 14:07:15 +0000	[diff] [blame]	4575
				4576	// Make sure we don't leave around a dead VGPR->SGPR copy. Normally
				4577	// these are deleted later, but at -O0 it would leave a suspicious
				4578	// looking illegal copy of an undef register.
				4579	for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
				4580	Inst.RemoveOperand(I);
				4581	Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
Tom Stellard	0d162b1	2016-11-16 18:42:17 +0000	[diff] [blame]	4582	continue;
				4583	}
				4584
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4585	NewDstReg = MRI.createVirtualRegister(NewDstRC);
				4586	MRI.replaceRegWith(DstReg, NewDstReg);
				4587	}
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4588
Tom Stellard	e1a2445	2014-04-17 21:00:01 +0000	[diff] [blame]	4589	// Legalize the operands
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4590	legalizeOperands(Inst, MDT);
Tom Stellard	e1a2445	2014-04-17 21:00:01 +0000	[diff] [blame]	4591
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	4592	if (HasDst)
				4593	addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
Tom Stellard	8216602	2013-11-13 23:36:37 +0000	[diff] [blame]	4594	}
				4595	}
				4596
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4597	// Add/sub require special handling to deal with carry outs.
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4598	bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
				4599	MachineDominatorTree *MDT) const {
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4600	if (ST.hasAddNoCarry()) {
				4601	// Assume there is no user of scc since we don't select this in that case.
				4602	// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
				4603	// is used.
				4604
				4605	MachineBasicBlock &MBB = *Inst.getParent();
				4606	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4607
				4608	unsigned OldDstReg = Inst.getOperand(0).getReg();
				4609	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4610
				4611	unsigned Opc = Inst.getOpcode();
				4612	assert(Opc == AMDGPU::S_ADD_I32 \|\| Opc == AMDGPU::S_SUB_I32);
				4613
				4614	unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
				4615	AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
				4616
				4617	assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
				4618	Inst.RemoveOperand(3);
				4619
				4620	Inst.setDesc(get(NewOpc));
Tim Renouf	cfdfba9	2019-03-18 19:35:44 +0000	[diff] [blame]	4621	Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4622	Inst.addImplicitDefUseOperands(*MBB.getParent());
				4623	MRI.replaceRegWith(OldDstReg, ResultReg);
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4624	legalizeOperands(Inst, MDT);
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4625
				4626	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
				4627	return true;
				4628	}
				4629
				4630	return false;
				4631	}
				4632
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	4633	void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4634	MachineInstr &Inst) const {
				4635	MachineBasicBlock &MBB = *Inst.getParent();
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4636	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4637	MachineBasicBlock::iterator MII = Inst;
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4638	DebugLoc DL = Inst.getDebugLoc();
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4639
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4640	MachineOperand &Dest = Inst.getOperand(0);
				4641	MachineOperand &Src = Inst.getOperand(1);
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4642	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4643	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4644
Matt Arsenault	84445dd	2017-11-30 22:51:26 +0000	[diff] [blame]	4645	unsigned SubOp = ST.hasAddNoCarry() ?
				4646	AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
				4647
				4648	BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
Marek Olsak	7ed6b2f	2015-11-25 21:22:45 +0000	[diff] [blame]	4649	.addImm(0)
				4650	.addReg(Src.getReg());
				4651
				4652	BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
				4653	.addReg(Src.getReg())
				4654	.addReg(TmpReg);
				4655
				4656	MRI.replaceRegWith(Dest.getReg(), ResultReg);
				4657	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
				4658	}
				4659
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4660	void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
				4661	MachineInstr &Inst) const {
				4662	MachineBasicBlock &MBB = *Inst.getParent();
				4663	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4664	MachineBasicBlock::iterator MII = Inst;
				4665	const DebugLoc &DL = Inst.getDebugLoc();
				4666
				4667	MachineOperand &Dest = Inst.getOperand(0);
				4668	MachineOperand &Src0 = Inst.getOperand(1);
				4669	MachineOperand &Src1 = Inst.getOperand(2);
				4670
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	4671	if (ST.hasDLInsts()) {
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4672	unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4673	legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
				4674	legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
				4675
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	4676	BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
				4677	.add(Src0)
				4678	.add(Src1);
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4679
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4680	MRI.replaceRegWith(Dest.getReg(), NewDest);
				4681	addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
				4682	} else {
				4683	// Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
				4684	// invert either source and then perform the XOR. If either source is a
				4685	// scalar register, then we can leave the inversion on the scalar unit to
				4686	// acheive a better distrubution of scalar and vector instructions.
				4687	bool Src0IsSGPR = Src0.isReg() &&
				4688	RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
				4689	bool Src1IsSGPR = Src1.isReg() &&
				4690	RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
				4691	MachineInstr *Not = nullptr;
				4692	MachineInstr *Xor = nullptr;
				4693	unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4694	unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4695
				4696	// Build a pair of scalar instructions and add them to the work list.
				4697	// The next iteration over the work list will lower these to the vector
				4698	// unit as necessary.
				4699	if (Src0IsSGPR) {
				4700	Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
				4701	.add(Src0);
				4702	Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
				4703	.addReg(Temp)
				4704	.add(Src1);
				4705	} else if (Src1IsSGPR) {
				4706	Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
				4707	.add(Src1);
				4708	Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
				4709	.add(Src0)
				4710	.addReg(Temp);
				4711	} else {
				4712	Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
				4713	.add(Src0)
				4714	.add(Src1);
				4715	Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
				4716	.addReg(Temp);
				4717	Worklist.insert(Not);
				4718	}
				4719
				4720	MRI.replaceRegWith(Dest.getReg(), NewDest);
				4721
				4722	Worklist.insert(Xor);
				4723
				4724	addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	4725	}
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4726	}
				4727
				4728	void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
				4729	MachineInstr &Inst,
				4730	unsigned Opcode) const {
				4731	MachineBasicBlock &MBB = *Inst.getParent();
				4732	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4733	MachineBasicBlock::iterator MII = Inst;
				4734	const DebugLoc &DL = Inst.getDebugLoc();
				4735
				4736	MachineOperand &Dest = Inst.getOperand(0);
				4737	MachineOperand &Src0 = Inst.getOperand(1);
				4738	MachineOperand &Src1 = Inst.getOperand(2);
				4739
				4740	unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4741	unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4742
				4743	MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
				4744	.add(Src0)
				4745	.add(Src1);
				4746
				4747	MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
				4748	.addReg(Interm);
				4749
				4750	Worklist.insert(&Op);
				4751	Worklist.insert(&Not);
				4752
				4753	MRI.replaceRegWith(Dest.getReg(), NewDest);
				4754	addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
				4755	}
				4756
				4757	void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
				4758	MachineInstr &Inst,
				4759	unsigned Opcode) const {
				4760	MachineBasicBlock &MBB = *Inst.getParent();
				4761	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4762	MachineBasicBlock::iterator MII = Inst;
				4763	const DebugLoc &DL = Inst.getDebugLoc();
				4764
				4765	MachineOperand &Dest = Inst.getOperand(0);
				4766	MachineOperand &Src0 = Inst.getOperand(1);
				4767	MachineOperand &Src1 = Inst.getOperand(2);
				4768
				4769	unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4770	unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
				4771
				4772	MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
				4773	.add(Src1);
				4774
				4775	MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
				4776	.add(Src0)
				4777	.addReg(Interm);
				4778
				4779	Worklist.insert(&Not);
				4780	Worklist.insert(&Op);
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4781
Matt Arsenault	0084adc	2018-04-30 19:08:16 +0000	[diff] [blame]	4782	MRI.replaceRegWith(Dest.getReg(), NewDest);
				4783	addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
Konstantin Zhuravlyov	ca8946a	2017-09-18 21:22:45 +0000	[diff] [blame]	4784	}
				4785
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4786	void SIInstrInfo::splitScalar64BitUnaryOp(
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	4787	SetVectorType &Worklist, MachineInstr &Inst,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4788	unsigned Opcode) const {
				4789	MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4790	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4791
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4792	MachineOperand &Dest = Inst.getOperand(0);
				4793	MachineOperand &Src0 = Inst.getOperand(1);
				4794	DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4795
				4796	MachineBasicBlock::iterator MII = Inst;
				4797
				4798	const MCInstrDesc &InstDesc = get(Opcode);
				4799	const TargetRegisterClass *Src0RC = Src0.isReg() ?
				4800	MRI.getRegClass(Src0.getReg()) :
				4801	&AMDGPU::SGPR_32RegClass;
				4802
				4803	const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
				4804
				4805	MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4806	AMDGPU::sub0, Src0SubRC);
				4807
				4808	const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4809	const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
				4810	const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4811
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4812	unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4813	MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4814
				4815	MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4816	AMDGPU::sub1, Src0SubRC);
				4817
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4818	unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4819	MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4820
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4821	unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4822	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
				4823	.addReg(DestSub0)
				4824	.addImm(AMDGPU::sub0)
				4825	.addReg(DestSub1)
				4826	.addImm(AMDGPU::sub1);
				4827
				4828	MRI.replaceRegWith(Dest.getReg(), FullDestReg);
				4829
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4830	Worklist.insert(&LoHalf);
				4831	Worklist.insert(&HiHalf);
				4832
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4833	// We don't need to legalizeOperands here because for a single operand, src0
				4834	// will support any kind of input.
				4835
				4836	// Move all users of this moved value.
				4837	addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault	689f325	2014-06-09 16:36:31 +0000	[diff] [blame]	4838	}
				4839
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4840	void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
				4841	MachineInstr &Inst,
				4842	MachineDominatorTree *MDT) const {
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4843	bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
				4844
				4845	MachineBasicBlock &MBB = *Inst.getParent();
				4846	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4847
				4848	unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
				4849	unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4850	unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				4851
				4852	unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				4853	unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
				4854
				4855	MachineOperand &Dest = Inst.getOperand(0);
				4856	MachineOperand &Src0 = Inst.getOperand(1);
				4857	MachineOperand &Src1 = Inst.getOperand(2);
				4858	const DebugLoc &DL = Inst.getDebugLoc();
				4859	MachineBasicBlock::iterator MII = Inst;
				4860
				4861	const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
				4862	const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
				4863	const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
				4864	const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
				4865
				4866	MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4867	AMDGPU::sub0, Src0SubRC);
				4868	MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				4869	AMDGPU::sub0, Src1SubRC);
				4870
				4871
				4872	MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4873	AMDGPU::sub1, Src0SubRC);
				4874	MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				4875	AMDGPU::sub1, Src1SubRC);
				4876
				4877	unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
				4878	MachineInstr *LoHalf =
				4879	BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
				4880	.addReg(CarryReg, RegState::Define)
				4881	.add(SrcReg0Sub0)
Tim Renouf	cfdfba9	2019-03-18 19:35:44 +0000	[diff] [blame]	4882	.add(SrcReg1Sub0)
				4883	.addImm(0); // clamp bit
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4884
				4885	unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
				4886	MachineInstr *HiHalf =
				4887	BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
				4888	.addReg(DeadCarryReg, RegState::Define \| RegState::Dead)
				4889	.add(SrcReg0Sub1)
				4890	.add(SrcReg1Sub1)
Tim Renouf	cfdfba9	2019-03-18 19:35:44 +0000	[diff] [blame]	4891	.addReg(CarryReg, RegState::Kill)
				4892	.addImm(0); // clamp bit
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4893
				4894	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
				4895	.addReg(DestSub0)
				4896	.addImm(AMDGPU::sub0)
				4897	.addReg(DestSub1)
				4898	.addImm(AMDGPU::sub1);
				4899
				4900	MRI.replaceRegWith(Dest.getReg(), FullDestReg);
				4901
				4902	// Try to legalize the operands in case we need to swap the order to keep it
				4903	// valid.
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4904	legalizeOperands(*LoHalf, MDT);
				4905	legalizeOperands(*HiHalf, MDT);
Matt Arsenault	301162c	2017-11-15 21:51:43 +0000	[diff] [blame]	4906
				4907	// Move all users of this moved vlaue.
				4908	addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
				4909	}
				4910
Scott Linder	823549a	2018-10-08 18:47:01 +0000	[diff] [blame]	4911	void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
				4912	MachineInstr &Inst, unsigned Opcode,
				4913	MachineDominatorTree *MDT) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4914	MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4915	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4916
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4917	MachineOperand &Dest = Inst.getOperand(0);
				4918	MachineOperand &Src0 = Inst.getOperand(1);
				4919	MachineOperand &Src1 = Inst.getOperand(2);
				4920	DebugLoc DL = Inst.getDebugLoc();
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4921
				4922	MachineBasicBlock::iterator MII = Inst;
				4923
				4924	const MCInstrDesc &InstDesc = get(Opcode);
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	4925	const TargetRegisterClass *Src0RC = Src0.isReg() ?
				4926	MRI.getRegClass(Src0.getReg()) :
				4927	&AMDGPU::SGPR_32RegClass;
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4928
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	4929	const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
				4930	const TargetRegisterClass *Src1RC = Src1.isReg() ?
				4931	MRI.getRegClass(Src1.getReg()) :
				4932	&AMDGPU::SGPR_32RegClass;
				4933
				4934	const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
				4935
				4936	MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4937	AMDGPU::sub0, Src0SubRC);
				4938	MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				4939	AMDGPU::sub0, Src1SubRC);
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4940	MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
				4941	AMDGPU::sub1, Src0SubRC);
				4942	MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
				4943	AMDGPU::sub1, Src1SubRC);
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	4944
				4945	const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4946	const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
				4947	const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
Matt Arsenault	684dc80	2014-03-24 20:08:13 +0000	[diff] [blame]	4948
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4949	unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4950	MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4951	.add(SrcReg0Sub0)
				4952	.add(SrcReg1Sub0);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4953
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4954	unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	4955	MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	4956	.add(SrcReg0Sub1)
				4957	.add(SrcReg1Sub1);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4958
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4959	unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4960	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
				4961	.addReg(DestSub0)
				4962	.addImm(AMDGPU::sub0)
				4963	.addReg(DestSub1)
				4964	.addImm(AMDGPU::sub1);
				4965
				4966	MRI.replaceRegWith(Dest.getReg(), FullDestReg);
				4967
Graham Sellers	04f7a4d	2018-11-29 16:05:38 +0000	[diff] [blame]	4968	Worklist.insert(&LoHalf);
				4969	Worklist.insert(&HiHalf);
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	4970
				4971	// Move all users of this moved vlaue.
				4972	addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
Matt Arsenault	f35182c	2014-03-24 20:08:05 +0000	[diff] [blame]	4973	}
				4974
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	4975	void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
				4976	MachineInstr &Inst,
				4977	MachineDominatorTree *MDT) const {
				4978	MachineBasicBlock &MBB = *Inst.getParent();
				4979	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				4980
				4981	MachineOperand &Dest = Inst.getOperand(0);
				4982	MachineOperand &Src0 = Inst.getOperand(1);
				4983	MachineOperand &Src1 = Inst.getOperand(2);
				4984	const DebugLoc &DL = Inst.getDebugLoc();
				4985
				4986	MachineBasicBlock::iterator MII = Inst;
				4987
				4988	const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
				4989
				4990	unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				4991
				4992	MachineOperand* Op0;
				4993	MachineOperand* Op1;
				4994
				4995	if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
				4996	Op0 = &Src0;
				4997	Op1 = &Src1;
				4998	} else {
				4999	Op0 = &Src1;
				5000	Op1 = &Src0;
				5001	}
				5002
				5003	BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
				5004	.add(*Op0);
				5005
				5006	unsigned NewDest = MRI.createVirtualRegister(DestRC);
				5007
				5008	MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
				5009	.addReg(Interm)
				5010	.add(*Op1);
				5011
				5012	MRI.replaceRegWith(Dest.getReg(), NewDest);
				5013
				5014	Worklist.insert(&Xor);
				5015	}
				5016
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5017	void SIInstrInfo::splitScalar64BitBCNT(
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	5018	SetVectorType &Worklist, MachineInstr &Inst) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5019	MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5020	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				5021
				5022	MachineBasicBlock::iterator MII = Inst;
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	5023	const DebugLoc &DL = Inst.getDebugLoc();
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5024
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5025	MachineOperand &Dest = Inst.getOperand(0);
				5026	MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5027
Marek Olsak	c536850	2015-01-15 18:43:01 +0000	[diff] [blame]	5028	const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5029	const TargetRegisterClass *SrcRC = Src.isReg() ?
				5030	MRI.getRegClass(Src.getReg()) :
				5031	&AMDGPU::SGPR_32RegClass;
				5032
				5033	unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5034	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5035
				5036	const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
				5037
				5038	MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
				5039	AMDGPU::sub0, SrcSubRC);
				5040	MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
				5041	AMDGPU::sub1, SrcSubRC);
				5042
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	5043	BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5044
Diana Picus	116bbab	2017-01-13 09:58:52 +0000	[diff] [blame]	5045	BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5046
				5047	MRI.replaceRegWith(Dest.getReg(), ResultReg);
				5048
Matt Arsenault	5e7f95e	2015-08-26 20:48:04 +0000	[diff] [blame]	5049	// We don't need to legalize operands here. src0 for etiher instruction can be
				5050	// an SGPR, and the second input is unused or determined here.
				5051	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	8333e43	2014-06-10 19:18:24 +0000	[diff] [blame]	5052	}
				5053
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	5054	void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5055	MachineInstr &Inst) const {
				5056	MachineBasicBlock &MBB = *Inst.getParent();
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5057	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
				5058	MachineBasicBlock::iterator MII = Inst;
Graham Sellers	ba559ac	2018-12-01 12:27:53 +0000	[diff] [blame]	5059	const DebugLoc &DL = Inst.getDebugLoc();
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5060
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5061	MachineOperand &Dest = Inst.getOperand(0);
				5062	uint32_t Imm = Inst.getOperand(2).getImm();
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5063	uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
				5064	uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
				5065
Matt Arsenault	6ad3426	2014-11-14 18:40:49 +0000	[diff] [blame]	5066	(void) Offset;
				5067
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5068	// Only sext_inreg cases handled.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5069	assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
				5070	Offset == 0 && "Not implemented");
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5071
				5072	if (BitWidth < 32) {
				5073	unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5074	unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5075	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
				5076
				5077	BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5078	.addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0)
				5079	.addImm(0)
				5080	.addImm(BitWidth);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5081
				5082	BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
				5083	.addImm(31)
				5084	.addReg(MidRegLo);
				5085
				5086	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
				5087	.addReg(MidRegLo)
				5088	.addImm(AMDGPU::sub0)
				5089	.addReg(MidRegHi)
				5090	.addImm(AMDGPU::sub1);
				5091
				5092	MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault	445833c	2015-08-26 20:47:58 +0000	[diff] [blame]	5093	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5094	return;
				5095	}
				5096
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5097	MachineOperand &Src = Inst.getOperand(1);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5098	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5099	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
				5100
				5101	BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
				5102	.addImm(31)
				5103	.addReg(Src.getReg(), 0, AMDGPU::sub0);
				5104
				5105	BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
				5106	.addReg(Src.getReg(), 0, AMDGPU::sub0)
				5107	.addImm(AMDGPU::sub0)
				5108	.addReg(TmpReg)
				5109	.addImm(AMDGPU::sub1);
				5110
				5111	MRI.replaceRegWith(Dest.getReg(), ResultReg);
Matt Arsenault	445833c	2015-08-26 20:47:58 +0000	[diff] [blame]	5112	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
Matt Arsenault	9481221	2014-11-14 18:18:16 +0000	[diff] [blame]	5113	}
				5114
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	5115	void SIInstrInfo::addUsersToMoveToVALUWorklist(
				5116	unsigned DstReg,
				5117	MachineRegisterInfo &MRI,
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	5118	SetVectorType &Worklist) const {
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	5119	for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
Matt Arsenault	4c1e9ec	2016-12-20 18:55:06 +0000	[diff] [blame]	5120	E = MRI.use_end(); I != E;) {
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	5121	MachineInstr &UseMI = *I->getParent();
Neil Henning	0799352	2019-01-29 14:28:17 +0000	[diff] [blame]	5122
				5123	unsigned OpNo = 0;
				5124
				5125	switch (UseMI.getOpcode()) {
				5126	case AMDGPU::COPY:
				5127	case AMDGPU::WQM:
				5128	case AMDGPU::WWM:
				5129	case AMDGPU::REG_SEQUENCE:
				5130	case AMDGPU::PHI:
				5131	case AMDGPU::INSERT_SUBREG:
				5132	break;
				5133	default:
				5134	OpNo = I.getOperandNo();
				5135	break;
				5136	}
				5137
				5138	if (!RI.hasVGPRs(getOpRegClass(UseMI, OpNo))) {
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	5139	Worklist.insert(&UseMI);
Matt Arsenault	4c1e9ec	2016-12-20 18:55:06 +0000	[diff] [blame]	5140
				5141	do {
				5142	++I;
				5143	} while (I != E && I->getParent() == &UseMI);
				5144	} else {
				5145	++I;
Matt Arsenault	f003c38	2015-08-26 20:47:50 +0000	[diff] [blame]	5146	}
				5147	}
				5148	}
				5149
Alfred Huang	5b27072	2017-07-14 17:56:55 +0000	[diff] [blame]	5150	void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5151	MachineRegisterInfo &MRI,
				5152	MachineInstr &Inst) const {
				5153	unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5154	MachineBasicBlock *MBB = Inst.getParent();
				5155	MachineOperand &Src0 = Inst.getOperand(1);
				5156	MachineOperand &Src1 = Inst.getOperand(2);
				5157	const DebugLoc &DL = Inst.getDebugLoc();
				5158
				5159	switch (Inst.getOpcode()) {
				5160	case AMDGPU::S_PACK_LL_B32_B16: {
Konstantin Zhuravlyov	d24aeb2	2017-04-13 23:17:00 +0000	[diff] [blame]	5161	unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5162	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5163
Konstantin Zhuravlyov	d24aeb2	2017-04-13 23:17:00 +0000	[diff] [blame]	5164	// FIXME: Can do a lot better if we know the high bits of src0 or src1 are
				5165	// 0.
				5166	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
				5167	.addImm(0xffff);
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5168
Konstantin Zhuravlyov	d24aeb2	2017-04-13 23:17:00 +0000	[diff] [blame]	5169	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
				5170	.addReg(ImmReg, RegState::Kill)
				5171	.add(Src0);
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5172
Konstantin Zhuravlyov	d24aeb2	2017-04-13 23:17:00 +0000	[diff] [blame]	5173	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
				5174	.add(Src1)
				5175	.addImm(16)
				5176	.addReg(TmpReg, RegState::Kill);
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5177	break;
				5178	}
				5179	case AMDGPU::S_PACK_LH_B32_B16: {
				5180	unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5181	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
				5182	.addImm(0xffff);
				5183	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
				5184	.addReg(ImmReg, RegState::Kill)
				5185	.add(Src0)
				5186	.add(Src1);
				5187	break;
				5188	}
				5189	case AMDGPU::S_PACK_HH_B32_B16: {
				5190	unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5191	unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
				5192	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
				5193	.addImm(16)
				5194	.add(Src0);
				5195	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
Konstantin Zhuravlyov	88938d4	2017-04-21 19:35:05 +0000	[diff] [blame]	5196	.addImm(0xffff0000);
Matt Arsenault	eb522e6	2017-02-27 22:15:25 +0000	[diff] [blame]	5197	BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
				5198	.add(Src1)
				5199	.addReg(ImmReg, RegState::Kill)
				5200	.addReg(TmpReg, RegState::Kill);
				5201	break;
				5202	}
				5203	default:
				5204	llvm_unreachable("unhandled s_pack_* instruction");
				5205	}
				5206
				5207	MachineOperand &Dest = Inst.getOperand(0);
				5208	MRI.replaceRegWith(Dest.getReg(), ResultReg);
				5209	addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
				5210	}
				5211
Michael Liao	6883d7e	2019-03-15 12:42:21 +0000	[diff] [blame]	5212	void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
				5213	MachineInstr &SCCDefInst,
				5214	SetVectorType &Worklist) const {
				5215	// Ensure that def inst defines SCC, which is still live.
				5216	assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
				5217	!Op.isDead() && Op.getParent() == &SCCDefInst);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	5218	// This assumes that all the users of SCC are in the same block
				5219	// as the SCC def.
Michael Liao	6883d7e	2019-03-15 12:42:21 +0000	[diff] [blame]	5220	for (MachineInstr &MI : // Skip the def inst itself.
				5221	make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
				5222	SCCDefInst.getParent()->end())) {
				5223	// Check if SCC is used first.
				5224	if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
				5225	Worklist.insert(&MI);
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	5226	// Exit if we find another SCC def.
Stanislav Mekhanoshin	13d3371	2018-11-09 17:58:59 +0000	[diff] [blame]	5227	if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	5228	return;
Tom Stellard	bc4497b	2016-02-12 23:45:29 +0000	[diff] [blame]	5229	}
				5230	}
				5231
Matt Arsenault	ba6aae7	2015-09-28 20:54:57 +0000	[diff] [blame]	5232	const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
				5233	const MachineInstr &Inst) const {
				5234	const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
				5235
				5236	switch (Inst.getOpcode()) {
				5237	// For target instructions, getOpRegClass just returns the virtual register
				5238	// class associated with the operand, so we need to find an equivalent VGPR
				5239	// register class in order to move the instruction to the VALU.
				5240	case AMDGPU::COPY:
				5241	case AMDGPU::PHI:
				5242	case AMDGPU::REG_SEQUENCE:
				5243	case AMDGPU::INSERT_SUBREG:
Connor Abbott	8c217d0	2017-08-04 18:36:49 +0000	[diff] [blame]	5244	case AMDGPU::WQM:
Connor Abbott	92638ab	2017-08-04 18:36:52 +0000	[diff] [blame]	5245	case AMDGPU::WWM:
Matt Arsenault	ba6aae7	2015-09-28 20:54:57 +0000	[diff] [blame]	5246	if (RI.hasVGPRs(NewDstRC))
				5247	return nullptr;
				5248
				5249	NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
				5250	if (!NewDstRC)
				5251	return nullptr;
				5252	return NewDstRC;
				5253	default:
				5254	return NewDstRC;
				5255	}
				5256	}
				5257
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	5258	// Find the one SGPR operand we are allowed to use.
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5259	unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5260	int OpIndices[3]) const {
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5261	const MCInstrDesc &Desc = MI.getDesc();
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5262
				5263	// Find the one SGPR operand we are allowed to use.
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	5264	//
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5265	// First we need to consider the instruction's operand requirements before
				5266	// legalizing. Some operands are required to be SGPRs, such as implicit uses
				5267	// of VCC, but we are still bound by the constant bus requirement to only use
				5268	// one.
				5269	//
				5270	// If the operand's class is an SGPR, we can never move it.
				5271
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5272	unsigned SGPRReg = findImplicitSGPRRead(MI);
Matt Arsenault	e223ceb	2015-10-21 21:15:01 +0000	[diff] [blame]	5273	if (SGPRReg != AMDGPU::NoRegister)
				5274	return SGPRReg;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5275
				5276	unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5277	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5278
				5279	for (unsigned i = 0; i < 3; ++i) {
				5280	int Idx = OpIndices[i];
				5281	if (Idx == -1)
				5282	break;
				5283
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5284	const MachineOperand &MO = MI.getOperand(Idx);
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	5285	if (!MO.isReg())
				5286	continue;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5287
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	5288	// Is this operand statically required to be an SGPR based on the operand
				5289	// constraints?
				5290	const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
				5291	bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
				5292	if (IsRequiredSGPR)
				5293	return MO.getReg();
				5294
				5295	// If this could be a VGPR or an SGPR, Check the dynamic register class.
				5296	unsigned Reg = MO.getReg();
				5297	const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
				5298	if (RI.isSGPRClass(RegRC))
				5299	UsedSGPRs[i] = Reg;
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5300	}
				5301
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5302	// We don't have a required SGPR operand, so we have a bit more freedom in
				5303	// selecting operands to move.
				5304
				5305	// Try to select the most used SGPR. If an SGPR is equal to one of the
				5306	// others, we choose that.
				5307	//
				5308	// e.g.
				5309	// V_FMA_F32 v0, s0, s0, s0 -> No moves
				5310	// V_FMA_F32 v0, s0, s1, s0 -> Move s1
				5311
Matt Arsenault	6c06741	2015-11-03 22:30:15 +0000	[diff] [blame]	5312	// TODO: If some of the operands are 64-bit SGPRs and some 32, we should
				5313	// prefer those.
				5314
Matt Arsenault	ee522bf	2014-09-26 17:55:06 +0000	[diff] [blame]	5315	if (UsedSGPRs[0] != AMDGPU::NoRegister) {
				5316	if (UsedSGPRs[0] == UsedSGPRs[1] \|\| UsedSGPRs[0] == UsedSGPRs[2])
				5317	SGPRReg = UsedSGPRs[0];
				5318	}
				5319
				5320	if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
				5321	if (UsedSGPRs[1] == UsedSGPRs[2])
				5322	SGPRReg = UsedSGPRs[1];
				5323	}
				5324
				5325	return SGPRReg;
				5326	}
				5327
Tom Stellard	6407e1e	2014-08-01 00:32:33 +0000	[diff] [blame]	5328	MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
Matt Arsenault	ace5b76	2014-10-17 18:00:43 +0000	[diff] [blame]	5329	unsigned OperandName) const {
Tom Stellard	1aaad69	2014-07-21 16:55:33 +0000	[diff] [blame]	5330	int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
				5331	if (Idx == -1)
				5332	return nullptr;
				5333
				5334	return &MI.getOperand(Idx);
				5335	}
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	5336
				5337	uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
				5338	uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
Tom Stellard	4694ed0	2015-06-26 21:58:42 +0000	[diff] [blame]	5339	if (ST.isAmdHsaOS()) {
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5340	// Set ATC = 1. GFX9 doesn't have this bit.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5341	if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5342	RsrcDataFormat \|= (1ULL << 56);
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	5343
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5344	// Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
				5345	// BTW, it disables TC L2 and therefore decreases performance.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5346	if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
Michel Danzer	beb79ce	2016-03-16 09:10:35 +0000	[diff] [blame]	5347	RsrcDataFormat \|= (2ULL << 59);
Tom Stellard	4694ed0	2015-06-26 21:58:42 +0000	[diff] [blame]	5348	}
				5349
Tom Stellard	794c8c0	2014-12-02 17:05:41 +0000	[diff] [blame]	5350	return RsrcDataFormat;
				5351	}
Marek Olsak	d1a69a2	2015-09-29 23:37:32 +0000	[diff] [blame]	5352
				5353	uint64_t SIInstrInfo::getScratchRsrcWords23() const {
				5354	uint64_t Rsrc23 = getDefaultRsrcDataFormat() \|
				5355	AMDGPU::RSRC_TID_ENABLE \|
				5356	0xffffffff; // Size;
				5357
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5358	// GFX9 doesn't have ELEMENT_SIZE.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5359	if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5360	uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
				5361	Rsrc23 \|= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
				5362	}
Matt Arsenault	24ee078	2016-02-12 02:40:47 +0000	[diff] [blame]	5363
Marek Olsak	5c7a61d	2017-03-21 17:00:39 +0000	[diff] [blame]	5364	// IndexStride = 64.
				5365	Rsrc23 \|= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
Matt Arsenault	24ee078	2016-02-12 02:40:47 +0000	[diff] [blame]	5366
Marek Olsak	d1a69a2	2015-09-29 23:37:32 +0000	[diff] [blame]	5367	// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
				5368	// Clear them unless we want a huge stride.
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5369	if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Marek Olsak	d1a69a2	2015-09-29 23:37:32 +0000	[diff] [blame]	5370	Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
				5371
				5372	return Rsrc23;
				5373	}
Nicolai Haehnle	02c3291	2016-01-13 16:10:10 +0000	[diff] [blame]	5374
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5375	bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
				5376	unsigned Opc = MI.getOpcode();
Nicolai Haehnle	02c3291	2016-01-13 16:10:10 +0000	[diff] [blame]	5377
				5378	return isSMRD(Opc);
				5379	}
				5380
Duncan P. N. Exon Smith	9cfc75c	2016-06-30 00:01:54 +0000	[diff] [blame]	5381	bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
				5382	unsigned Opc = MI.getOpcode();
Nicolai Haehnle	02c3291	2016-01-13 16:10:10 +0000	[diff] [blame]	5383
				5384	return isMUBUF(Opc) \|\| isMTBUF(Opc) \|\| isMIMG(Opc);
				5385	}
Tom Stellard	2ff7262	2016-01-28 16:04:37 +0000	[diff] [blame]	5386
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	5387	unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
				5388	int &FrameIndex) const {
				5389	const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
				5390	if (!Addr \|\| !Addr->isFI())
				5391	return AMDGPU::NoRegister;
				5392
				5393	assert(!MI.memoperands_empty() &&
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	5394	(*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	5395
				5396	FrameIndex = Addr->getIndex();
				5397	return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
				5398	}
				5399
				5400	unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
				5401	int &FrameIndex) const {
				5402	const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
				5403	assert(Addr && Addr->isFI());
				5404	FrameIndex = Addr->getIndex();
				5405	return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
				5406	}
				5407
				5408	unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
				5409	int &FrameIndex) const {
Matt Arsenault	3354f42	2016-09-10 01:20:33 +0000	[diff] [blame]	5410	if (!MI.mayLoad())
				5411	return AMDGPU::NoRegister;
				5412
				5413	if (isMUBUF(MI) \|\| isVGPRSpill(MI))
				5414	return isStackAccess(MI, FrameIndex);
				5415
				5416	if (isSGPRSpill(MI))
				5417	return isSGPRStackAccess(MI, FrameIndex);
				5418
				5419	return AMDGPU::NoRegister;
				5420	}
				5421
				5422	unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
				5423	int &FrameIndex) const {
				5424	if (!MI.mayStore())
				5425	return AMDGPU::NoRegister;
				5426
				5427	if (isMUBUF(MI) \|\| isVGPRSpill(MI))
				5428	return isStackAccess(MI, FrameIndex);
				5429
				5430	if (isSGPRSpill(MI))
				5431	return isSGPRStackAccess(MI, FrameIndex);
				5432
				5433	return AMDGPU::NoRegister;
				5434	}
				5435
Matt Arsenault	9ab1fa6	2017-10-04 22:59:12 +0000	[diff] [blame]	5436	unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const {
				5437	unsigned Size = 0;
				5438	MachineBasicBlock::const_instr_iterator I = MI.getIterator();
				5439	MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
				5440	while (++I != E && I->isInsideBundle()) {
				5441	assert(!I->isBundle() && "No nested bundle!");
				5442	Size += getInstSizeInBytes(*I);
				5443	}
				5444
				5445	return Size;
				5446	}
				5447
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5448	unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
				5449	unsigned Opc = MI.getOpcode();
				5450	const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
				5451	unsigned DescSize = Desc.getSize();
				5452
				5453	// If we have a definitive size, we can use it. Otherwise we need to inspect
				5454	// the operands to know the size.
Matt Arsenault	0183c56	2018-07-27 09:15:03 +0000	[diff] [blame]	5455	if (isFixedSize(MI))
				5456	return DescSize;
				5457
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5458	// 4-byte instructions may have a 32-bit literal encoded after them. Check
				5459	// operands that coud ever be literals.
				5460	if (isVALU(MI) \|\| isSALU(MI)) {
				5461	int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
				5462	if (Src0Idx == -1)
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	5463	return DescSize; // No operands.
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5464
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	5465	if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
Stanislav Mekhanoshin	692560d	2019-05-01 16:32:58 +0000	[diff] [blame]	5466	return isVOP3(MI) ? 12 : (DescSize + 4);
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5467
				5468	int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
				5469	if (Src1Idx == -1)
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	5470	return DescSize;
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5471
Matt Arsenault	4bd7236	2016-12-10 00:39:12 +0000	[diff] [blame]	5472	if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
Stanislav Mekhanoshin	692560d	2019-05-01 16:32:58 +0000	[diff] [blame]	5473	return isVOP3(MI) ? 12 : (DescSize + 4);
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5474
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	5475	int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
				5476	if (Src2Idx == -1)
				5477	return DescSize;
				5478
				5479	if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
Stanislav Mekhanoshin	692560d	2019-05-01 16:32:58 +0000	[diff] [blame]	5480	return isVOP3(MI) ? 12 : (DescSize + 4);
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	5481
				5482	return DescSize;
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5483	}
				5484
Stanislav Mekhanoshin	692560d	2019-05-01 16:32:58 +0000	[diff] [blame]	5485	// Check whether we have extra NSA words.
				5486	if (isMIMG(MI)) {
				5487	int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
				5488	if (VAddr0Idx < 0)
				5489	return 8;
				5490
				5491	int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
				5492	return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
				5493	}
				5494
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5495	switch (Opc) {
				5496	case TargetOpcode::IMPLICIT_DEF:
				5497	case TargetOpcode::KILL:
				5498	case TargetOpcode::DBG_VALUE:
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5499	case TargetOpcode::EH_LABEL:
				5500	return 0;
Matt Arsenault	9ab1fa6	2017-10-04 22:59:12 +0000	[diff] [blame]	5501	case TargetOpcode::BUNDLE:
				5502	return getInstBundleSize(MI);
Craig Topper	784929d	2019-02-08 20:48:56 +0000	[diff] [blame]	5503	case TargetOpcode::INLINEASM:
				5504	case TargetOpcode::INLINEASM_BR: {
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5505	const MachineFunction *MF = MI.getParent()->getParent();
				5506	const char *AsmStr = MI.getOperand(0).getSymbolName();
				5507	return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
				5508	}
				5509	default:
Nicolai Haehnle	283b995	2018-08-29 07:46:09 +0000	[diff] [blame]	5510	return DescSize;
Matt Arsenault	02458c2	2016-06-06 20:10:33 +0000	[diff] [blame]	5511	}
				5512	}
				5513
Tom Stellard	6695ba0	2016-10-28 23:53:48 +0000	[diff] [blame]	5514	bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
				5515	if (!isFLAT(MI))
				5516	return false;
				5517
				5518	if (MI.memoperands_empty())
				5519	return true;
				5520
				5521	for (const MachineMemOperand *MMO : MI.memoperands()) {
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	5522	if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
Tom Stellard	6695ba0	2016-10-28 23:53:48 +0000	[diff] [blame]	5523	return true;
				5524	}
				5525	return false;
				5526	}
				5527
Jan Sjodin	a06bfe0	2017-05-15 20:18:37 +0000	[diff] [blame]	5528	bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
				5529	return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
				5530	}
				5531
				5532	void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
				5533	MachineBasicBlock *IfEnd) const {
				5534	MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
				5535	assert(TI != IfEntry->end());
				5536
				5537	MachineInstr Branch = &(TI);
				5538	MachineFunction *MF = IfEntry->getParent();
				5539	MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
				5540
				5541	if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
				5542	unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				5543	MachineInstr *SIIF =
				5544	BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
				5545	.add(Branch->getOperand(0))
				5546	.add(Branch->getOperand(1));
				5547	MachineInstr *SIEND =
				5548	BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
				5549	.addReg(DstReg);
				5550
				5551	IfEntry->erase(TI);
				5552	IfEntry->insert(IfEntry->end(), SIIF);
				5553	IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
				5554	}
				5555	}
				5556
				5557	void SIInstrInfo::convertNonUniformLoopRegion(
				5558	MachineBasicBlock LoopEntry, MachineBasicBlock LoopEnd) const {
				5559	MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
				5560	// We expect 2 terminators, one conditional and one unconditional.
				5561	assert(TI != LoopEnd->end());
				5562
				5563	MachineInstr Branch = &(TI);
				5564	MachineFunction *MF = LoopEnd->getParent();
				5565	MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
				5566
				5567	if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
				5568
				5569	unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				5570	unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				5571	MachineInstrBuilder HeaderPHIBuilder =
				5572	BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
				5573	for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
				5574	E = LoopEntry->pred_end();
				5575	PI != E; ++PI) {
				5576	if (*PI == LoopEnd) {
				5577	HeaderPHIBuilder.addReg(BackEdgeReg);
				5578	} else {
				5579	MachineBasicBlock PMBB = PI;
				5580	unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				5581	materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
				5582	ZeroReg, 0);
				5583	HeaderPHIBuilder.addReg(ZeroReg);
				5584	}
				5585	HeaderPHIBuilder.addMBB(*PI);
				5586	}
				5587	MachineInstr *HeaderPhi = HeaderPHIBuilder;
				5588	MachineInstr SIIFBREAK = BuildMI((MF), Branch->getDebugLoc(),
				5589	get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
				5590	.addReg(DstReg)
				5591	.add(Branch->getOperand(0));
				5592	MachineInstr *SILOOP =
				5593	BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
				5594	.addReg(BackEdgeReg)
				5595	.addMBB(LoopEntry);
				5596
				5597	LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
				5598	LoopEnd->erase(TI);
				5599	LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
				5600	LoopEnd->insert(LoopEnd->end(), SILOOP);
				5601	}
				5602	}
				5603
Tom Stellard	2ff7262	2016-01-28 16:04:37 +0000	[diff] [blame]	5604	ArrayRef<std::pair<int, const char *>>
				5605	SIInstrInfo::getSerializableTargetIndices() const {
				5606	static const std::pair<int, const char *> TargetIndices[] = {
				5607	{AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
				5608	{AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
				5609	{AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
				5610	{AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
				5611	{AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
				5612	return makeArrayRef(TargetIndices);
				5613	}
Tom Stellard	cb6ba62	2016-04-30 00:23:06 +0000	[diff] [blame]	5614
				5615	/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
				5616	/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
				5617	ScheduleHazardRecognizer *
				5618	SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
				5619	const ScheduleDAG *DAG) const {
				5620	return new GCNHazardRecognizer(DAG->MF);
				5621	}
				5622
				5623	/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
				5624	/// pass.
				5625	ScheduleHazardRecognizer *
				5626	SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
				5627	return new GCNHazardRecognizer(MF);
				5628	}
Stanislav Mekhanoshin	6ec3e3a	2017-01-20 00:44:31 +0000	[diff] [blame]	5629
Matt Arsenault	3f031e7	2017-07-02 23:21:48 +0000	[diff] [blame]	5630	std::pair<unsigned, unsigned>
				5631	SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
				5632	return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
				5633	}
				5634
				5635	ArrayRef<std::pair<unsigned, const char *>>
				5636	SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
				5637	static const std::pair<unsigned, const char *> TargetFlags[] = {
				5638	{ MO_GOTPCREL, "amdgpu-gotprel" },
				5639	{ MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
				5640	{ MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
				5641	{ MO_REL32_LO, "amdgpu-rel32-lo" },
				5642	{ MO_REL32_HI, "amdgpu-rel32-hi" }
				5643	};
				5644
				5645	return makeArrayRef(TargetFlags);
				5646	}
				5647
Stanislav Mekhanoshin	6ec3e3a	2017-01-20 00:44:31 +0000	[diff] [blame]	5648	bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
				5649	return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
				5650	MI.modifiesRegister(AMDGPU::EXEC, &RI);
				5651	}
Stanislav Mekhanoshin	86b0a54	2017-04-14 00:33:44 +0000	[diff] [blame]	5652
				5653	MachineInstrBuilder
				5654	SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
				5655	MachineBasicBlock::iterator I,
				5656	const DebugLoc &DL,
				5657	unsigned DestReg) const {
Matt Arsenault	686d5c7	2017-11-30 23:42:30 +0000	[diff] [blame]	5658	if (ST.hasAddNoCarry())
				5659	return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
Stanislav Mekhanoshin	86b0a54	2017-04-14 00:33:44 +0000	[diff] [blame]	5660
Matt Arsenault	686d5c7	2017-11-30 23:42:30 +0000	[diff] [blame]	5661	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
Stanislav Mekhanoshin	86b0a54	2017-04-14 00:33:44 +0000	[diff] [blame]	5662	unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
Matt Arsenault	686d5c7	2017-11-30 23:42:30 +0000	[diff] [blame]	5663	MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
Stanislav Mekhanoshin	86b0a54	2017-04-14 00:33:44 +0000	[diff] [blame]	5664
				5665	return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
				5666	.addReg(UnusedCarry, RegState::Define \| RegState::Dead);
				5667	}
Marek Olsak	ce76ea0	2017-10-24 10:27:13 +0000	[diff] [blame]	5668
				5669	bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
				5670	switch (Opcode) {
				5671	case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
				5672	case AMDGPU::SI_KILL_I1_TERMINATOR:
				5673	return true;
				5674	default:
				5675	return false;
				5676	}
				5677	}
				5678
				5679	const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
				5680	switch (Opcode) {
				5681	case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
				5682	return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
				5683	case AMDGPU::SI_KILL_I1_PSEUDO:
				5684	return get(AMDGPU::SI_KILL_I1_TERMINATOR);
				5685	default:
				5686	llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO");
				5687	}
				5688	}
Tom Stellard	44b30b4	2018-05-22 02:03:23 +0000	[diff] [blame]	5689
				5690	bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
				5691	if (!isSMRD(MI))
				5692	return false;
				5693
				5694	// Check that it is using a buffer resource.
				5695	int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
				5696	if (Idx == -1) // e.g. s_memtime
				5697	return false;
				5698
				5699	const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
				5700	return RCID == AMDGPU::SReg_128RegClassID;
				5701	}
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5702
				5703	// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
				5704	enum SIEncodingFamily {
				5705	SI = 0,
				5706	VI = 1,
				5707	SDWA = 2,
				5708	SDWA9 = 3,
				5709	GFX80 = 4,
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame]	5710	GFX9 = 5,
				5711	GFX10 = 6,
				5712	SDWA10 = 7
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5713	};
				5714
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5715	static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5716	switch (ST.getGeneration()) {
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5717	default:
				5718	break;
				5719	case AMDGPUSubtarget::SOUTHERN_ISLANDS:
				5720	case AMDGPUSubtarget::SEA_ISLANDS:
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5721	return SIEncodingFamily::SI;
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5722	case AMDGPUSubtarget::VOLCANIC_ISLANDS:
				5723	case AMDGPUSubtarget::GFX9:
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5724	return SIEncodingFamily::VI;
Stanislav Mekhanoshin	cee607e	2019-04-24 17:03:15 +0000	[diff] [blame]	5725	case AMDGPUSubtarget::GFX10:
				5726	return SIEncodingFamily::GFX10;
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5727	}
				5728	llvm_unreachable("Unknown subtarget generation!");
				5729	}
				5730
				5731	int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
				5732	SIEncodingFamily Gen = subtargetEncodingFamily(ST);
				5733
				5734	if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5735	ST.getGeneration() >= AMDGPUSubtarget::GFX9)
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5736	Gen = SIEncodingFamily::GFX9;
				5737
				5738	if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
Tom Stellard	5bfbae5	2018-07-11 20:59:01 +0000	[diff] [blame]	5739	Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
Tom Stellard	c5a154d	2018-06-28 23:47:12 +0000	[diff] [blame]	5740	: SIEncodingFamily::SDWA;
				5741	// Adjust the encoding family to GFX80 for D16 buffer instructions when the
				5742	// subtarget has UnpackedD16VMem feature.
				5743	// TODO: remove this when we discard GFX80 encoding.
				5744	if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
				5745	Gen = SIEncodingFamily::GFX80;
				5746
				5747	int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
				5748
				5749	// -1 means that Opcode is already a native instruction.
				5750	if (MCOp == -1)
				5751	return Opcode;
				5752
				5753	// (uint16_t)-1 means that Opcode is a pseudo instruction that has
				5754	// no encoding in the given subtarget generation.
				5755	if (MCOp == (uint16_t)-1)
				5756	return -1;
				5757
				5758	return MCOp;
				5759	}
Valery Pykhtin	3d9afa2	2018-11-30 14:21:56 +0000	[diff] [blame]	5760
				5761	static
				5762	TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
				5763	assert(RegOpnd.isReg());
				5764	return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
				5765	getRegSubRegPair(RegOpnd);
				5766	}
				5767
				5768	TargetInstrInfo::RegSubRegPair
				5769	llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
				5770	assert(MI.isRegSequence());
				5771	for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
				5772	if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
				5773	auto &RegOp = MI.getOperand(1 + 2 * I);
				5774	return getRegOrUndef(RegOp);
				5775	}
				5776	return TargetInstrInfo::RegSubRegPair();
				5777	}
				5778
				5779	// Try to find the definition of reg:subreg in subreg-manipulation pseudos
				5780	// Following a subreg of reg:subreg isn't supported
				5781	static bool followSubRegDef(MachineInstr &MI,
				5782	TargetInstrInfo::RegSubRegPair &RSR) {
				5783	if (!RSR.SubReg)
				5784	return false;
				5785	switch (MI.getOpcode()) {
				5786	default: break;
				5787	case AMDGPU::REG_SEQUENCE:
				5788	RSR = getRegSequenceSubReg(MI, RSR.SubReg);
				5789	return true;
				5790	// EXTRACT_SUBREG ins't supported as this would follow a subreg of subreg
				5791	case AMDGPU::INSERT_SUBREG:
				5792	if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
				5793	// inserted the subreg we're looking for
				5794	RSR = getRegOrUndef(MI.getOperand(2));
				5795	else { // the subreg in the rest of the reg
				5796	auto R1 = getRegOrUndef(MI.getOperand(1));
				5797	if (R1.SubReg) // subreg of subreg isn't supported
				5798	return false;
				5799	RSR.Reg = R1.Reg;
				5800	}
				5801	return true;
				5802	}
				5803	return false;
				5804	}
				5805
				5806	MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
				5807	MachineRegisterInfo &MRI) {
				5808	assert(MRI.isSSA());
				5809	if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
				5810	return nullptr;
				5811
				5812	auto RSR = P;
				5813	auto *DefInst = MRI.getVRegDef(RSR.Reg);
				5814	while (auto *MI = DefInst) {
				5815	DefInst = nullptr;
				5816	switch (MI->getOpcode()) {
				5817	case AMDGPU::COPY:
				5818	case AMDGPU::V_MOV_B32_e32: {
				5819	auto &Op1 = MI->getOperand(1);
				5820	if (Op1.isReg() &&
				5821	TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
				5822	if (Op1.isUndef())
				5823	return nullptr;
				5824	RSR = getRegSubRegPair(Op1);
				5825	DefInst = MRI.getVRegDef(RSR.Reg);
				5826	}
				5827	break;
				5828	}
				5829	default:
				5830	if (followSubRegDef(*MI, RSR)) {
				5831	if (!RSR.Reg)
				5832	return nullptr;
				5833	DefInst = MRI.getVRegDef(RSR.Reg);
				5834	}
				5835	}
				5836	if (!DefInst)
				5837	return MI;
				5838	}
				5839	return nullptr;
				5840	}
Valery Pykhtin	7fe97f8	2019-02-08 11:59:48 +0000	[diff] [blame]	5841
				5842	bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
				5843	MachineRegisterInfo &MRI) {
				5844	assert(MRI.isSSA() && "Must be run on SSA");
				5845	auto *TRI = MRI.getTargetRegisterInfo();
				5846
				5847	auto *DefI = MRI.getVRegDef(VReg);
				5848	auto *BB = DefI->getParent();
				5849
				5850	DenseSet<MachineInstr*> Uses;
				5851	for (auto &Use : MRI.use_nodbg_operands(VReg)) {
				5852	auto *I = Use.getParent();
				5853	if (I->getParent() != BB)
				5854	return false;
				5855	Uses.insert(I);
				5856	}
				5857
				5858	auto E = BB->end();
				5859	for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
				5860	Uses.erase(&*I);
				5861	// don't check the last use
				5862	if (Uses.empty() \|\| I->modifiesRegister(AMDGPU::EXEC, TRI))
				5863	break;
				5864	}
				5865	return Uses.empty();
				5866	}